Mirror of https://github.com/mudler/LocalAI.git — synced 2025-05-09 12:03:15 +00:00
feat(vllm): add support for image-to-text and video-to-text (#3729)
* feat(vllm): add support for image-to-text
  Related to https://github.com/mudler/LocalAI/issues/3670
* feat(vllm): add support for video-to-text
  Closes: https://github.com/mudler/LocalAI/issues/2318
* feat(vllm): support CPU installations
* feat(vllm): add bnb
* chore: add docs reference
* Apply suggestions from code review

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
This commit is contained in:
parent 408dfe62ee
commit 2553de0187
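As background for the changes below, this is roughly how a client could exercise the new image-to-text path once the vLLM backend is running behind LocalAI's OpenAI-compatible API. The host, port, and model name are placeholders and are not part of this commit; only the message shape follows the documented chat-completions vision format.

import base64
import requests

# Encode a local image and embed it as a data URL in an OpenAI-style vision message.
with open("cat.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "model": "llava",  # placeholder: any vision model served through the vLLM backend
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this picture?"},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ],
    }],
}

resp = requests.post("http://localhost:8080/v1/chat/completions", json=payload, timeout=300)
print(resp.json()["choices"][0]["message"]["content"])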
@@ -5,6 +5,8 @@ import argparse
 import signal
 import sys
 import os
+from typing import List
+from PIL import Image
 
 import backend_pb2
 import backend_pb2_grpc
@@ -15,6 +17,8 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
 from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.multimodal.utils import fetch_image
+from vllm.assets.video import VideoAsset
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 
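The hunk above also imports fetch_image, which is not exercised in the hunks shown here; as a hedged aside, it resolves an image URL or data URI into a PIL image, roughly like this (the URL is a placeholder):

# Illustrative only: fetch_image (imported above) turns a URL/data URI into a PIL image.
from vllm.multimodal.utils import fetch_image

image = fetch_image("https://example.com/cat.png")  # placeholder URL
print(image.size)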
@@ -105,6 +109,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         try:
             self.llm = AsyncLLMEngine.from_engine_args(engine_args)
         except Exception as err:
+            print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
 
         try:
@@ -117,7 +122,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             )
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
+        print("Model loaded successfully", file=sys.stderr)
         return backend_pb2.Result(message="Model loaded successfully", success=True)
 
     async def Predict(self, request, context):
@@ -196,15 +201,33 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if request.Seed != 0:
             sampling_params.seed = request.Seed
 
+        # Extract image paths and process images
         prompt = request.Prompt
 
-        # If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template
+        image_paths = request.Images
+        image_data = [self.load_image(img_path) for img_path in image_paths]
+
+        videos_path = request.Videos
+        video_data = [self.load_video(video_path) for video_path in videos_path]
+
+        # If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
         if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
             prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
 
-        # Generate text
+        # Generate text using the LLM engine
         request_id = random_uuid()
-        outputs = self.llm.generate(prompt, sampling_params, request_id)
+        print(f"Generating text with request_id: {request_id}", file=sys.stderr)
+        outputs = self.llm.generate(
+            {
+                "prompt": prompt,
+                "multi_modal_data": {
+                    "image": image_data if image_data else None,
+                    "video": video_data if video_data else None,
+                } if image_data or video_data else None,
+            },
+            sampling_params=sampling_params,
+            request_id=request_id,
+        )
 
         # Stream the results
         generated_text = ""
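Outside the gRPC server, the same {"prompt": ..., "multi_modal_data": ...} request shape can be tried against vLLM's offline LLM API. A minimal sketch, assuming a LLaVA-style vision model and its prompt template (model name and template are assumptions for illustration, not taken from this commit):

from PIL import Image
from vllm import LLM, SamplingParams

llm = LLM(model="llava-hf/llava-1.5-7b-hf")   # assumed vision-capable checkpoint
image = Image.open("cat.png")                 # same PIL-based loading as load_image() below

outputs = llm.generate(
    {
        "prompt": "USER: <image>\nWhat is shown in this picture?\nASSISTANT:",  # model-specific template
        "multi_modal_data": {"image": image},
    },
    SamplingParams(temperature=0.2, max_tokens=128),
)
print(outputs[0].outputs[0].text)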
@@ -227,9 +250,49 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if streaming:
             return
 
+        # Remove the image files from /tmp folder
+        for img_path in image_paths:
+            try:
+                os.remove(img_path)
+            except Exception as e:
+                print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
+
         # Sending the final generated text
         yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
 
+    def load_image(self, image_path: str):
+        """
+        Load an image from the given file path.
+
+        Args:
+            image_path (str): The path to the image file.
+
+        Returns:
+            Image: The loaded image.
+        """
+        try:
+            return Image.open(image_path)
+        except Exception as e:
+            print(f"Error loading image {image_path}: {e}", file=sys.stderr)
+            return self.load_video(image_path)
+
+    def load_video(self, video_path: str):
+        """
+        Load a video from the given file path.
+
+        Args:
+            video_path (str): The path to the video file.
+
+        Returns:
+            Video: The loaded video.
+        """
+        try:
+            video = VideoAsset(name=video_path).np_ndarrays
+            return video
+        except Exception as e:
+            print(f"Error loading video {video_path}: {e}", file=sys.stderr)
+            return None
+
 async def serve(address):
     # Start asyncio gRPC server
     server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
@@ -13,4 +13,18 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
     EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi
 
-installRequirements
+if [ "x${BUILD_TYPE}" == "x" ]; then
+    ensureVenv
+    # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
+    if [ ! -d vllm ]; then
+        git clone https://github.com/vllm-project/vllm
+    fi
+    pushd vllm
+    uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
+    uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+    VLLM_TARGET_DEVICE=cpu python setup.py install
+    popd
+    rm -rf vllm
+else
+    installRequirements
+fi
@@ -1,4 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 accelerate
 torch
 transformers
+bitsandbytes
@@ -1,3 +1,4 @@
 accelerate
 torch
 transformers
+bitsandbytes
@@ -1,4 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 accelerate
 torch
 transformers
+bitsandbytes
@@ -4,4 +4,5 @@ accelerate
 torch
 transformers
 optimum[openvino]
 setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+bitsandbytes
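Each of the requirement sets above gains bitsandbytes. For reference, a hedged sketch of how in-flight bnb quantization can be requested from vLLM itself; the argument names follow vLLM's documented bitsandbytes support and the model name is a placeholder, neither is taken from this commit:

# Illustrative only: asking vLLM to load and quantize weights with bitsandbytes.
from vllm import LLM

llm = LLM(
    model="unsloth/llama-3-8b-bnb-4bit",  # placeholder bnb-quantized checkpoint
    quantization="bitsandbytes",
    load_format="bitsandbytes",
)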