commit c29acbb6a6 (parent 97842c708c)
Author: Saifeddine ALOUI
Date:   2024-02-21 00:42:01 +01:00

6 changed files with 35 additions and 9 deletions

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 68
+version: 70
 binding_name: null
 model_name: null
@@ -91,7 +91,7 @@ ollama_base_url: http://localhost:11434
 enable_petals_service: false
 petals_base_url: http://localhost:8064
 petals_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-petals_device: cuda0
+petals_device: cuda
 # lollms service
 enable_lollms_service: false
@@ -105,6 +105,8 @@ elastic_search_url: http://localhost:9200
 enable_vllm_service: false
 vllm_url: http://localhost:8000
 vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+vllm_gpu_memory_utilization: 0.9
+vllm_max_model_len: 4096
 # Audio
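
Editor's note: the petals_device fix replaces an invalid PyTorch device string. The parser accepts a bare backend name ("cuda") or backend:index ("cuda:0"), but not "cuda0". The same one-line change repeats in each of the three bundled configuration files in this commit. A minimal illustrative check:

# Why "cuda0" had to become "cuda": PyTorch's device parser accepts
# a bare backend name or backend:index, nothing else.
import torch

torch.device("cuda")     # valid: default CUDA device
torch.device("cuda:0")   # valid: explicit device index
try:
    torch.device("cuda0")
except RuntimeError:
    print("invalid device string")  # "cuda0" is rejected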

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 68
+version: 70
 binding_name: null
 model_name: null
@@ -91,7 +91,7 @@ ollama_base_url: http://localhost:11434
 enable_petals_service: false
 petals_base_url: http://localhost:8064
 petals_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-petals_device: cuda0
+petals_device: cuda
 # lollms service
 enable_lollms_service: false
@@ -105,6 +105,8 @@ elastic_search_url: http://localhost:9200
 enable_vllm_service: false
 vllm_url: http://localhost:8000
 vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+vllm_gpu_memory_utilization: 0.9
+vllm_max_model_len: 4096
 # Audio

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 68
+version: 70
 binding_name: null
 model_name: null
@@ -91,7 +91,7 @@ ollama_base_url: http://localhost:11434
 enable_petals_service: false
 petals_base_url: http://localhost:8064
 petals_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-petals_device: cuda0
+petals_device: cuda
 # lollms service
 enable_lollms_service: false
@@ -105,6 +105,8 @@ elastic_search_url: http://localhost:9200
 enable_vllm_service: false
 vllm_url: http://localhost:8000
 vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+vllm_gpu_memory_utilization: 0.9
+vllm_max_model_len: 4096
 # Audio
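
Editor's note: the two new keys correspond one-to-one to vLLM's --gpu-memory-utilization and --max-model-len server flags (wired through run_vllm.sh below). A minimal sketch of reading them back out; the config path here is illustrative, not the repo's canonical one:

# Minimal sketch: the new config keys map onto vLLM server flags.
# "config.yaml" below is an illustrative path.
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

flags = [
    "--max-model-len", str(cfg["vllm_max_model_len"]),                    # 4096
    "--gpu-memory-utilization", str(cfg["vllm_gpu_memory_utilization"]),  # 0.9
]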

View File

@@ -11,6 +11,7 @@ import sys
 from lollms.app import LollmsApplication
 from lollms.paths import LollmsPaths
 from lollms.config import TypedConfig, ConfigTemplate, BaseConfig
+from lollms.utilities import url2host_port
 import time
 import io
 import sys
@@ -103,12 +104,13 @@ class Service:
         if not self.wait_for_service(1,False) and base_url is None:
             ASCIIColors.info("Loading vllm service")
+            host, port = url2host_port(base_url)
             # run vllm
             if platform.system() == 'Windows':
                 #subprocess.Popen(['wsl', 'ls', '$HOME'])
-                subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path])
+                subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
             else:
-                subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path])
+                subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
             # Wait until the service is available at http://127.0.0.1:7860/
             self.wait_for_service(max_retries=wait_max_retries)
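
Editor's note: as shown, this branch guards on base_url is None yet passes base_url to url2host_port, so it presumably relies on base_url being repopulated from vllm_url earlier in the constructor, which this hunk does not show. Under that assumption, and with the config defaults above, the new call chain assembles roughly this argument vector:

# Sketch of the argv assembled above, assuming base_url resolves to the
# config default "http://localhost:8000" (not shown in this hunk).
from pathlib import Path
from lollms.utilities import url2host_port

host, port = url2host_port("http://localhost:8000")
# host == "http://localhost" (scheme included), port == "8000"
argv = [
    "bash", f"{Path.home()}/run_vllm.sh",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # $1: vllm_model_path
    host,                                  # $2: host
    str(port),                             # $3: port
    str(4096),                             # $4: vllm_max_model_len
    str(0.9),                              # $5: vllm_gpu_memory_utilization
]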

View File

@@ -4,7 +4,13 @@ PATH="$HOME/miniconda3/bin:$PATH"
 export PATH
 echo "Initializing conda"
 $HOME/miniconda3/bin/conda init --all
-source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1"
+echo "Initializing vllm with:"
+echo "model :$1"
+echo "host :$2"
+echo "port :$3"
+echo "max_model_len :$4"
+echo "gpu_memory_utilization :$5"
+source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1" --host "$2" --port "$3" --max-model-len "$4" --gpu-memory-utilization "$5"
 # Wait for all background processes to finish
 wait
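
Editor's note: with the defaults above, the script's last line expands to roughly the command below; a sketch, not captured output. Note that the host argument still carries its http:// scheme as returned by url2host_port (see the helper at the end of this commit):

# Rough expansion of the script's vllm invocation with the config
# defaults above (a sketch, not captured output).
cmd = (
    "python -m vllm.entrypoints.openai.api_server"
    " --model TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    " --host http://localhost"   # scheme retained by url2host_port
    " --port 8000"
    " --max-model-len 4096"
    " --gpu-memory-utilization 0.9"
)
print(cmd)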

View File

@@ -128,6 +128,18 @@ def discussion_path_to_url(file_path:str|Path)->str:
     return "/".join([urllib.parse.quote(p, safe="") for p in url.split("/")])
+
+def url2host_port(url, default_port=8000):
+    if "http" in url:
+        parts = url.split(":")
+        host = ":".join(parts[:2])
+        port = url.split(":")[2] if len(parts)==3 else default_port
+        return host, port
+    else:
+        parts = url.split(":")
+        host = parts[0]
+        port = url.split(":")[1] if len(parts)==2 else default_port
+        return host, port
+
 def is_asyncio_loop_running():
     """
     # This function checks if an AsyncIO event loop is currently running. If an event loop is running, it returns True. If not, it returns False.
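
Editor's note: for reference, the helper's behavior on the URL shapes it handles, traced from the code above. The return type is mixed: the port comes back as a string when present in the URL and as the integer default otherwise; the callers in this commit wrap it in str(), so the mix is harmless there.

# Traced behavior of url2host_port (values follow the code above).
from lollms.utilities import url2host_port

url2host_port("http://localhost:8000")  # -> ("http://localhost", "8000")
url2host_port("http://localhost")       # -> ("http://localhost", 8000)  int default
url2host_port("localhost:9200")         # -> ("localhost", "9200")
url2host_port("localhost")              # -> ("localhost", 8000)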