vllm upgraded

This commit is contained in:
Saifeddine ALOUI 2024-02-21 00:49:51 +01:00
parent c29acbb6a6
commit b8ed3581da
5 changed files with 9 additions and 6 deletions

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 70
+version: 71
 binding_name: null
 model_name: null
@@ -107,6 +107,7 @@ vllm_url: http://localhost:8000
 vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 vllm_gpu_memory_utilization: 0.9
 vllm_max_model_len: 4096
+vllm_max_num_seqs: 256
 # Audio
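
The three keys touched here map one-to-one onto vLLM engine arguments (--max-model-len, --gpu-memory-utilization, and the newly exposed --max-num-seqs, which caps how many sequences the engine schedules per batch). As a cross-check, a minimal sketch of the same limits expressed through vLLM's offline Python API, using the defaults added in this commit:

# Sketch only: the config values above, passed to vLLM's LLM class instead of the OpenAI server.
from vllm import LLM, SamplingParams

llm = LLM(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",   # vllm_model_path
    max_model_len=4096,                           # vllm_max_model_len
    gpu_memory_utilization=0.9,                   # vllm_gpu_memory_utilization
    max_num_seqs=256,                             # vllm_max_num_seqs (new in version 71)
)
out = llm.generate(["Hello"], SamplingParams(max_tokens=16))
print(out[0].outputs[0].text)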

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 70
version: 71
binding_name: null
model_name: null
@ -107,6 +107,7 @@ vllm_url: http://localhost:8000
vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
vllm_gpu_memory_utilization: 0.9
vllm_max_model_len: 4096
vllm_max_num_seqs: 256
# Audio

View File

@ -1,5 +1,5 @@
# =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
version: 70
version: 71
binding_name: null
model_name: null
@ -107,6 +107,7 @@ vllm_url: http://localhost:8000
vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
vllm_gpu_memory_utilization: 0.9
vllm_max_model_len: 4096
vllm_max_num_seqs: 256
# Audio

View File

@@ -108,9 +108,9 @@ class Service:
         # run vllm
         if platform.system() == 'Windows':
             #subprocess.Popen(['wsl', 'ls', '$HOME'])
-            subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
+            subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization), str(self.app.config.vllm_max_num_seqs)])
         else:
-            subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
+            subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization), str(self.app.config.vllm_max_num_seqs)])
         # Wait until the service is available at http://127.0.0.1:7860/
         self.wait_for_service(max_retries=wait_max_retries)
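
For reference, wait_for_service itself is not part of this diff; a minimal sketch of that kind of readiness poll against the OpenAI-compatible server (the /v1/models route is served by vllm.entrypoints.openai.api_server once the model is loaded; the helper name and defaults below are illustrative, not the repo's code):

import time
import requests

def wait_for_vllm(base_url: str = "http://localhost:8000", max_retries: int = 30) -> bool:
    # Illustrative helper: poll /v1/models until the server answers or retries run out.
    for _ in range(max_retries):
        try:
            if requests.get(f"{base_url}/v1/models", timeout=2).status_code == 200:
                return True
        except requests.RequestException:
            pass
        time.sleep(1)
    return False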

View File

@@ -10,7 +10,7 @@ echo "host :$2"
 echo "port :$3"
 echo "max_model_len :$4"
 echo "gpu_memory_utilization :$5"
-source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1" --host "$2" --port "$3" --max-model-len "$4" --gpu-memory-utilization "$5"
+source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1" --host "$2" --port "$3" --max-model-len "$4" --gpu-memory-utilization "$5" --max-num-seqs "$6"
 # Wait for all background processes to finish
 wait
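
Once the script is running, the server speaks the OpenAI HTTP API on the configured host and port. A small usage sketch against it, assuming the config defaults from this commit (port 8000, TinyLlama/TinyLlama-1.1B-Chat-v1.0) are unchanged:

import requests

# Assumes the server was started with the default vllm_url and vllm_model_path.
resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "prompt": "Say hello in one short sentence.",
        "max_tokens": 32,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])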