mirror of https://github.com/ParisNeo/lollms.git
synced 2025-02-21 17:56:38 +00:00

commit b8ed3581da: vllm upgraded
parent: c29acbb6a6

This commit threads a new vllm_max_num_seqs setting from the LoLLMs configuration file through the vLLM service launcher and into the run_vllm.sh startup script, and bumps the configuration version from 70 to 71.
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 70
+version: 71
 binding_name: null
 model_name: null

@@ -107,6 +107,7 @@ vllm_url: http://localhost:8000
 vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 vllm_gpu_memory_utilization: 0.9
 vllm_max_model_len: 4096
+vllm_max_num_seqs: 256


 # Audio
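The new vllm_max_num_seqs key caps how many sequences vLLM's continuous-batching scheduler will run concurrently; 256 matches the usual vLLM default for --max-num-seqs. As a minimal sketch of how these keys line up with the vLLM server flags (the build_vllm_command helper is illustrative, not part of this commit):

    # Hypothetical helper: maps the YAML keys above onto vLLM's
    # OpenAI-server flags. The flags are real vLLM options; the helper
    # itself is an assumption for illustration.
    def build_vllm_command(cfg: dict) -> list:
        return [
            "python", "-m", "vllm.entrypoints.openai.api_server",
            "--model", cfg["vllm_model_path"],
            "--gpu-memory-utilization", str(cfg["vllm_gpu_memory_utilization"]),
            "--max-model-len", str(cfg["vllm_max_model_len"]),
            "--max-num-seqs", str(cfg["vllm_max_num_seqs"]),  # new in version 71
        ]

    print(" ".join(build_vllm_command({
        "vllm_model_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "vllm_gpu_memory_utilization": 0.9,
        "vllm_max_model_len": 4096,
        "vllm_max_num_seqs": 256,
    })))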
@@ -108,9 +108,9 @@ class Service:
        # run vllm
        if platform.system() == 'Windows':
            #subprocess.Popen(['wsl', 'ls', '$HOME'])
-            subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
+            subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization), str(self.app.config.vllm_max_num_seqs)])
        else:
-            subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
+            subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization), str(self.app.config.vllm_max_num_seqs)])

        # Wait until the service is available at http://127.0.0.1:7860/
        self.wait_for_service(max_retries=wait_max_retries)
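Both branches now forward six positional arguments to run_vllm.sh, so the Python launcher and the shell script must agree on argument order. A self-contained sketch of that contract (the launch_vllm wrapper is assumed for illustration; the real code pulls these values from self.app.config):

    import platform
    import subprocess
    from pathlib import Path

    # Sketch: the argument list is exactly $1..$6 as run_vllm.sh consumes it.
    def launch_vllm(model_path, host, port, max_model_len,
                    gpu_memory_utilization, max_num_seqs):
        args = [model_path, host, str(port), str(max_model_len),
                str(gpu_memory_utilization), str(max_num_seqs)]
        if platform.system() == 'Windows':
            # On Windows the script runs inside WSL, hence the 'wsl' prefix.
            return subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', *args])
        return subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', *args])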
@@ -10,7 +10,7 @@ echo "host :$2"
 echo "port :$3"
 echo "max_model_len :$4"
 echo "gpu_memory_utilization :$5"
-source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1" --host "$2" --port "$3" --max-model-len "$4" --gpu-memory-utilization "$5"
+source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1" --host "$2" --port "$3" --max-model-len "$4" --gpu-memory-utilization "$5" --max-num-seqs "$6"

 # Wait for all background processes to finish
 wait
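With the defaults from the configuration above, the script now starts vLLM's OpenAI-compatible server with --max-num-seqs 256. A rough smoke test of the six-argument call, assuming run_vllm.sh sits in the home directory as the Service code expects, with the config defaults as values:

    import subprocess
    from pathlib import Path

    # Invoke the updated script with all six positional arguments.
    subprocess.run(
        ['bash', f'{Path.home()}/run_vllm.sh',
         'TinyLlama/TinyLlama-1.1B-Chat-v1.0',  # $1 model
         'localhost',                           # $2 host
         '8000',                                # $3 port
         '4096',                                # $4 max_model_len
         '0.9',                                 # $5 gpu_memory_utilization
         '256'],                                # $6 max_num_seqs (new)
        check=True,
    )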