mirror of https://github.com/ParisNeo/lollms.git
vllm upgraded
commit b8ed3581da
parent c29acbb6a6
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 70
+version: 71
 binding_name: null
 model_name: null

@@ -107,6 +107,7 @@ vllm_url: http://localhost:8000
 vllm_model_path: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 vllm_gpu_memory_utilization: 0.9
 vllm_max_model_len: 4096
+vllm_max_num_seqs: 256


 # Audio
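Beyond the version bump, the only functional change in the config is the new vllm_max_num_seqs key, which caps how many sequences vLLM schedules into a single batch (256 matches vLLM's own default). A minimal sketch of how these keys map onto the server's flags, assuming PyYAML and a config.yaml shaped like the hunk above (this is not the lollms loader itself):

import yaml

# Load a YAML file shaped like the hunk above (path is illustrative).
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# Each vllm_* key becomes one CLI flag on the OpenAI-compatible server.
args = [
    "python", "-m", "vllm.entrypoints.openai.api_server",
    "--model", cfg["vllm_model_path"],
    "--max-model-len", str(cfg["vllm_max_model_len"]),
    "--gpu-memory-utilization", str(cfg["vllm_gpu_memory_utilization"]),
    "--max-num-seqs", str(cfg["vllm_max_num_seqs"]),  # new in version 71
]
print(" ".join(args))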
@@ -108,9 +108,9 @@ class Service:
         # run vllm
         if platform.system() == 'Windows':
             #subprocess.Popen(['wsl', 'ls', '$HOME'])
-            subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
+            subprocess.Popen(['wsl', 'bash', '$HOME/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization), str(self.app.config.vllm_max_num_seqs)])
         else:
-            subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization)])
+            subprocess.Popen(['bash', f'{Path.home()}/run_vllm.sh', self.app.config.vllm_model_path, host, str(port), str(self.app.config.vllm_max_model_len), str(self.app.config.vllm_gpu_memory_utilization), str(self.app.config.vllm_max_num_seqs)])

         # Wait until the service is available at http://127.0.0.1:7860/
         self.wait_for_service(max_retries=wait_max_retries)
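The spawned script is fire-and-forget; readiness is handled by the wait_for_service call above. A hedged sketch of what such a poller typically looks like (the requests-based probe, the url and delay parameters, and the retry count are illustrative assumptions, not lollms's actual implementation):

import time
import requests

def wait_for_service(url: str, max_retries: int = 150, delay: float = 1.0) -> bool:
    """Poll url until the server answers, or give up after max_retries tries."""
    for _ in range(max_retries):
        try:
            requests.get(url, timeout=2)  # any HTTP response means the port is up
            return True
        except requests.exceptions.RequestException:
            time.sleep(delay)
    return False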
@@ -10,7 +10,7 @@ echo "host :$2"
 echo "port :$3"
 echo "max_model_len :$4"
 echo "gpu_memory_utilization :$5"
-source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1" --host "$2" --port "$3" --max-model-len "$4" --gpu-memory-utilization "$5"
+source activate vllm && python -m vllm.entrypoints.openai.api_server --model "$1" --host "$2" --port "$3" --max-model-len "$4" --gpu-memory-utilization "$5" --max-num-seqs "$6"

 # Wait for all background processes to finish
 wait