upgraded new configurations

This commit is contained in:
Saifeddine ALOUI 2024-04-27 02:01:14 +02:00
parent e56042bcec
commit cbe2a5363a
5 changed files with 124 additions and 4 deletions

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 82
 binding_name: null
 model_name: null
 model_variant: null
@@ -109,6 +109,10 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+activate_lollms_server: True
+activate_ollama_emulator: True
+activate_openai_emulator: True
+activate_mistralai_emulator: True
 # elastic search service
 elastic_search_service: false

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 82
 binding_name: null
 model_name: null
 model_variant: null
@@ -109,6 +109,10 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+activate_lollms_server: True
+activate_ollama_emulator: True
+activate_openai_emulator: True
+activate_mistralai_emulator: True
 # elastic search service
 elastic_search_service: false

View File

@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 82
 binding_name: null
 model_name: null
 model_variant: null
@@ -109,6 +109,10 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+activate_lollms_server: True
+activate_ollama_emulator: True
+activate_openai_emulator: True
+activate_mistralai_emulator: True
 # elastic search service
 elastic_search_service: false

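The same change lands in all three bundled copies of the configuration file: the version is bumped from 81 to 82, and four activate_* switches are introduced so each API front-end (the native lollms server plus the ollama, openai, and mistralai emulators) can be toggled independently. Below is a minimal sketch of how a server could consume such flags to mount its routers conditionally; the config dict, app, and router objects are hypothetical illustrations, not the actual lollms wiring.

    from fastapi import FastAPI, APIRouter

    # Hypothetical stand-in for the parsed YAML configuration shown above.
    config = {
        "activate_lollms_server": True,
        "activate_ollama_emulator": True,
        "activate_openai_emulator": True,
        "activate_mistralai_emulator": True,
    }

    # Hypothetical routers, one per emulated API surface.
    lollms_router = APIRouter(prefix="/lollms")
    ollama_router = APIRouter(prefix="/api")   # ollama-style routes such as /api/tags
    openai_router = APIRouter(prefix="/v1")    # openai-style routes such as /v1/completions
    mistralai_router = APIRouter()

    app = FastAPI()

    # Mount only the front-ends enabled in the configuration.
    if config["activate_lollms_server"]:
        app.include_router(lollms_router)
    if config["activate_ollama_emulator"]:
        app.include_router(ollama_router)
    if config["activate_openai_emulator"]:
        app.include_router(openai_router)
    if config["activate_mistralai_emulator"]:
        app.include_router(mistralai_router)
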
View File

@@ -414,6 +414,73 @@ class CompletionGenerationRequest(BaseModel):
     temperature: Optional[float] = -1
 
+@router.post("/instruct/generate")
+async def ollama_completion(request: CompletionGenerationRequest):
+    """
+    Generates a text completion for the given prompt.
+
+    :param request: The completion request (prompt, max_tokens, temperature, stream).
+    :return: A StreamingResponse when stream is True, otherwise a JSON object with the full response (None if no binding is selected).
+    """
+    try:
+        text = request.prompt
+        n_predict = request.max_tokens if request.max_tokens >= 0 else elf_server.config.max_n_predict
+        temperature = request.temperature if request.temperature >= 0 else elf_server.config.temperature
+        # top_k = request.top_k if request.top_k >= 0 else elf_server.config.top_k
+        # top_p = request.top_p if request.top_p >= 0 else elf_server.config.top_p
+        # repeat_last_n = request.repeat_last_n if request.repeat_last_n >= 0 else elf_server.config.repeat_last_n
+        # repeat_penalty = request.repeat_penalty if request.repeat_penalty >= 0 else elf_server.config.repeat_penalty
+        stream = request.stream
+        if elf_server.binding is not None:
+            if stream:
+                output = {"response": ""}
+                def generate_chunks():
+                    # The binding's generate() call is blocking, so chunks are
+                    # buffered by the callback and streamed once it returns.
+                    chunks = []
+                    def callback(chunk, chunk_type: MSG_TYPE = MSG_TYPE.MSG_TYPE_CHUNK):
+                        # Accumulate the text and stop if an antiprompt shows up
+                        output["response"] += chunk
+                        antiprompt = detect_antiprompt(output["response"])
+                        if antiprompt:
+                            ASCIIColors.warning(f"\n{antiprompt} detected. Stopping generation")
+                            output["response"] = remove_text_from_string(output["response"], antiprompt)
+                            return False
+                        chunks.append(chunk)
+                        return True
+                    elf_server.binding.generate(
+                        text,
+                        n_predict,
+                        callback=callback,
+                        temperature=temperature,
+                    )
+                    for chunk in chunks:
+                        # StreamingResponse expects str/bytes chunks; json must be imported in this module
+                        yield json.dumps({"response": chunk}) + "\n"
+                return StreamingResponse(generate_chunks(), media_type="application/json")
+            else:
+                output = {"response": ""}
+                def callback(chunk, chunk_type: MSG_TYPE = MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Accumulate the whole response, stopping at any antiprompt
+                    output["response"] += chunk
+                    antiprompt = detect_antiprompt(output["response"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\n{antiprompt} detected. Stopping generation")
+                        output["response"] = remove_text_from_string(output["response"], antiprompt)
+                        return False
+                    else:
+                        return True
+                elf_server.binding.generate(
+                    text,
+                    n_predict,
+                    callback=callback,
+                    temperature=temperature
+                )
+                return output
+        else:
+            return None
+    except Exception as ex:
+        trace_exception(ex)
+        elf_server.error(ex)
+        return {"status": False, "error": str(ex)}
+
 @router.post("/v1/completions")
 async def v1_completion(request: CompletionGenerationRequest):
     """
@@ -471,7 +538,7 @@ async def v1_completion(request: CompletionGenerationRequest):
                 text,
                 n_predict,
                 callback=callback,
-                temperature=data.get("temperature", elf_server.config.temperature)
+                temperature=request.temperature if request.temperature >= 0 else elf_server.config.temperature
             )
             return output["text"]
         else:

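The new /instruct/generate route takes the same CompletionGenerationRequest body as /v1/completions, with negative max_tokens or temperature values falling back to the server configuration. A quick client-side sketch for the non-streaming path; the host and port are assumptions, so adjust them to wherever your lollms server listens:

    import requests

    payload = {
        "prompt": "List three uses for a paperclip.",
        "max_tokens": 128,    # -1 would fall back to config.max_n_predict
        "temperature": 0.7,   # -1 would fall back to config.temperature
        "stream": False,
    }

    # Assumed server address; not confirmed by this diff.
    resp = requests.post("http://localhost:9600/instruct/generate", json=payload)
    print(resp.json()["response"])
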
View File

@@ -99,3 +99,44 @@ def add_reference_to_local_model(data:ModelReferenceParams):
     else:
         return {"status": False, "error": "Model not found"}
 
+@router.get("/api/tags")
+async def ollama_list_models():
+    """
+    Retrieves the models available for the currently selected binding,
+    formatted like an ollama /api/tags response.
+
+    Returns:
+        dict: A dictionary holding a "models" list, or an empty list if no
+        binding is selected or listing fails.
+    """
+    if lollmsElfServer.binding is None:
+        return []
+    try:
+        model_list = lollmsElfServer.binding.get_available_models(lollmsElfServer)
+        md = {
+            "models": [
+                {
+                    "name": model,
+                    # Static placeholder metadata kept for ollama compatibility
+                    "modified_at": "2023-11-04T14:56:49.277302595-07:00",
+                    "size": 7365960935,
+                    "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
+                    "details": {
+                        "format": "gguf",
+                        "family": "llama",
+                        "families": None,
+                        "parameter_size": "13B",
+                        "quantization_level": "Q4_0"
+                    }
+                }
+                for model in model_list
+            ]
+        }
+    except Exception as ex:
+        trace_exception(ex)
+        lollmsElfServer.error("Couldn't list models. Please reinstall the binding or notify ParisNeo on the discord server")
+        return []
+    return md
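
Since the payload mimics ollama's /api/tags schema (with placeholder size, digest, and details values for now), an ollama-style client can enumerate lollms models without changes. A small sketch, again assuming the server address:

    import requests

    resp = requests.get("http://localhost:9600/api/tags")
    data = resp.json()
    # The endpoint returns [] when no binding is selected or listing fails.
    models = data.get("models", []) if isinstance(data, dict) else []
    for m in models:
        print(m["name"], m["details"]["quantization_level"])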