Mirror of https://github.com/ParisNeo/lollms.git (synced 2025-04-05 01:49:08 +00:00)
upgraded new configurations
Commit cbe2a5363a (parent e56042bcec)
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 82
 binding_name: null
 model_name: null
 model_variant: null
@@ -109,6 +109,10 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+activate_lollms_server: True
+activate_ollama_emulator: True
+activate_openai_emulator: True
+activate_mistralai_emulator: True
 
 # elastic search service
 elastic_search_service: false
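The version bump from 81 to 82 introduces four activation flags for the bundled lollms server and its Ollama, OpenAI and Mistral AI emulation layers. The snippet below is a minimal sketch for checking that an upgraded config file carries the new keys; the config path is an assumption and should be adjusted to wherever your lollms configuration actually lives.

# Minimal sketch: verify an upgraded config carries the new version
# number and emulator flags. CONFIG_PATH is a hypothetical location.
from pathlib import Path
import yaml  # pip install pyyaml

CONFIG_PATH = Path("personal_data/configs/local_config.yaml")  # assumption: adjust to your install

cfg = yaml.safe_load(CONFIG_PATH.read_text())
assert cfg["version"] >= 82, "config predates this commit"
for flag in ("activate_lollms_server",
             "activate_ollama_emulator",
             "activate_openai_emulator",
             "activate_mistralai_emulator"):
    print(f"{flag}: {cfg.get(flag, 'missing')}")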
@@ -414,6 +414,73 @@ class CompletionGenerationRequest(BaseModel):
     temperature: Optional[float] = -1
 
 
+@router.post("/instruct/generate")
+async def ollama_completion(request: CompletionGenerationRequest):
+    """
+    Runs a text generation request against the active binding (Ollama-style
+    instruct endpoint).
+
+    :param request: The completion request (prompt, sampling parameters, stream flag).
+    :return: A StreamingResponse when streaming is requested, otherwise a JSON
+        object holding the generated text, or an error status on failure.
+    """
+    try:
+        text = request.prompt
+        n_predict = request.max_tokens if request.max_tokens >= 0 else elf_server.config.max_n_predict
+        temperature = request.temperature if request.temperature >= 0 else elf_server.config.temperature
+        # top_k = request.top_k if request.top_k >= 0 else elf_server.config.top_k
+        # top_p = request.top_p if request.top_p >= 0 else elf_server.config.top_p
+        # repeat_last_n = request.repeat_last_n if request.repeat_last_n >= 0 else elf_server.config.repeat_last_n
+        # repeat_penalty = request.repeat_penalty if request.repeat_penalty >= 0 else elf_server.config.repeat_penalty
+        stream = request.stream
+
+        if elf_server.binding is not None:
+            if stream:
+                output = {"response": ""}
+                def generate_chunks():
+                    def callback(chunk, chunk_type: MSG_TYPE = MSG_TYPE.MSG_TYPE_CHUNK):
+                        # Accumulate the chunk; stop and strip the output if an
+                        # antiprompt shows up, otherwise forward the chunk
+                        output["response"] += chunk
+                        antiprompt = detect_antiprompt(output["response"])
+                        if antiprompt:
+                            ASCIIColors.warning(f"\n{antiprompt} detected. Stopping generation")
+                            output["response"] = remove_text_from_string(output["response"], antiprompt)
+                            return False
+                        else:
+                            yield {"response": chunk}
+                            return True
+                    return iter(elf_server.binding.generate(
+                        text,
+                        n_predict,
+                        callback=callback,
+                        temperature=temperature,
+                    ))
+
+                return StreamingResponse(generate_chunks())
+            else:
+                output = {"response": ""}
+                def callback(chunk, chunk_type: MSG_TYPE = MSG_TYPE.MSG_TYPE_CHUNK):
+                    # Accumulate each chunk, stopping if an antiprompt is detected
+                    output["response"] += chunk
+                    antiprompt = detect_antiprompt(output["response"])
+                    if antiprompt:
+                        ASCIIColors.warning(f"\n{antiprompt} detected. Stopping generation")
+                        output["response"] = remove_text_from_string(output["response"], antiprompt)
+                        return False
+                    else:
+                        return True
+                elf_server.binding.generate(
+                    text,
+                    n_predict,
+                    callback=callback,
+                    temperature=temperature
+                )
+                return output
+        else:
+            return None
+    except Exception as ex:
+        trace_exception(ex)
+        elf_server.error(ex)
+        return {"status": False, "error": str(ex)}
+
 @router.post("/v1/completions")
 async def v1_completion(request: CompletionGenerationRequest):
     """
@@ -471,7 +538,7 @@ async def v1_completion(request: CompletionGenerationRequest):
                     text,
                     n_predict,
                     callback=callback,
-                    temperature=data.get("temperature", elf_server.config.temperature)
+                    temperature=request.temperature if request.temperature >= 0 else elf_server.config.temperature
                 )
                 return output["text"]
             else:
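The new /instruct/generate route accepts the same CompletionGenerationRequest body as /v1/completions, with -1 in any numeric field falling back to the server-side configuration, as in the handler above. The sketch below shows one plausible way to call it; the host and port are assumptions (lollms commonly serves on localhost:9600), so adjust them to wherever your server actually listens.

# Minimal sketch of a non-streaming call to /instruct/generate.
# BASE_URL is an assumption; the payload fields mirror
# CompletionGenerationRequest from the diff above.
import requests

BASE_URL = "http://localhost:9600"  # assumption: adjust to your server

resp = requests.post(
    f"{BASE_URL}/instruct/generate",
    json={
        "prompt": "Explain what an antiprompt is in one sentence.",
        "max_tokens": 128,    # -1 would fall back to config.max_n_predict
        "temperature": 0.7,   # -1 would fall back to config.temperature
        "stream": False,      # non-stream returns {"response": "..."}
    },
    timeout=300,
)
print(resp.json()["response"])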
@@ -99,3 +99,44 @@ def add_reference_to_local_model(data:ModelReferenceParams):
     else:
         return {"status": False, "error": "Model not found"}
 
+
+@router.get("/api/tags")
+async def ollama_list_models():
+    """
+    Retrieve the list of available models for the currently selected binding,
+    formatted like Ollama's /api/tags response.
+
+    Returns:
+        dict: A dictionary with a "models" list, or an empty list on failure.
+    """
+    if lollmsElfServer.binding is None:
+        return []
+    try:
+        model_list = lollmsElfServer.binding.get_available_models(lollmsElfServer)
+
+        md = {
+            "models": [
+                {
+                    "name": model,
+                    # modified_at, size, digest and details are static
+                    # placeholders; only the name varies per model
+                    "modified_at": "2023-11-04T14:56:49.277302595-07:00",
+                    "size": 7365960935,
+                    "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
+                    "details": {
+                        "format": "gguf",
+                        "family": "llama",
+                        "families": None,
+                        "parameter_size": "13B",
+                        "quantization_level": "Q4_0"
+                    }
+                }
+                for model in model_list
+            ]
+        }
+    except Exception as ex:
+        trace_exception(ex)
+        lollmsElfServer.error("Couldn't list models. Please reinstall the binding or notify ParisNeo on the Discord server")
+        return []
+
+    return md
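Since only the model name in the /api/tags payload varies (the size, digest, dates and details block are hard-coded placeholders), clients should treat everything but "name" as cosmetic. A minimal sketch of querying the endpoint, again assuming a local server on a hypothetical port:

# Minimal sketch of listing models through the new Ollama-compatible
# /api/tags endpoint. The URL is an assumption; the response shape
# mirrors the dictionary built in ollama_list_models above.
import requests

resp = requests.get("http://localhost:9600/api/tags", timeout=30)  # port is an assumption
data = resp.json()
# The endpoint returns a bare [] when no binding is active, a dict otherwise
models = data.get("models", []) if isinstance(data, dict) else []
for entry in models:
    details = entry["details"]
    print(f"{entry['name']}  ({details['parameter_size']}, {details['quantization_level']})")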