diff --git a/configs/config.yaml b/configs/config.yaml
index c3be472..2b22249 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 82
 binding_name: null
 model_name: null
 model_variant: null
@@ -109,6 +109,10 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+activate_lollms_server: True
+activate_ollama_emulator: True
+activate_openai_emulator: True
+activate_mistralai_emulator: True
 # elastic search service
 elastic_search_service: false
diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml
index c3be472..2b22249 100644
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 82
 binding_name: null
 model_name: null
 model_variant: null
@@ -109,6 +109,10 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+activate_lollms_server: True
+activate_ollama_emulator: True
+activate_openai_emulator: True
+activate_mistralai_emulator: True
 # elastic search service
 elastic_search_service: false
diff --git a/lollms/server/configs/config.yaml b/lollms/server/configs/config.yaml
index c3be472..2b22249 100644
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file ===========================
-version: 81
+version: 82
 binding_name: null
 model_name: null
 model_variant: null
@@ -109,6 +109,10 @@ petals_device: cuda
 # lollms service
 enable_lollms_service: false
 lollms_base_url: http://localhost:1234
+activate_lollms_server: True
+activate_ollama_emulator: True
+activate_openai_emulator: True
+activate_mistralai_emulator: True
 # elastic search service
 elastic_search_service: false
diff --git a/lollms/server/endpoints/lollms_generator.py b/lollms/server/endpoints/lollms_generator.py
index a1c7a1f..edcadd5 100644
--- a/lollms/server/endpoints/lollms_generator.py
+++ b/lollms/server/endpoints/lollms_generator.py
@@ -414,6 +414,73 @@ class CompletionGenerationRequest(BaseModel):
     temperature: Optional[float] = -1
 
 
+@router.post("/instruct/generate")
+async def ollama_completion(request: CompletionGenerationRequest):
+    """
+    Generates a completion for an instruct-style prompt, emulating the Ollama API.
+
+    :param request: The completion request (prompt, sampling parameters, stream flag).
+    :return: A streaming response of chunks, or a JSON object holding the full response.
+ """ + try: + text = request.prompt + n_predict = request.max_tokens if request.max_tokens>=0 else elf_server.config.max_n_predict + temperature = request.temperature if request.temperature>=0 else elf_server.config.temperature + # top_k = request.top_k if request.top_k>=0 else elf_server.config.top_k + # top_p = request.top_p if request.top_p>=0 else elf_server.config.top_p + # repeat_last_n = request.repeat_last_n if request.repeat_last_n>=0 else elf_server.config.repeat_last_n + # repeat_penalty = request.repeat_penalty if request.repeat_penalty>=0 else elf_server.config.repeat_penalty + stream = request.stream + + if elf_server.binding is not None: + if stream: + output = {"response":""} + def generate_chunks(): + def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK): + # Yield each chunk of data + output["response"] += chunk + antiprompt = detect_antiprompt(output["text"]) + if antiprompt: + ASCIIColors.warning(f"\n{antiprompt} detected. Stopping generation") + output["response"] = remove_text_from_string(output["response"],antiprompt) + return False + else: + yield {"response":chunk} + return True + return iter(elf_server.binding.generate( + text, + n_predict, + callback=callback, + temperature=temperature, + )) + + return StreamingResponse(generate_chunks()) + else: + output = {"response":""} + def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK): + # Yield each chunk of data + output["response"] += chunk + antiprompt = detect_antiprompt(output["response"]) + if antiprompt: + ASCIIColors.warning(f"\n{antiprompt} detected. Stopping generation") + output["response"] = remove_text_from_string(output["response"],antiprompt) + return False + else: + return True + elf_server.binding.generate( + text, + n_predict, + callback=callback, + temperature=request.temperature if request.temperature>=0 else elf_server.config.temperature + ) + return output + else: + return None + except Exception as ex: + trace_exception(ex) + elf_server.error(ex) + return {"status":False,"error":str(ex)} + @router.post("/v1/completions") async def v1_completion(request: CompletionGenerationRequest): """ @@ -471,7 +538,7 @@ async def v1_completion(request: CompletionGenerationRequest): text, n_predict, callback=callback, - temperature=data.get("temperature", elf_server.config.temperature) + temperature=request.temperature if request.temperature>=0 else elf_server.config.temperature ) return output["text"] else: diff --git a/lollms/server/endpoints/lollms_models_infos.py b/lollms/server/endpoints/lollms_models_infos.py index 54be75a..f913c54 100644 --- a/lollms/server/endpoints/lollms_models_infos.py +++ b/lollms/server/endpoints/lollms_models_infos.py @@ -99,3 +99,44 @@ def add_reference_to_local_model(data:ModelReferenceParams): else: return {"status": False, "error":"Model not found"} + + + + +@router.get("/api/tags") +async def ollama_list_models(): + """ + Retrieve a list of available models for the currently selected binding. + + Returns: + List[str]: A list of model names. 
+ """ + if lollmsElfServer.binding is None: + return [] + try: + model_list = lollmsElfServer.binding.get_available_models(lollmsElfServer) + + md = { + "models": [ + { + "name": model, + "modified_at": "2023-11-04T14:56:49.277302595-07:00", + "size": 7365960935, + "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697", + "details": { + "format": "gguf", + "family": "llama", + "families": None, + "parameter_size": "13B", + "quantization_level": "Q4_0" + } + } + for model in model_list + ] + } + except Exception as ex: + trace_exception(ex) + lollmsElfServer.error("Coudln't list models. Please reinstall the binding or notify ParisNeo on the discord server") + return [] + + return md \ No newline at end of file