diff --git a/lollms/personality.py b/lollms/personality.py
index 54e8d9e..ed4763e 100644
--- a/lollms/personality.py
+++ b/lollms/personality.py
@@ -1064,12 +1064,11 @@ class AIPersonality:
                 self.vectorizer.index()
                 if callback is not None:
                     callback("File added successfully",MSG_TYPE.MSG_TYPE_INFO)
-            self.HideBlockingMessage("Adding file to vector store.\nPlease stand by")
+            self.HideBlockingMessage(client.client_id)
             return True
         except Exception as e:
             trace_exception(e)
-            self.HideBlockingMessage("Adding file to vector store.\nPlease stand by")
-            self.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}")
+            self.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client.client_id)
             return False
     def save_personality(self, package_path=None):
         """
diff --git a/lollms/server/endpoints/lollms_generator.py b/lollms/server/endpoints/lollms_generator.py
index 761b4a0..638d734 100644
--- a/lollms/server/endpoints/lollms_generator.py
+++ b/lollms/server/endpoints/lollms_generator.py
@@ -282,19 +282,21 @@ class ModelResponse(BaseModel):
     """Usage statistics for the completion request."""
 
 
-class GenerationRequest(BaseModel):
+class ChatGenerationRequest(BaseModel):
     model: str = ""
     messages: List[Message]
-    max_tokens: Optional[int] = 1024
+    max_tokens: Optional[int] = -1
     stream: Optional[bool] = False
     temperature: Optional[float] = 0.1
 
 
 @router.post("/v1/chat/completions")
-async def v1_chat_completions(request: GenerationRequest):
+async def v1_chat_completions(request: ChatGenerationRequest):
     try:
         reception_manager=RECEPTION_MANAGER()
         messages = request.messages
+        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict
+        temperature = request.temperature if request.temperature else elf_server.config.temperature
         prompt = ""
         roles= False
         for message in messages:
@@ -305,7 +307,7 @@ async def v1_chat_completions(request: GenerationRequest):
                 prompt += f"{message.content}\n"
         if roles:
             prompt += "!@>assistant:"
-        n_predict = request.max_tokens if request.max_tokens>0 else 1024
+        n_predict = max_tokens if max_tokens>0 else 1024
         stream = request.stream
         prompt_tokens = len(elf_server.binding.tokenize(prompt))
         if elf_server.binding is not None:
@@ -346,7 +348,7 @@ async def v1_chat_completions(request: GenerationRequest):
                                         prompt,
                                         n_predict,
                                         callback=callback,
-                                        temperature=request.temperature or elf_server.config.temperature
+                                        temperature=temperature
                                         )
                 reception_manager.done = True
             thread = threading.Thread(target=chunks_builder)
@@ -392,7 +394,7 @@ async def v1_chat_completions(request: GenerationRequest):
                                 prompt,
                                 n_predict,
                                 callback=callback,
-                                temperature=request.temperature or elf_server.config.temperature
+                                temperature=temperature
                                 )
             completion_tokens = len(elf_server.binding.tokenize(reception_manager.reception_buffer))
             return ModelResponse(id = _generate_id(), choices = [Choices(message=Message(role="assistant", content=reception_manager.reception_buffer), finish_reason="stop", index=0)], created=int(time.time()), model=request.model,usage=Usage(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens))
@@ -404,20 +406,31 @@ async def v1_chat_completions(request: GenerationRequest):
         return {"status":False,"error":str(ex)}
 
 
+class CompletionGenerationRequest(BaseModel):
+    model: Optional[str] = ""
+    prompt: str = ""
+    max_tokens: Optional[int] = -1
+    stream: Optional[bool] = False
+    temperature: Optional[float] = -1
+
+
 @router.post("/v1/completions")
-async def v1_completion(request: Request):
+async def v1_completion(request: CompletionGenerationRequest):
     """
     Executes Python code and returns the output.
 
     :param request: The HTTP request object.
     :return: A JSON response with the status of the operation.
     """
 
-    try:
-        data = (await request.json())
-        text = data.get("prompt")
-        n_predict = data.get("max_tokens")
-        stream = data.get("stream")
+    text = request.prompt
+    n_predict = request.max_tokens if request.max_tokens>=0 else elf_server.config.max_n_predict
+    temperature = request.temperature if request.temperature>=0 else elf_server.config.temperature
+    # top_k = request.top_k if request.top_k>=0 else elf_server.config.top_k
+    # top_p = request.top_p if request.top_p>=0 else elf_server.config.top_p
+    # repeat_last_n = request.repeat_last_n if request.repeat_last_n>=0 else elf_server.config.repeat_last_n
+    # repeat_penalty = request.repeat_penalty if request.repeat_penalty>=0 else elf_server.config.repeat_penalty
+    stream = request.stream
 
     if elf_server.binding is not None:
         if stream:
@@ -438,8 +451,8 @@ async def v1_completion(request: Request):
                         text,
                         n_predict,
                         callback=callback,
-                        temperature=data.get("temperature", elf_server.config.temperature)
-                        ))
+                        temperature=temperature,
+                        ))
 
             return StreamingResponse(generate_chunks())
         else:
diff --git a/lollms/server/endpoints/lollms_personalities_infos.py b/lollms/server/endpoints/lollms_personalities_infos.py
index 29aa336..3817449 100644
--- a/lollms/server/endpoints/lollms_personalities_infos.py
+++ b/lollms/server/endpoints/lollms_personalities_infos.py
@@ -234,6 +234,7 @@ def clear_personality_files_list():
     lollmsElfServer.personality.remove_all_files()
     return {"state":True}
 class RemoveFileData(BaseModel):
+    client_id:str
     name:str
 
 @router.post("/remove_file")
@@ -241,6 +242,8 @@ def remove_file(data:RemoveFileData):
     """
     Removes a file form the personality files
     """
+    check_access(lollmsElfServer, data.client_id)
+
     if lollmsElfServer.personality is None:
         return {"state":False, "error":"No personality selected"}
     lollmsElfServer.personality.remove_file(data.name)
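
For reference, a minimal client-side sketch of how the reworked /v1/completions endpoint could be exercised once this diff is applied. It only illustrates the new CompletionGenerationRequest schema: the host and port (localhost:9600), and the use of the requests library, are assumptions rather than part of the change, and the -1 sentinels simply defer max_tokens and temperature to the server-side elf_server.config defaults as implemented above.

# Hypothetical client call; host/port are assumptions, adjust to your lollms server setup.
import requests

payload = {
    "model": "",                  # optional in the new schema
    "prompt": "Once upon a time",
    "max_tokens": -1,             # -1 -> server falls back to elf_server.config.max_n_predict
    "temperature": -1,            # -1 -> server falls back to elf_server.config.temperature
    "stream": False,              # set True to consume the StreamingResponse chunk by chunk
}

response = requests.post("http://localhost:9600/v1/completions", json=payload)
print(response.status_code, response.text)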