fixed generation code

Saifeddine ALOUI 2024-04-07 22:08:16 +02:00
parent 2f0b299817
commit b47a8e326d
3 changed files with 32 additions and 17 deletions


@@ -1064,12 +1064,11 @@ class AIPersonality:
                 self.vectorizer.index()
                 if callback is not None:
                     callback("File added successfully",MSG_TYPE.MSG_TYPE_INFO)
-                self.HideBlockingMessage("Adding file to vector store.\nPlease stand by")
+                self.HideBlockingMessage(client.client_id)
                 return True
         except Exception as e:
             trace_exception(e)
-            self.HideBlockingMessage("Adding file to vector store.\nPlease stand by")
-            self.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}")
+            self.InfoMessage(f"Unsupported file format or empty file.\nSupported formats are {GenericDataLoader.get_supported_file_types()}",client_id=client.client_id)
             return False
     def save_personality(self, package_path=None):
         """


@@ -282,19 +282,21 @@ class ModelResponse(BaseModel):
     """Usage statistics for the completion request."""


-class GenerationRequest(BaseModel):
+class ChatGenerationRequest(BaseModel):
     model: str = ""
     messages: List[Message]
-    max_tokens: Optional[int] = 1024
+    max_tokens: Optional[int] = -1
     stream: Optional[bool] = False
     temperature: Optional[float] = 0.1


 @router.post("/v1/chat/completions")
-async def v1_chat_completions(request: GenerationRequest):
+async def v1_chat_completions(request: ChatGenerationRequest):
     try:
         reception_manager=RECEPTION_MANAGER()
         messages = request.messages
+        max_tokens = request.max_tokens if request.max_tokens>0 else elf_server.config.max_n_predict
+        temperature = request.temperature if request.temperature is not None else elf_server.config.temperature
         prompt = ""
         roles= False
         for message in messages:
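
The new max_tokens default of -1 is a sentinel meaning "defer to the server configuration"; the two added lines resolve the request values against elf_server.config once, up front, so the rest of the handler uses the resolved locals. A generic sketch of that pattern (the resolve helper is illustrative, not part of the repo):

    # Sentinel-defaulting: an unusable request value falls back to the
    # server-configured default.
    def resolve(requested, configured, minimum=1):
        if requested is None or requested < minimum:
            return configured
        return requested

    # e.g. max_tokens  = resolve(request.max_tokens,  config.max_n_predict)
    #      temperature = resolve(request.temperature, config.temperature, minimum=0)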
@@ -305,7 +307,7 @@ async def v1_chat_completions(request: GenerationRequest):
                 prompt += f"{message.content}\n"
         if roles:
             prompt += "!@>assistant:"
-        n_predict = request.max_tokens if request.max_tokens>0 else 1024
+        n_predict = max_tokens if max_tokens>0 else 1024
         stream = request.stream
         prompt_tokens = len(elf_server.binding.tokenize(prompt))
         if elf_server.binding is not None:
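
Besides switching to the resolved max_tokens local, this hunk shows how the chat messages are flattened into a single prompt: message contents are concatenated, and when role markers were emitted the prompt ends with "!@>assistant:" so the model answers as the assistant. A standalone sketch of that flattening; the per-message marker format is an assumption, since only the closing "!@>assistant:" cue appears in the hunk:

    # Flatten OpenAI-style chat messages into a single lollms prompt string.
    def flatten_messages(messages, use_roles=True):
        prompt = ""
        for message in messages:
            role = getattr(message, "role", None)
            if use_roles and role:
                prompt += f"!@>{role}:\n{message.content}\n"  # assumed marker format
            else:
                prompt += f"{message.content}\n"
        if use_roles:
            prompt += "!@>assistant:"  # cue the model to take the assistant turn
        return prompt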
@@ -346,7 +348,7 @@
                         prompt,
                         n_predict,
                         callback=callback,
-                        temperature=request.temperature or elf_server.config.temperature
+                        temperature=temperature
                     )
                     reception_manager.done = True
                 thread = threading.Thread(target=chunks_builder)
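
In the streaming branch, generation runs inside a chunks_builder worker thread while the callback hands chunks to the reception manager and done marks completion. A self-contained sketch of that producer/consumer shape, using a plain queue instead of the repo's RECEPTION_MANAGER:

    import threading
    import queue

    def stream_generate(generate_fn, prompt, n_predict, temperature):
        chunks = queue.Queue()

        def callback(chunk, msg_type=None):
            chunks.put(chunk)
            return True            # True tells the binding to keep generating

        def worker():
            generate_fn(prompt, n_predict, callback=callback, temperature=temperature)
            chunks.put(None)       # sentinel: generation is done

        threading.Thread(target=worker, daemon=True).start()
        while (chunk := chunks.get()) is not None:
            yield chunk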
@@ -392,7 +394,7 @@
                     prompt,
                     n_predict,
                     callback=callback,
-                    temperature=request.temperature or elf_server.config.temperature
+                    temperature=temperature
                 )
             completion_tokens = len(elf_server.binding.tokenize(reception_manager.reception_buffer))
             return ModelResponse(id = _generate_id(), choices = [Choices(message=Message(role="assistant", content=reception_manager.reception_buffer), finish_reason="stop", index=0)], created=int(time.time()), model=request.model,usage=Usage(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens))
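
In the non-streaming branch, completion tokens are counted by re-tokenizing the buffered output, and the result is wrapped in an OpenAI-style envelope. Serialized, the response looks roughly like this (all values illustrative):

    response = {
        "id": "chatcmpl-...",                 # from _generate_id()
        "created": 1712520000,                # int(time.time())
        "model": "",                          # echoed from the request
        "choices": [{
            "index": 0,
            "finish_reason": "stop",
            "message": {"role": "assistant", "content": "...generated text..."},
        }],
        "usage": {"prompt_tokens": 42, "completion_tokens": 128},
    }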
@@ -404,20 +406,31 @@ async def v1_chat_completions(request: GenerationRequest):
         return {"status":False,"error":str(ex)}


+class CompletionGenerationRequest(BaseModel):
+    model: Optional[str] = ""
+    prompt: str = ""
+    max_tokens: Optional[int] = -1
+    stream: Optional[bool] = False
+    temperature: Optional[float] = -1
+
 @router.post("/v1/completions")
-async def v1_completion(request: Request):
+async def v1_completion(request: CompletionGenerationRequest):
     """
     Generates a text completion for the given prompt.

     :param request: The completion request parameters.
     :return: A JSON response with the status of the operation.
     """
     try:
-        data = (await request.json())
-        text = data.get("prompt")
-        n_predict = data.get("max_tokens")
-        stream = data.get("stream")
+        text = request.prompt
+        n_predict = request.max_tokens if request.max_tokens>=0 else elf_server.config.max_n_predict
+        temperature = request.temperature if request.temperature>=0 else elf_server.config.temperature
+        # top_k = request.top_k if request.top_k>=0 else elf_server.config.top_k
+        # top_p = request.top_p if request.top_p>=0 else elf_server.config.top_p
+        # repeat_last_n = request.repeat_last_n if request.repeat_last_n>=0 else elf_server.config.repeat_last_n
+        # repeat_penalty = request.repeat_penalty if request.repeat_penalty>=0 else elf_server.config.repeat_penalty
+        stream = request.stream
         if elf_server.binding is not None:
             if stream:
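
Declaring a CompletionGenerationRequest body lets FastAPI validate the JSON up front instead of the handler poking at request.json(). A hypothetical client-side call; the URL and port are assumptions:

    import requests

    resp = requests.post(
        "http://localhost:9600/v1/completions",   # assumed default lollms port
        json={
            "prompt": "Once upon a time",
            "max_tokens": -1,      # -1 -> use the server's max_n_predict
            "stream": False,
            "temperature": -1,     # -1 -> use the server's configured temperature
        },
        timeout=600,
    )
    print(resp.json())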
@@ -438,8 +451,8 @@ async def v1_completion(request: Request):
                         text,
                         n_predict,
                         callback=callback,
-                        temperature=data.get("temperature", elf_server.config.temperature)
-                        ))
+                        temperature=temperature,
+                        ))
                 return StreamingResponse(generate_chunks())
             else:


@@ -234,6 +234,7 @@ def clear_personality_files_list():
     lollmsElfServer.personality.remove_all_files()
     return {"state":True}

 class RemoveFileData(BaseModel):
+    client_id:str
     name:str
 @router.post("/remove_file")
@@ -241,6 +242,8 @@ def remove_file(data:RemoveFileData):
     """
     Removes a file from the personality files
     """
+    check_access(lollmsElfServer, data.client_id)
+
     if lollmsElfServer.personality is None:
         return {"state":False, "error":"No personality selected"}
     lollmsElfServer.personality.remove_file(data.name)
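
remove_file now requires a client_id and validates it before mutating any state. A hedged sketch of what a guard like check_access typically does; the session lookup below is an assumption, not the repo's verified implementation:

    from fastapi import HTTPException

    def check_access(server, client_id: str):
        # Reject requests whose client_id does not map to a known session.
        client = server.session.get_client(client_id)   # assumed session API
        if client is None:
            raise HTTPException(status_code=403, detail="Access denied: unknown client_id")
        return client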