Mirror of https://github.com/ParisNeo/lollms.git (synced 2025-02-20 17:33:03 +00:00)

Commit: 94425bce30
Parent: 22e1ef85ce
Commit message: upgraded text
@@ -44,6 +44,9 @@ class LollmsApplication(LoLLMsCom):
self.config = config
self.lollms_paths = lollms_paths

# TODO : implement
self.embedding_models = []

self.menu = MainMenu(self, callback)
self.mounted_personalities = []
self.personality:AIPersonality = None

@@ -8,13 +8,25 @@ description:

"""

from fastapi import APIRouter, Request
from fastapi import APIRouter, Request, Body
from lollms.server.elf_server import LOLLMSElfServer
from pydantic import BaseModel
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception
from ascii_colors import ASCIIColors
import time
import threading
from typing import List, Optional, Union
import random
import string
import json

def _generate_id(length=10):
letters_and_digits = string.ascii_letters + string.digits
random_id = ''.join(random.choice(letters_and_digits) for _ in range(length))
return random_id

class GenerateRequest(BaseModel):
text: str

@@ -77,16 +89,30 @@ def get_generation_status():

# ----------------------------------- Generation -----------------------------------------
@router.post("/generate")
def lollms_generate(request_data: Request):
"""
Endpoint for generating text from prompts using the lollms fastapi server.
class LollmsGenerateRequest(BaseModel):
text: str
model_name: Optional[str] = None
personality: Optional[int] = None
n_predict: Optional[int] = 1024
stream: bool = False
temperature: float = None
top_k: Optional[int] = None
top_p: Optional[float] = None
repeat_penalty: Optional[float] = None
repeat_last_n: Optional[int] = None
seed: Optional[int] = None
n_threads: Optional[int] = None

@router.post("/lollms_generate")
async def lollms_generate(request: LollmsGenerateRequest):
""" Endpoint for generating text from prompts using the LoLLMs fastAPI server.

Args:
Data model for the Generate Request.

Attributes:
- text: str representing the input text prompt for text generation.
- text: str : representing the input text prompt for text generation.
- model_name: Optional[str] = None : The name of the model to be used (it should be one of the current models)
- personality_id: Optional[int] = None : The name of the mounted personality to be used (if a personality is None, the endpoint will just return a completion text). To get the list of mounted personalities, just use /list_mounted_personalities
- n_predict: int representing the number of predictions to generate.
- stream: bool indicating whether to stream the generated text or not.
- temperature: float representing the temperature parameter for text generation.
@@ -97,21 +123,79 @@ def lollms_generate(request_data: Request):
- seed: int representing the seed for text generation.
- n_threads: int representing the number of threads for text generation.

Returns:
- If the elf_server binding is not None:
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If the elf_server binding is None, returns None.
"""
text = request_data["text"]
n_predict = request_data.get("n_predict", 1024)
stream = request_data.get("stream", False)

if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
"""

try:
text = request.text
n_predict = request.n_predict
stream = request.stream

if elf_server.binding is not None:
if stream:

output = {"text":"","waiting":True,"new":[]}
def generate_chunks():
lk = threading.Lock()

def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
if elf_server.cancel_gen:
return False
if chunk is None:
return
output["text"] += chunk
# Yield each chunk of data
lk.acquire()
try:
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
lk.release()
return False
else:
output["new"].append(chunk)
lk.release()
return True
except Exception as ex:
trace_exception(ex)
lk.release()
return True
def chunks_builder():
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature if request.temperature is not None else elf_server.config.temperature,
top_k=request.top_k if request.top_k is not None else elf_server.config.top_k,
top_p=request.top_p if request.top_p is not None else elf_server.config.top_p,
repeat_penalty=request.repeat_penalty if request.repeat_penalty is not None else elf_server.config.repeat_penalty,
repeat_last_n=request.repeat_last_n if request.repeat_last_n is not None else elf_server.config.repeat_last_n,
seed=request.seed if request.seed is not None else elf_server.config.seed,
n_threads=request.n_threads if request.n_threads is not None else elf_server.config.n_threads
)
output["waiting"] = False
thread = threading.Thread(target=chunks_builder)
thread.start()
current_index = 0
while (output["waiting"] and elf_server.cancel_gen == False):
while (output["waiting"] and len(output["new"])==0):
time.sleep(0.001)
lk.acquire()
for i in range(len(output["new"])):
current_index += 1
yield output["new"][i]
output["new"]=[]
lk.release()
elf_server.cancel_gen = False

return StreamingResponse(iter(generate_chunks()))
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
@@ -121,79 +205,199 @@ def lollms_generate(request_data: Request):
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature,
top_k=request_data.top_k,
top_p=request_data.top_p,
repeat_penalty=request_data.repeat_penalty,
repeat_last_n=request_data.repeat_last_n,
seed=request_data.seed,
n_threads=request_data.n_threads
))

return StreamingResponse(generate_chunks())
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature if request.temperature is not None else elf_server.config.temperature,
top_k=request.top_k if request.top_k is not None else elf_server.config.top_k,
top_p=request.top_p if request.top_p is not None else elf_server.config.top_p,
repeat_penalty=request.repeat_penalty if request.repeat_penalty is not None else elf_server.config.repeat_penalty,
repeat_last_n=request.repeat_last_n if request.repeat_last_n is not None else elf_server.config.repeat_last_n,
seed=request.seed if request.seed is not None else elf_server.config.seed,
n_threads=request.n_threads if request.n_threads is not None else elf_server.config.n_threads
)
return output["text"]
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
return True
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature,
top_k=request_data.top_k,
top_p=request_data.top_p,
repeat_penalty=request_data.repeat_penalty,
repeat_last_n=request_data.repeat_last_n,
seed=request_data.seed,
n_threads=request_data.n_threads
)
return output["text"]
else:
return None

return None
except Exception as ex:
trace_exception(ex)
elf_server.error(ex)
return {"status":False,"error":str(ex)}

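# --- Illustrative client sketch (not part of the commit) ---------------------
# A minimal example of how a client could call the /lollms_generate endpoint
# defined above. The payload fields come from the LollmsGenerateRequest model;
# the server address is an assumption for illustration and should be adjusted
# to your own configuration.
import requests

payload = {
    "text": "Once upon a time",
    "n_predict": 128,
    "stream": False,        # set True to consume the StreamingResponse chunk by chunk
    "temperature": 0.7,
}
response = requests.post("http://localhost:9600/lollms_generate", json=payload)
# In the non-streaming branch the handler returns the generated text.
print(response.json())
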
# ----------------------- Open AI ----------------------------------------
class Message(BaseModel):
role: str
content: str

class Delta(BaseModel):
content : str = ""
role : str = "assistant"

class Choices(BaseModel):
finish_reason: Optional[str] = None,
index: Optional[int] = 0,
message: Optional[str] = "",
logprobs: Optional[float] = None

class Usage(BaseModel):
prompt_tokens: Optional[int]=0,
completion_tokens : Optional[int]=0,
completion_tokens : Optional[int]=0,

class StreamingChoices(BaseModel):
finish_reason : Optional[str] = "stop"
index : Optional[int] = 0
delta : Optional[Delta] = None
logprobs : Optional[List[float]|None] = None

class StreamingModelResponse(BaseModel):
id: str
"""A unique identifier for the completion."""

choices: List[StreamingChoices]
"""The list of completion choices the model generated for the input prompt."""

created: int
"""The Unix timestamp (in seconds) of when the completion was created."""

model: Optional[str] = None
"""The model used for completion."""

object: Optional[str] = "text_completion"
"""The object type, which is always "text_completion" """

system_fingerprint: Optional[str] = None
"""This fingerprint represents the backend configuration that the model runs with.

Can be used in conjunction with the `seed` request parameter to understand when
backend changes have been made that might impact determinism.
"""

usage: Optional[Usage] = None
"""Usage statistics for the completion request."""

_hidden_params: dict = {}
def encode(self, charset):
encoded = json.dumps(self.dict()).encode(charset)
return encoded

class ModelResponse(BaseModel):
id: str
"""A unique identifier for the completion."""

choices: List[Choices]
"""The list of completion choices the model generated for the input prompt."""

created: int
"""The Unix timestamp (in seconds) of when the completion was created."""

model: Optional[str] = None
"""The model used for completion."""

object: Optional[str] = "text_completion"
"""The object type, which is always "text_completion" """

system_fingerprint: Optional[str] = None
"""This fingerprint represents the backend configuration that the model runs with.

Can be used in conjunction with the `seed` request parameter to understand when
backend changes have been made that might impact determinism.
"""

usage: Optional[Usage] = None
"""Usage statistics for the completion request."""

_hidden_params: dict = {}

class GenerationRequest(BaseModel):
messages: List[Message]
max_tokens: Optional[int] = 1024
stream: Optional[bool] = False
temperature: Optional[float] = 0.1

# openai compatible generation
@router.post("/v1/chat/completions")
def v1_chat_generate(request_data: V1ChatGenerateRequest):
"""
Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
This endpoint is compatible with open ai API and mistralAI API
Args:
- request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
async def v1_chat_completions(request: GenerationRequest):
try:
messages = request.messages
text = ""
for message in messages:
text += f"{message.role}: {message.content}\n"
n_predict = request.max_tokens if request.max_tokens>0 else 1024
stream = request.stream

Returns:
- If the elf_server binding is not None:
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If the elf_server binding is None, returns None.
"""
messages = request_data.messages
text = ""
for message in messages:
text += f"{message['role']}: {message['content']}\n"
n_predict = request_data.max_tokens
stream = request_data.stream

if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
if elf_server.binding is not None:
if stream:
output = {"text":"","waiting":True,"new":[]}
def generate_chunks():
lk = threading.Lock()

def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
if elf_server.cancel_gen:
return False
if chunk is None:
return
output["text"] += chunk
# Yield each chunk of data
lk.acquire()
try:
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
lk.release()
return False
else:
output["new"].append(chunk)
lk.release()
return True
except Exception as ex:
trace_exception(ex)
lk.release()
return True
def chunks_builder():
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature or elf_server.config.temperature
)
output["waiting"] = False
thread = threading.Thread(target=chunks_builder)
thread.start()
current_index = 0
while (output["waiting"] and elf_server.cancel_gen == False):
while (output["waiting"] and len(output["new"])==0):
time.sleep(0.001)
lk.acquire()
for i in range(len(output["new"])):
output_val = StreamingModelResponse(
id = _generate_id(),
choices = [StreamingChoices(index= current_index, delta=Delta(content=output["new"][i]))],
created=int(time.time()),
model=elf_server.config.model_name,
usage=Usage(prompt_tokens= 0, completion_tokens= 10)
)
current_index += 1
yield output_val
output["new"]=[]
lk.release()
elf_server.cancel_gen = False

return StreamingResponse(iter(generate_chunks()))
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
if chunk is None:
return
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
@@ -201,65 +405,62 @@ def v1_chat_generate(request_data: V1ChatGenerateRequest):
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
))

return StreamingResponse(generate_chunks())
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature or elf_server.config.temperature
)
return ModelResponse(id = _generate_id(), choices = [Choices(message=output["text"])], created=int(time.time()))
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
return True
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
)
return output["text"]
else:
return None
return None
except Exception as ex:
trace_exception(ex)
elf_server.error(ex)
return {"status":False,"error":str(ex)}

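# --- Illustrative client sketch (not part of the commit) ---------------------
# The /v1/chat/completions route above mimics the OpenAI chat API, so a plain
# HTTP client can talk to it with an OpenAI-style payload. The base URL is an
# assumption for illustration; the field names follow the GenerationRequest,
# ModelResponse and Choices models defined above.
import requests

payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ],
    "max_tokens": 64,
    "stream": False,
    "temperature": 0.1,
}
resp = requests.post("http://localhost:9600/v1/chat/completions", json=payload)
data = resp.json()
# In the non-streaming branch the handler builds a ModelResponse whose first
# choice's "message" field should hold the generated text.
print(data["choices"][0]["message"])
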
# openai compatible generation
@router.post("/v1/completions")
def v1_instruct_generate(request_data: V1InstructGenerateRequest):
async def v1_completion(request: Request):
"""
Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
This endpoint is compatible with open ai API and mistralAI API
Args:
- request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
Executes Python code and returns the output.

Returns:
- If the elf_server binding is not None:
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If the elf_server binding is None, returns None.
"""

text = request_data.prompt
n_predict = request_data.max_tokens
stream = request_data.stream

if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
:param request: The HTTP request object.
:return: A JSON response with the status of the operation.
"""

try:
data = (await request.json())
text = data.get("prompt")
n_predict = data.get("max_tokens")
stream = data.get("stream")

if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=data.get("temperature", elf_server.config.temperature)
))

return StreamingResponse(generate_chunks())
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
@@ -269,38 +470,20 @@ def v1_instruct_generate(request_data: V1InstructGenerateRequest):
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
))

return StreamingResponse(generate_chunks())
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=data.get("temperature", elf_server.config.temperature)
)
return output["text"]
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
return True
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
)
return output["text"]
else:
return None

return None
except Exception as ex:
trace_exception(ex)
elf_server.error(ex)
return {"status":False,"error":str(ex)}

@router.post("/stop_gen")

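# --- Illustrative client sketch (not part of the commit) ---------------------
# The /v1/completions handler above reads the raw JSON body itself
# (prompt, max_tokens, stream, temperature), so a request can look like this.
# The server address is an assumption for illustration.
import requests

payload = {"prompt": "def fibonacci(n):", "max_tokens": 128, "stream": False}
resp = requests.post("http://localhost:9600/v1/completions", json=payload)
# The non-streaming branch returns the generated text directly.
print(resp.json())
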
@@ -61,6 +61,9 @@ def list_personalities(category:str):
ASCIIColors.error(f"No personalities found. Using default one {ex}")
return personalities

@router.get("/list_mounted_personalities")
def list_mounted_personalities():
return lollmsElfServer.config.personalities

@router.get("/get_all_personalities")
def get_all_personalities():

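# --- Illustrative client sketch (not part of the commit) ---------------------
# The /lollms_generate docstring points to /list_mounted_personalities to
# discover which personalities are available. A simple GET is enough; the
# server address is an assumption for illustration.
import requests

mounted = requests.get("http://localhost:9600/list_mounted_personalities").json()
print(mounted)   # the personalities list from the server configuration
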
@@ -501,10 +504,17 @@ async def set_active_personality_settings(request: Request):

# ------------------------------------------- Interaction with personas ------------------------------------------------
@router.post("/post_to_personality")
def post_to_personality(data):
async def post_to_personality(request: Request):
"""Post data to a personality"""
if hasattr(lollmsElfServer.personality.processor,'handle_request'):
return lollmsElfServer.personality.processor.handle_request(data)
else:
return {}

try:
config_data = (await request.json())
if hasattr(lollmsElfServer.personality.processor,'handle_request'):
return lollmsElfServer.personality.processor.handle_request(config_data)
else:
return {}
except Exception as ex:
trace_exception(ex)
lollmsElfServer.error(ex)
return {"status":False,"error":str(ex)}

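# --- Illustrative client sketch (not part of the commit) ---------------------
# After this change, /post_to_personality reads its payload from the request
# JSON body instead of a raw `data` argument, so a client would call it like
# this. The payload keys depend entirely on the mounted personality's
# handle_request implementation; the one used here is a hypothetical example.
import requests

resp = requests.post(
    "http://localhost:9600/post_to_personality",   # assumed server address
    json={"command": "status"},                    # hypothetical payload for the personality processor
)
print(resp.json())
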
@@ -58,152 +58,149 @@ def add_events(sio:socketio):
run_async(partial(lollmsElfServer.socketio.emit,"busy", {"message":"I am busy. Come back later."}, to=client_id))
ASCIIColors.warning(f"OOps request {client_id} refused!! Server busy")
return
def generate_text():
lollmsElfServer.busy = True
try:
model = lollmsElfServer.model
lollmsElfServer.connections[client_id]["is_generating"]=True
lollmsElfServer.connections[client_id]["requested_stop"]=False
prompt = data['prompt']
tokenized = model.tokenize(prompt)
personality_id = data.get('personality', -1)
lollmsElfServer.busy = True
try:
model = lollmsElfServer.model
lollmsElfServer.connections[client_id]["is_generating"]=True
lollmsElfServer.connections[client_id]["requested_stop"]=False
prompt = data['prompt']
tokenized = model.tokenize(prompt)
personality_id = data.get('personality', -1)

n_crop = data.get('n_crop', len(tokenized))
if n_crop!=-1:
prompt = model.detokenize(tokenized[-n_crop:])
n_crop = data.get('n_crop', len(tokenized))
if n_crop!=-1:
prompt = model.detokenize(tokenized[-n_crop:])

n_predicts = data["n_predicts"]
parameters = data.get("parameters",{
"temperature":lollmsElfServer.config["temperature"],
"top_k":lollmsElfServer.config["top_k"],
"top_p":lollmsElfServer.config["top_p"],
"repeat_penalty":lollmsElfServer.config["repeat_penalty"],
"repeat_last_n":lollmsElfServer.config["repeat_last_n"],
"seed":lollmsElfServer.config["seed"]
})
n_predicts = data["n_predicts"]
parameters = data.get("parameters",{
"temperature":lollmsElfServer.config["temperature"],
"top_k":lollmsElfServer.config["top_k"],
"top_p":lollmsElfServer.config["top_p"],
"repeat_penalty":lollmsElfServer.config["repeat_penalty"],
"repeat_last_n":lollmsElfServer.config["repeat_last_n"],
"seed":lollmsElfServer.config["seed"]
})

if personality_id==-1:
# Raw text generation
lollmsElfServer.answer = {"full_text":""}
def callback(text, message_type: MSG_TYPE, metadata:dict={}):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
ASCIIColors.success(f"generated:{len(lollmsElfServer.answer['full_text'].split())} words", end='\r')
if text is not None:
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, to=client_id))
if client_id in lollmsElfServer.connections:# Client disconnected
if lollmsElfServer.connections[client_id]["requested_stop"]:
return False
else:
return True
else:
return False

tk = model.tokenize(prompt)
n_tokens = len(tk)
fd = model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_predicts,n_tokens):])

try:
ASCIIColors.print("warming up", ASCIIColors.color_bright_cyan)

generated_text = model.generate(fd,
n_predict=n_predicts,
callback=callback,
temperature = parameters["temperature"],
top_k = parameters["top_k"],
top_p = parameters["top_p"],
repeat_penalty = parameters["repeat_penalty"],
repeat_last_n = parameters["repeat_last_n"],
seed = parameters["seed"],
)
ASCIIColors.success(f"\ndone")

if client_id in lollmsElfServer.connections:
if not lollmsElfServer.connections[client_id]["requested_stop"]:
# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
else:
try:
personality: AIPersonality = lollmsElfServer.personalities[personality_id]
ump = lollmsElfServer.config.discussion_prompt_separator +lollmsElfServer.config.user_name.strip() if lollmsElfServer.config.use_user_name_in_discussions else lollmsElfServer.personality.user_message_prefix
personality.model = model
cond_tk = personality.model.tokenize(personality.personality_conditioning)
n_cond_tk = len(cond_tk)
# Placeholder code for text generation
# Replace this with your actual text generation logic
print(f"Text generation requested by client: {client_id}")

lollmsElfServer.answer["full_text"] = ''
full_discussion_blocks = lollmsElfServer.connections[client_id]["full_discussion_blocks"]

if prompt != '':
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
preprocessed_prompt = personality.processor.process_model_input(prompt)
else:
preprocessed_prompt = prompt

if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)

else:

full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)
full_discussion_blocks.append(personality.link_text)
full_discussion_blocks.append(personality.ai_message_prefix)

full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)

if personality_id==-1:
# Raw text generation
lollmsElfServer.answer = {"full_text":""}
def callback(text, message_type: MSG_TYPE, metadata:dict={}):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
ASCIIColors.success(f"generated:{len(lollmsElfServer.answer['full_text'].split())} words", end='\r')
if text is not None:
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, to=client_id))
if client_id in lollmsElfServer.connections:# Client disconnected
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text}, to=client_id))
try:
if lollmsElfServer.connections[client_id]["requested_stop"]:
return False
else:
return True
else:
return False
except: # If the client is disconnected then we stop talking to it
return False

tk = model.tokenize(prompt)
tk = personality.model.tokenize(full_discussion)
n_tokens = len(tk)
fd = model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_predicts,n_tokens):])
fd = personality.model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_cond_tk-personality.model_n_predicts,n_tokens):])

if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
ASCIIColors.info("processing...")
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
else:
ASCIIColors.info("generating...")
generated_text = personality.model.generate(
personality.personality_conditioning+fd,
n_predict=personality.model_n_predicts,
callback=callback)

try:
ASCIIColors.print("warming up", ASCIIColors.color_bright_cyan)

generated_text = model.generate(fd,
n_predict=n_predicts,
callback=callback,
temperature = parameters["temperature"],
top_k = parameters["top_k"],
top_p = parameters["top_p"],
repeat_penalty = parameters["repeat_penalty"],
repeat_last_n = parameters["repeat_last_n"],
seed = parameters["seed"],
)
ASCIIColors.success(f"\ndone")
if personality.processor is not None and personality.processor_cfg["process_model_output"]:
generated_text = personality.processor.process_model_output(generated_text)

if client_id in lollmsElfServer.connections:
if not lollmsElfServer.connections[client_id]["requested_stop"]:
# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
else:
try:
personality: AIPersonality = lollmsElfServer.personalities[personality_id]
ump = lollmsElfServer.config.discussion_prompt_separator +lollmsElfServer.config.user_name.strip() if lollmsElfServer.config.use_user_name_in_discussions else lollmsElfServer.personality.user_message_prefix
personality.model = model
cond_tk = personality.model.tokenize(personality.personality_conditioning)
n_cond_tk = len(cond_tk)
# Placeholder code for text generation
# Replace this with your actual text generation logic
print(f"Text generation requested by client: {client_id}")
full_discussion_blocks.append(generated_text.strip())
ASCIIColors.success("\ndone")

lollmsElfServer.answer["full_text"] = ''
full_discussion_blocks = lollmsElfServer.connections[client_id]["full_discussion_blocks"]

if prompt != '':
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
preprocessed_prompt = personality.processor.process_model_input(prompt)
else:
preprocessed_prompt = prompt

if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)

else:

full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)
full_discussion_blocks.append(personality.link_text)
full_discussion_blocks.append(personality.ai_message_prefix)

full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)

def callback(text, message_type: MSG_TYPE, metadata:dict={}):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text}, to=client_id))
try:
if lollmsElfServer.connections[client_id]["requested_stop"]:
return False
else:
return True
except: # If the client is disconnected then we stop talking to it
return False

tk = personality.model.tokenize(full_discussion)
n_tokens = len(tk)
fd = personality.model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_cond_tk-personality.model_n_predicts,n_tokens):])

if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
ASCIIColors.info("processing...")
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
else:
ASCIIColors.info("generating...")
generated_text = personality.model.generate(
personality.personality_conditioning+fd,
n_predict=personality.model_n_predicts,
callback=callback)

if personality.processor is not None and personality.processor_cfg["process_model_output"]:
generated_text = personality.processor.process_model_output(generated_text)

full_discussion_blocks.append(generated_text.strip())
ASCIIColors.success("\ndone")

# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
except Exception as ex:
trace_exception(ex)
# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
lollmsElfServer.busy = False
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
except Exception as ex:
trace_exception(ex)
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
lollmsElfServer.busy = False

# Start the text generation task in a separate thread
task = lollmsElfServer.socketio.start_background_task(target=generate_text)

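# --- Illustrative client sketch (not part of the commit) ---------------------
# A minimal python-socketio client for the events used in the handler above.
# The diff shows the server emitting 'busy', 'text_chunk', 'text_generated'
# and 'generation_error', and reading 'prompt', 'n_predicts' and 'personality'
# from the incoming data. The event name that triggers generate_text and the
# server address are not visible in this hunk and are assumptions here.
import socketio

sio = socketio.Client()

@sio.on("text_chunk")
def on_chunk(data):
    # Streamed partial text from the server
    print(data["chunk"], end="", flush=True)

@sio.on("text_generated")
def on_done(data):
    print("\n--- full text ---\n", data["text"])

@sio.on("generation_error")
def on_error(data):
    print("error:", data["error"])

sio.connect("http://localhost:9600")   # assumed server address
# 'generate_text' is an assumed event name; check the server's event registration.
sio.emit("generate_text", {"prompt": "Hello", "n_predicts": 128, "personality": -1})
sio.wait()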