upgraded text

Saifeddine ALOUI 2024-01-14 00:24:00 +01:00
parent 22e1ef85ce
commit 94425bce30
4 changed files with 496 additions and 303 deletions

View File

@@ -44,6 +44,9 @@ class LollmsApplication(LoLLMsCom):
self.config = config
self.lollms_paths = lollms_paths
# TODO : implement
self.embedding_models = []
self.menu = MainMenu(self, callback)
self.mounted_personalities = []
self.personality:AIPersonality = None

View File

@@ -8,13 +8,25 @@ description:
"""
from fastapi import APIRouter, Request
from fastapi import APIRouter, Request, Body
from lollms.server.elf_server import LOLLMSElfServer
from pydantic import BaseModel
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.utilities import detect_antiprompt, remove_text_from_string
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception
from ascii_colors import ASCIIColors
import time
import threading
from typing import List, Optional, Union
import random
import string
import json
def _generate_id(length=10):
letters_and_digits = string.ascii_letters + string.digits
random_id = ''.join(random.choice(letters_and_digits) for _ in range(length))
return random_id
class GenerateRequest(BaseModel):
text: str
@@ -77,16 +89,30 @@ def get_generation_status():
# ----------------------------------- Generation -----------------------------------------
@router.post("/generate")
def lollms_generate(request_data: Request):
"""
Endpoint for generating text from prompts using the lollms fastapi server.
class LollmsGenerateRequest(BaseModel):
text: str
model_name: Optional[str] = None
personality: Optional[int] = None
n_predict: Optional[int] = 1024
stream: bool = False
temperature: Optional[float] = None
top_k: Optional[int] = None
top_p: Optional[float] = None
repeat_penalty: Optional[float] = None
repeat_last_n: Optional[int] = None
seed: Optional[int] = None
n_threads: Optional[int] = None
@router.post("/lollms_generate")
async def lollms_generate(request: LollmsGenerateRequest):
""" Endpoint for generating text from prompts using the LoLLMs fastAPI server.
Args:
Data model for the Generate Request.
Attributes:
- text: str representing the input text prompt for text generation.
- text: str : the input text prompt for text generation.
- model_name: Optional[str] = None : the name of the model to use (it should be one of the currently installed models).
- personality: Optional[int] = None : the index of the mounted personality to use (if None, the endpoint just returns a plain completion). To get the list of mounted personalities, use /list_mounted_personalities.
- n_predict: int representing the number of predictions to generate.
- stream: bool indicating whether to stream the generated text or not.
- temperature: float representing the temperature parameter for text generation.
@@ -97,21 +123,79 @@ def lollms_generate(request_data: Request):
- seed: int representing the seed for text generation.
- n_threads: int representing the number of threads for text generation.
Returns:
- If the elf_server binding is not None:
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If the elf_server binding is None, returns None.
"""
text = request_data["text"]
n_predict = request_data.get("n_predict", 1024)
stream = request_data.get("stream", False)
if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
"""
try:
text = request.text
n_predict = request.n_predict
stream = request.stream
if elf_server.binding is not None:
if stream:
output = {"text":"","waiting":True,"new":[]}
def generate_chunks():
lk = threading.Lock()
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
if elf_server.cancel_gen:
return False
if chunk is None:
return
output["text"] += chunk
# Yield each chunk of data
lk.acquire()
try:
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
lk.release()
return False
else:
output["new"].append(chunk)
lk.release()
return True
except Exception as ex:
trace_exception(ex)
lk.release()
return True
def chunks_builder():
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature if request.temperature is not None else elf_server.config.temperature,
top_k=request.top_k if request.top_k is not None else elf_server.config.top_k,
top_p=request.top_p if request.top_p is not None else elf_server.config.top_p,
repeat_penalty=request.repeat_penalty if request.repeat_penalty is not None else elf_server.config.repeat_penalty,
repeat_last_n=request.repeat_last_n if request.repeat_last_n is not None else elf_server.config.repeat_last_n,
seed=request.seed if request.seed is not None else elf_server.config.seed,
n_threads=request.n_threads if request.n_threads is not None else elf_server.config.n_threads
)
output["waiting"] = False
thread = threading.Thread(target=chunks_builder)
thread.start()
current_index = 0
while (output["waiting"] and elf_server.cancel_gen == False):
while (output["waiting"] and len(output["new"])==0):
time.sleep(0.001)
lk.acquire()
for i in range(len(output["new"])):
current_index += 1
yield output["new"][i]
output["new"]=[]
lk.release()
elf_server.cancel_gen = False
return StreamingResponse(iter(generate_chunks()))
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
@@ -121,79 +205,199 @@ def lollms_generate(request_data: Request):
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature,
top_k=request_data.top_k,
top_p=request_data.top_p,
repeat_penalty=request_data.repeat_penalty,
repeat_last_n=request_data.repeat_last_n,
seed=request_data.seed,
n_threads=request_data.n_threads
))
return StreamingResponse(generate_chunks())
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature if request.temperature is not None else elf_server.config.temperature,
top_k=request.top_k if request.top_k is not None else elf_server.config.top_k,
top_p=request.top_p if request.top_p is not None else elf_server.config.top_p,
repeat_penalty=request.repeat_penalty if request.repeat_penalty is not None else elf_server.config.repeat_penalty,
repeat_last_n=request.repeat_last_n if request.repeat_last_n is not None else elf_server.config.repeat_last_n,
seed=request.seed if request.seed is not None else elf_server.config.seed,
n_threads=request.n_threads if request.n_threads is not None else elf_server.config.n_threads
)
return output["text"]
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
return True
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature,
top_k=request_data.top_k,
top_p=request_data.top_p,
repeat_penalty=request_data.repeat_penalty,
repeat_last_n=request_data.repeat_last_n,
seed=request_data.seed,
n_threads=request_data.n_threads
)
return output["text"]
else:
return None
return None
except Exception as ex:
trace_exception(ex)
elf_server.error(ex)
return {"status":False,"error":str(ex)}
# ----------------------- Open AI ----------------------------------------
class Message(BaseModel):
role: str
content: str
class Delta(BaseModel):
content : str = ""
role : str = "assistant"
class Choices(BaseModel):
    finish_reason: Optional[str] = None
    index: Optional[int] = 0
    message: Optional[str] = ""
    logprobs: Optional[float] = None
class Usage(BaseModel):
    prompt_tokens: Optional[int] = 0
    completion_tokens: Optional[int] = 0
class StreamingChoices(BaseModel):
finish_reason : Optional[str] = "stop"
index : Optional[int] = 0
delta : Optional[Delta] = None
logprobs : Optional[List[float]] = None
class StreamingModelResponse(BaseModel):
id: str
"""A unique identifier for the completion."""
choices: List[StreamingChoices]
"""The list of completion choices the model generated for the input prompt."""
created: int
"""The Unix timestamp (in seconds) of when the completion was created."""
model: Optional[str] = None
"""The model used for completion."""
object: Optional[str] = "text_completion"
"""The object type, which is always "text_completion" """
system_fingerprint: Optional[str] = None
"""This fingerprint represents the backend configuration that the model runs with.
Can be used in conjunction with the `seed` request parameter to understand when
backend changes have been made that might impact determinism.
"""
usage: Optional[Usage] = None
"""Usage statistics for the completion request."""
_hidden_params: dict = {}
def encode(self, charset):
encoded = json.dumps(self.dict()).encode(charset)
return encoded
class ModelResponse(BaseModel):
id: str
"""A unique identifier for the completion."""
choices: List[Choices]
"""The list of completion choices the model generated for the input prompt."""
created: int
"""The Unix timestamp (in seconds) of when the completion was created."""
model: Optional[str] = None
"""The model used for completion."""
object: Optional[str] = "text_completion"
"""The object type, which is always "text_completion" """
system_fingerprint: Optional[str] = None
"""This fingerprint represents the backend configuration that the model runs with.
Can be used in conjunction with the `seed` request parameter to understand when
backend changes have been made that might impact determinism.
"""
usage: Optional[Usage] = None
"""Usage statistics for the completion request."""
_hidden_params: dict = {}
class GenerationRequest(BaseModel):
messages: List[Message]
max_tokens: Optional[int] = 1024
stream: Optional[bool] = False
temperature: Optional[float] = 0.1
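# --- Illustrative sketch (not part of this commit) ---
# Shows how the response models above fit together and what encode() produces;
# the content, model name and token counts below are made-up placeholder values.
# StreamingModelResponse.encode() is what lets a chunk be serialized to JSON
# bytes before being written onto a streamed response.
def _example_build_stream_chunk() -> bytes:
    chunk = StreamingModelResponse(
        id=_generate_id(),
        choices=[StreamingChoices(index=0, delta=Delta(content="Hello"))],
        created=int(time.time()),
        model="placeholder_model",                 # placeholder model name
        usage=Usage(prompt_tokens=0, completion_tokens=1),
    )
    return chunk.encode("utf-8")                   # JSON bytes of one chunk
# --- end of illustrative sketch ---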
# openai compatible generation
@router.post("/v1/chat/completions")
def v1_chat_generate(request_data: V1ChatGenerateRequest):
"""
Endpoint for generating text from prompts using the lollms fastapi server in chat completion mode.
This endpoint is compatible with open ai API and mistralAI API
Args:
- request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
async def v1_chat_completions(request: GenerationRequest):
try:
messages = request.messages
text = ""
for message in messages:
text += f"{message.role}: {message.content}\n"
n_predict = request.max_tokens if request.max_tokens>0 else 1024
stream = request.stream
Returns:
- If the elf_server binding is not None:
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If the elf_server binding is None, returns None.
"""
messages = request_data.messages
text = ""
for message in messages:
text += f"{message['role']}: {message['content']}\n"
n_predict = request_data.max_tokens
stream = request_data.stream
if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
if elf_server.binding is not None:
if stream:
output = {"text":"","waiting":True,"new":[]}
def generate_chunks():
lk = threading.Lock()
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
if elf_server.cancel_gen:
return False
if chunk is None:
return
output["text"] += chunk
# Yield each chunk of data
lk.acquire()
try:
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
lk.release()
return False
else:
output["new"].append(chunk)
lk.release()
return True
except Exception as ex:
trace_exception(ex)
lk.release()
return True
def chunks_builder():
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature if request.temperature is not None else elf_server.config.temperature
)
output["waiting"] = False
thread = threading.Thread(target=chunks_builder)
thread.start()
current_index = 0
while (output["waiting"] and elf_server.cancel_gen == False):
while (output["waiting"] and len(output["new"])==0):
time.sleep(0.001)
lk.acquire()
for i in range(len(output["new"])):
output_val = StreamingModelResponse(
id = _generate_id(),
choices = [StreamingChoices(index= current_index, delta=Delta(content=output["new"][i]))],
created=int(time.time()),
model=elf_server.config.model_name,
usage=Usage(prompt_tokens= 0, completion_tokens= 10)
)
current_index += 1
yield output_val
output["new"]=[]
lk.release()
elf_server.cancel_gen = False
return StreamingResponse(iter(generate_chunks()))
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
if chunk is None:
return
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
@@ -201,65 +405,62 @@ def v1_chat_generate(request_data: V1ChatGenerateRequest):
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
))
return StreamingResponse(generate_chunks())
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request.temperature if request.temperature is not None else elf_server.config.temperature
)
return ModelResponse(id = _generate_id(), choices = [Choices(message=output["text"])], created=int(time.time()))
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
return True
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
)
return output["text"]
else:
return None
return None
except Exception as ex:
trace_exception(ex)
elf_server.error(ex)
return {"status":False,"error":str(ex)}
# openai compatible generation
@router.post("/v1/completions")
def v1_instruct_generate(request_data: V1InstructGenerateRequest):
async def v1_completion(request: Request):
"""
Endpoint for generating text from prompts using the lollms fastapi server in instruct completion mode.
This endpoint is compatible with open ai API and mistralAI API
Args:
- request_data: GenerateRequest object containing the input text, number of predictions, and stream flag.
Endpoint for generating text from a prompt in instruct/completion mode. This endpoint is compatible with the OpenAI and MistralAI APIs.
Returns:
- If the elf_server binding is not None:
- If stream is True, returns a StreamingResponse of generated text chunks.
- If stream is False, returns the generated text as a string.
- If the elf_server binding is None, returns None.
"""
text = request_data.prompt
n_predict = request_data.max_tokens
stream = request_data.stream
if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
:param request: The HTTP request object.
:return: The generated text (a StreamingResponse of chunks when stream is true), or a status/error object on failure.
"""
try:
data = (await request.json())
text = data.get("prompt")
n_predict = data.get("max_tokens")
stream = data.get("stream")
if elf_server.binding is not None:
if stream:
output = {"text":""}
def generate_chunks():
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=data.get("temperature", elf_server.config.temperature)
))
return StreamingResponse(generate_chunks())
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
@@ -269,38 +470,20 @@ def v1_instruct_generate(request_data: V1InstructGenerateRequest):
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
yield chunk
return True
return iter(elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
))
return StreamingResponse(generate_chunks())
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=data.get("temperature", elf_server.config.temperature)
)
return output["text"]
else:
output = {"text":""}
def callback(chunk, chunk_type:MSG_TYPE=MSG_TYPE.MSG_TYPE_CHUNK):
# Yield each chunk of data
output["text"] += chunk
antiprompt = detect_antiprompt(output["text"])
if antiprompt:
ASCIIColors.warning(f"\nDetected hallucination with antiprompt: {antiprompt}")
output["text"] = remove_text_from_string(output["text"],antiprompt)
return False
else:
return True
elf_server.binding.generate(
text,
n_predict,
callback=callback,
temperature=request_data.temperature
)
return output["text"]
else:
return None
return None
except Exception as ex:
trace_exception(ex)
elf_server.error(ex)
return {"status":False,"error":str(ex)}
@router.post("/stop_gen")

View File

@@ -61,6 +61,9 @@ def list_personalities(category:str):
ASCIIColors.error(f"No personalities found. Using default one {ex}")
return personalities
@router.get("/list_mounted_personalities")
def list_mounted_personalities():
return lollmsElfServer.config.personalities
@router.get("/get_all_personalities")
def get_all_personalities():
@@ -501,10 +504,17 @@ async def set_active_personality_settings(request: Request):
# ------------------------------------------- Interaction with personas ------------------------------------------------
@router.post("/post_to_personality")
def post_to_personality(data):
async def post_to_personality(request: Request):
"""Post data to a personality"""
if hasattr(lollmsElfServer.personality.processor,'handle_request'):
return lollmsElfServer.personality.processor.handle_request(data)
else:
return {}
try:
config_data = (await request.json())
if hasattr(lollmsElfServer.personality.processor,'handle_request'):
return lollmsElfServer.personality.processor.handle_request(config_data)
else:
return {}
except Exception as ex:
trace_exception(ex)
lollmsElfServer.error(ex)
return {"status":False,"error":str(ex)}

View File

@@ -58,152 +58,149 @@ def add_events(sio:socketio):
run_async(partial(lollmsElfServer.socketio.emit,"busy", {"message":"I am busy. Come back later."}, to=client_id))
ASCIIColors.warning(f"OOps request {client_id} refused!! Server busy")
return
def generate_text():
lollmsElfServer.busy = True
try:
model = lollmsElfServer.model
lollmsElfServer.connections[client_id]["is_generating"]=True
lollmsElfServer.connections[client_id]["requested_stop"]=False
prompt = data['prompt']
tokenized = model.tokenize(prompt)
personality_id = data.get('personality', -1)
lollmsElfServer.busy = True
try:
model = lollmsElfServer.model
lollmsElfServer.connections[client_id]["is_generating"]=True
lollmsElfServer.connections[client_id]["requested_stop"]=False
prompt = data['prompt']
tokenized = model.tokenize(prompt)
personality_id = data.get('personality', -1)
n_crop = data.get('n_crop', len(tokenized))
if n_crop!=-1:
prompt = model.detokenize(tokenized[-n_crop:])
n_crop = data.get('n_crop', len(tokenized))
if n_crop!=-1:
prompt = model.detokenize(tokenized[-n_crop:])
n_predicts = data["n_predicts"]
parameters = data.get("parameters",{
"temperature":lollmsElfServer.config["temperature"],
"top_k":lollmsElfServer.config["top_k"],
"top_p":lollmsElfServer.config["top_p"],
"repeat_penalty":lollmsElfServer.config["repeat_penalty"],
"repeat_last_n":lollmsElfServer.config["repeat_last_n"],
"seed":lollmsElfServer.config["seed"]
})
n_predicts = data["n_predicts"]
parameters = data.get("parameters",{
"temperature":lollmsElfServer.config["temperature"],
"top_k":lollmsElfServer.config["top_k"],
"top_p":lollmsElfServer.config["top_p"],
"repeat_penalty":lollmsElfServer.config["repeat_penalty"],
"repeat_last_n":lollmsElfServer.config["repeat_last_n"],
"seed":lollmsElfServer.config["seed"]
})
if personality_id==-1:
# Raw text generation
lollmsElfServer.answer = {"full_text":""}
def callback(text, message_type: MSG_TYPE, metadata:dict={}):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
ASCIIColors.success(f"generated:{len(lollmsElfServer.answer['full_text'].split())} words", end='\r')
if text is not None:
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, to=client_id))
if client_id in lollmsElfServer.connections:# Client disconnected
if lollmsElfServer.connections[client_id]["requested_stop"]:
return False
else:
return True
else:
return False
tk = model.tokenize(prompt)
n_tokens = len(tk)
fd = model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_predicts,n_tokens):])
try:
ASCIIColors.print("warming up", ASCIIColors.color_bright_cyan)
generated_text = model.generate(fd,
n_predict=n_predicts,
callback=callback,
temperature = parameters["temperature"],
top_k = parameters["top_k"],
top_p = parameters["top_p"],
repeat_penalty = parameters["repeat_penalty"],
repeat_last_n = parameters["repeat_last_n"],
seed = parameters["seed"],
)
ASCIIColors.success(f"\ndone")
if client_id in lollmsElfServer.connections:
if not lollmsElfServer.connections[client_id]["requested_stop"]:
# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
else:
try:
personality: AIPersonality = lollmsElfServer.personalities[personality_id]
ump = lollmsElfServer.config.discussion_prompt_separator +lollmsElfServer.config.user_name.strip() if lollmsElfServer.config.use_user_name_in_discussions else lollmsElfServer.personality.user_message_prefix
personality.model = model
cond_tk = personality.model.tokenize(personality.personality_conditioning)
n_cond_tk = len(cond_tk)
# Placeholder code for text generation
# Replace this with your actual text generation logic
print(f"Text generation requested by client: {client_id}")
lollmsElfServer.answer["full_text"] = ''
full_discussion_blocks = lollmsElfServer.connections[client_id]["full_discussion_blocks"]
if prompt != '':
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
preprocessed_prompt = personality.processor.process_model_input(prompt)
else:
preprocessed_prompt = prompt
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)
else:
full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)
full_discussion_blocks.append(personality.link_text)
full_discussion_blocks.append(personality.ai_message_prefix)
full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
if personality_id==-1:
# Raw text generation
lollmsElfServer.answer = {"full_text":""}
def callback(text, message_type: MSG_TYPE, metadata:dict={}):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
ASCIIColors.success(f"generated:{len(lollmsElfServer.answer['full_text'].split())} words", end='\r')
if text is not None:
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, to=client_id))
if client_id in lollmsElfServer.connections:# Client disconnected
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text}, to=client_id))
try:
if lollmsElfServer.connections[client_id]["requested_stop"]:
return False
else:
return True
else:
return False
except: # If the client is disconnected then we stop talking to it
return False
tk = model.tokenize(prompt)
tk = personality.model.tokenize(full_discussion)
n_tokens = len(tk)
fd = model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_predicts,n_tokens):])
fd = personality.model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_cond_tk-personality.model_n_predicts,n_tokens):])
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
ASCIIColors.info("processing...")
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
else:
ASCIIColors.info("generating...")
generated_text = personality.model.generate(
personality.personality_conditioning+fd,
n_predict=personality.model_n_predicts,
callback=callback)
try:
ASCIIColors.print("warming up", ASCIIColors.color_bright_cyan)
generated_text = model.generate(fd,
n_predict=n_predicts,
callback=callback,
temperature = parameters["temperature"],
top_k = parameters["top_k"],
top_p = parameters["top_p"],
repeat_penalty = parameters["repeat_penalty"],
repeat_last_n = parameters["repeat_last_n"],
seed = parameters["seed"],
)
ASCIIColors.success(f"\ndone")
if personality.processor is not None and personality.processor_cfg["process_model_output"]:
generated_text = personality.processor.process_model_output(generated_text)
if client_id in lollmsElfServer.connections:
if not lollmsElfServer.connections[client_id]["requested_stop"]:
# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
else:
try:
personality: AIPersonality = lollmsElfServer.personalities[personality_id]
ump = lollmsElfServer.config.discussion_prompt_separator +lollmsElfServer.config.user_name.strip() if lollmsElfServer.config.use_user_name_in_discussions else lollmsElfServer.personality.user_message_prefix
personality.model = model
cond_tk = personality.model.tokenize(personality.personality_conditioning)
n_cond_tk = len(cond_tk)
# Placeholder code for text generation
# Replace this with your actual text generation logic
print(f"Text generation requested by client: {client_id}")
full_discussion_blocks.append(generated_text.strip())
ASCIIColors.success("\ndone")
lollmsElfServer.answer["full_text"] = ''
full_discussion_blocks = lollmsElfServer.connections[client_id]["full_discussion_blocks"]
if prompt != '':
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
preprocessed_prompt = personality.processor.process_model_input(prompt)
else:
preprocessed_prompt = prompt
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)
else:
full_discussion_blocks.append(ump)
full_discussion_blocks.append(preprocessed_prompt)
full_discussion_blocks.append(personality.link_text)
full_discussion_blocks.append(personality.ai_message_prefix)
full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
def callback(text, message_type: MSG_TYPE, metadata:dict={}):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
lollmsElfServer.answer["full_text"] = lollmsElfServer.answer["full_text"] + text
run_async(partial(lollmsElfServer.socketio.emit,'text_chunk', {'chunk': text}, to=client_id))
try:
if lollmsElfServer.connections[client_id]["requested_stop"]:
return False
else:
return True
except: # If the client is disconnected then we stop talking to it
return False
tk = personality.model.tokenize(full_discussion)
n_tokens = len(tk)
fd = personality.model.detokenize(tk[-min(lollmsElfServer.config.ctx_size-n_cond_tk-personality.model_n_predicts,n_tokens):])
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
ASCIIColors.info("processing...")
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
else:
ASCIIColors.info("generating...")
generated_text = personality.model.generate(
personality.personality_conditioning+fd,
n_predict=personality.model_n_predicts,
callback=callback)
if personality.processor is not None and personality.processor_cfg["process_model_output"]:
generated_text = personality.processor.process_model_output(generated_text)
full_discussion_blocks.append(generated_text.strip())
ASCIIColors.success("\ndone")
# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
except Exception as ex:
trace_exception(ex)
# Emit the generated text to the client
run_async(partial(lollmsElfServer.socketio.emit,'text_generated', {'text': generated_text}, to=client_id))
except Exception as ex:
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
lollmsElfServer.busy = False
ASCIIColors.error(f"\ndone")
lollmsElfServer.busy = False
except Exception as ex:
trace_exception(ex)
run_async(partial(lollmsElfServer.socketio.emit,'generation_error', {'error': str(ex)}, to=client_id))
lollmsElfServer.busy = False
# Start the text generation task in a separate thread
task = lollmsElfServer.socketio.start_background_task(target=generate_text)
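# --- Illustrative client sketch (not part of this commit) ---
# A minimal python-socketio client for the generation events handled above.
# The server URL and the event name used to trigger generation ("generate_text"
# below) are assumptions, since the handler registration line sits outside this
# hunk; the payload keys (prompt, personality, n_predicts, parameters) and the
# emitted events (text_chunk, text_generated, generation_error) come from the
# handler above. Requires the python-socketio client package.
def _example_socketio_client(url: str = "http://localhost:9600"):
    import socketio  # assumed client-side dependency

    sio = socketio.Client()

    @sio.on("text_chunk")
    def on_chunk(data):
        print(data["chunk"], end="", flush=True)

    @sio.on("text_generated")
    def on_done(data):
        print("\nfull text:", data["text"])
        sio.disconnect()

    @sio.on("generation_error")
    def on_error(data):
        print("error:", data["error"])
        sio.disconnect()

    sio.connect(url)
    # "generate_text" is a hypothetical event name; adjust it to the actual one
    sio.emit("generate_text", {"prompt": "Hello", "personality": -1, "n_predicts": 64})
    sio.wait()
# --- end of illustrative sketch ---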