Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-18 20:27:58 +00:00)

Merge branch 'main' of https://github.com/ParisNeo/lollms

Commit ae812c568e
@@ -14,6 +14,7 @@ import shutil
 import subprocess
 import yaml
 from enum import Enum
+from lollms.helpers import ASCIIColors

 class MSG_TYPE(Enum):
     MSG_TYPE_CHUNK=0
@@ -168,7 +169,7 @@ class APScript:
         antiprompt = self.personality.detect_antiprompt(bot_says)
         if antiprompt:
             self.bot_says = self.remove_text_from_string(bot_says,antiprompt)
-            print("Detected hallucination")
+            ASCIIColors.warning("Detected hallucination")
             return False
         else:
             self.bot_says = bot_says
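
For context, this guard is the usual shape of an antiprompt check inside a streaming callback: as soon as the model starts echoing a forbidden marker, the text is truncated and generation is stopped. Below is a minimal, self-contained sketch of the same idea; the marker list and the two helper functions are stand-ins written for illustration, not the lollms implementations.

from typing import Optional

# Example markers for illustration only; these are not the lollms defaults.
ANTIPROMPTS = ["!@>user:", "!@>assistant:"]

def detect_antiprompt(text: str) -> Optional[str]:
    # Return the first marker found in the text, or None if the text is clean.
    for marker in ANTIPROMPTS:
        if marker.lower() in text.lower():
            return marker
    return None

def remove_text_from_string(text: str, marker: str) -> str:
    # Cut the text at the first occurrence of the marker.
    index = text.lower().find(marker.lower())
    return text if index == -1 else text[:index]

def on_new_text(bot_says: str) -> bool:
    # Callback-style guard: returning False asks the generator to stop.
    marker = detect_antiprompt(bot_says)
    if marker is not None:
        cleaned = remove_text_from_string(bot_says, marker)
        print(f"Detected hallucination (antiprompt {marker!r}); keeping {len(cleaned)} chars")
        return False
    return True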

144  lollms/server.py
@@ -125,7 +125,7 @@ class LoLLMsServer:
         self.socketio_log.addHandler(logging.StreamHandler())

         self.initialize_routes()
-        self.run()
+        self.run(args.host, args.port)


     def load_binding(self):
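
For reference, a minimal sketch of what this change implies on the caller side: host and port now come from command-line arguments instead of being hardcoded in run(). The flag names and defaults below are assumptions for illustration, not taken from the lollms CLI.

import argparse

def parse_args():
    # Hypothetical flag names and defaults, shown only to illustrate the wiring.
    parser = argparse.ArgumentParser(description="LoLLMs-style Socket.IO server")
    parser.add_argument("--host", default="localhost", help="interface to bind")
    parser.add_argument("--port", type=int, default=9600, help="port to listen on")
    return parser.parse_args()

if __name__ == "__main__":
    args = parse_args()
    # The server object would then be started with self.run(args.host, args.port).
    print(f"Would start the server on {args.host}:{args.port}")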
@@ -457,15 +457,19 @@ class LoLLMsServer:
                 fd = model.detokenize(tk[-min(self.config.ctx_size,n_tokens):])

                 ASCIIColors.print("warm up", ASCIIColors.color_bright_cyan)
-                generated_text = model.generate(fd, n_predict=n_predicts, callback=callback,
-                                                temperature = parameters["temperature"],
-                                                top_k = parameters["top_k"],
-                                                top_p = parameters["top_p"],
-                                                repeat_penalty = parameters["repeat_penalty"],
-                                                repeat_last_n = parameters["repeat_last_n"],
-                                                seed = parameters["seed"]
-                                                )
-                ASCIIColors.success(f"\ndone")
+                try:
+                    generated_text = model.generate(fd, n_predict=n_predicts, callback=callback,
+                                                    temperature = parameters["temperature"],
+                                                    top_k = parameters["top_k"],
+                                                    top_p = parameters["top_p"],
+                                                    repeat_penalty = parameters["repeat_penalty"],
+                                                    repeat_last_n = parameters["repeat_last_n"],
+                                                    seed = parameters["seed"]
+                                                    )
+                    ASCIIColors.success(f"\ndone")
+                except Exception as ex:
+                    self.socketio.emit('generation_error', {'error': str(ex)}, room=client_id)
+                    ASCIIColors.error(f"\ndone")
                 if client_id in self.clients:
                     if not self.clients[client_id]["requested_stop"]:
                         # Emit the generated text to the client
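
The pattern introduced here is to run the model call inside try/except and report failures to the requesting client over Socket.IO instead of letting the handler crash. Below is a minimal Flask-SocketIO sketch of that pattern; the 'generation_error' and 'text_generated' event names match the diff, while the 'generate_text' request event and the stand-in generator are assumptions for illustration.

from flask import Flask, request
from flask_socketio import SocketIO

app = Flask(__name__)
socketio = SocketIO(app)

def fake_generate(prompt: str) -> str:
    # Stand-in for model.generate(...); raises to exercise the error path.
    if not prompt:
        raise ValueError("empty prompt")
    return prompt.upper()

@socketio.on("generate_text")  # request event name is an assumption, not from the diff
def handle_generate_text(data):
    client_id = request.sid
    try:
        text = fake_generate(data.get("prompt", ""))
        socketio.emit("text_generated", {"text": text}, room=client_id)
    except Exception as ex:
        # Same shape as the change above: report the failure to the caller.
        socketio.emit("generation_error", {"error": str(ex)}, room=client_id)

if __name__ == "__main__":
    socketio.run(app, host="localhost", port=9600)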
@@ -473,72 +473,76 @@ class LoLLMsServer:
                         self.socketio.sleep(0)
                 self.is_ready = True
             else:
-                personality: AIPersonality = self.personalities[personality_id]
-                personality.model = model
-                cond_tk = personality.model.tokenize(personality.personality_conditioning)
-                n_cond_tk = len(cond_tk)
-                # Placeholder code for text generation
-                # Replace this with your actual text generation logic
-                print(f"Text generation requested by client: {client_id}")
-
-                self.answer["full_text"] = ''
-                full_discussion_blocks = self.clients[client_id]["full_discussion_blocks"]
-
-                if prompt != '':
-                    if personality.processor is not None and personality.processor_cfg["process_model_input"]:
-                        preprocessed_prompt = personality.processor.process_model_input(prompt)
-                    else:
-                        preprocessed_prompt = prompt
-
-                    if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
-                        full_discussion_blocks.append(personality.user_message_prefix)
-                        full_discussion_blocks.append(preprocessed_prompt)
-
-                    else:
-
-                        full_discussion_blocks.append(personality.user_message_prefix)
-                        full_discussion_blocks.append(preprocessed_prompt)
-                        full_discussion_blocks.append(personality.link_text)
-                        full_discussion_blocks.append(personality.ai_message_prefix)
-
-                full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
-
-                def callback(text, message_type: MSG_TYPE):
-                    if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
-                        self.answer["full_text"] = self.answer["full_text"] + text
-                        self.socketio.emit('text_chunk', {'chunk': text}, room=client_id)
-                        self.socketio.sleep(0)
-                    try:
-                        if self.clients[client_id]["requested_stop"]:
-                            return False
-                        else:
-                            return True
-                    except: # If the client is disconnected then we stop talking to it
-                        return False
-
-                tk = personality.model.tokenize(full_discussion)
-                n_tokens = len(tk)
-                fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):])
-
-                if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
-                    print("processing...", end="", flush=True)
-                    generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
-                else:
-                    ASCIIColors.info("generating...", end="", flush=True)
-                    generated_text = personality.model.generate(
-                                                                personality.personality_conditioning+fd,
-                                                                n_predict=personality.model_n_predicts,
-                                                                callback=callback)
-
-                if personality.processor is not None and personality.processor_cfg["process_model_output"]:
-                    generated_text = personality.processor.process_model_output(generated_text)
-
-                full_discussion_blocks.append(generated_text.strip())
-                ASCIIColors.success("\ndone", end="", flush=True)
-
-                # Emit the generated text to the client
-                self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
-                self.socketio.sleep(0)
+                try:
+                    personality: AIPersonality = self.personalities[personality_id]
+                    personality.model = model
+                    cond_tk = personality.model.tokenize(personality.personality_conditioning)
+                    n_cond_tk = len(cond_tk)
+                    # Placeholder code for text generation
+                    # Replace this with your actual text generation logic
+                    print(f"Text generation requested by client: {client_id}")
+
+                    self.answer["full_text"] = ''
+                    full_discussion_blocks = self.clients[client_id]["full_discussion_blocks"]
+
+                    if prompt != '':
+                        if personality.processor is not None and personality.processor_cfg["process_model_input"]:
+                            preprocessed_prompt = personality.processor.process_model_input(prompt)
+                        else:
+                            preprocessed_prompt = prompt
+
+                        if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
+                            full_discussion_blocks.append(personality.user_message_prefix)
+                            full_discussion_blocks.append(preprocessed_prompt)
+
+                        else:
+
+                            full_discussion_blocks.append(personality.user_message_prefix)
+                            full_discussion_blocks.append(preprocessed_prompt)
+                            full_discussion_blocks.append(personality.link_text)
+                            full_discussion_blocks.append(personality.ai_message_prefix)
+
+                    full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
+
+                    def callback(text, message_type: MSG_TYPE):
+                        if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
+                            self.answer["full_text"] = self.answer["full_text"] + text
+                            self.socketio.emit('text_chunk', {'chunk': text}, room=client_id)
+                            self.socketio.sleep(0)
+                        try:
+                            if self.clients[client_id]["requested_stop"]:
+                                return False
+                            else:
+                                return True
+                        except: # If the client is disconnected then we stop talking to it
+                            return False
+
+                    tk = personality.model.tokenize(full_discussion)
+                    n_tokens = len(tk)
+                    fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):])
+
+                    if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
+                        print("processing...", end="", flush=True)
+                        generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
+                    else:
+                        ASCIIColors.info("generating...", end="", flush=True)
+                        generated_text = personality.model.generate(
+                                                                    personality.personality_conditioning+fd,
+                                                                    n_predict=personality.model_n_predicts,
+                                                                    callback=callback)
+
+                    if personality.processor is not None and personality.processor_cfg["process_model_output"]:
+                        generated_text = personality.processor.process_model_output(generated_text)
+
+                    full_discussion_blocks.append(generated_text.strip())
+                    ASCIIColors.success("\ndone", end="", flush=True)
+
+                    # Emit the generated text to the client
+                    self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
+                    self.socketio.sleep(0)
+                except Exception as ex:
+                    self.socketio.emit('generation_error', {'error': str(ex)}, room=client_id)
+                    ASCIIColors.error(f"\ndone")
             self.is_ready = True
         # Start the text generation task in a separate thread
         self.socketio.start_background_task(target=generate_text, once=True)
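
On the receiving end of these events, a client can stream chunks as they arrive and react to completion or failure. Below is a minimal python-socketio client sketch; 'text_chunk', 'text_generated' and 'generation_error' come from the diff, while the 'generate_text' request event, its payload, and the server URL are assumptions for illustration.

import socketio

sio = socketio.Client()
chunks = []

@sio.on("text_chunk")
def on_chunk(data):
    # The server streams partial text as {'chunk': ...}.
    chunks.append(data["chunk"])
    print(data["chunk"], end="", flush=True)

@sio.on("text_generated")
def on_done(data):
    # The final text arrives as {'text': ...}.
    print("\nfull text:", data["text"])
    sio.disconnect()

@sio.on("generation_error")
def on_error(data):
    print("\ngeneration failed:", data["error"])
    sio.disconnect()

if __name__ == "__main__":
    sio.connect("http://localhost:9600")  # URL is an assumption
    # The request event and payload below are assumptions for illustration.
    sio.emit("generate_text", {"prompt": "Hello"})
    sio.wait()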
||||
|
Loading…
Reference in New Issue
Block a user