Mirror of https://github.com/ParisNeo/lollms.git
Synced 2024-12-20 05:08:00 +00:00

Commit ca4a4a7802 (parent 9fe9d4e9c7): bugfixes
@@ -1 +1 @@
-Subproject commit 003043c7e237fb6ce952c326df9da1404499d810
+Subproject commit 02543f5b23ad31cd587dffd4b8b8f2623a7ea3a2
lollms/server.py  (188 changed lines)
@@ -17,6 +17,7 @@ import logging
 import shutil
 import yaml
 import copy
+from threading import Thread
 
 class LoLLMsServer:
     def __init__(self):
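Aside (not part of the commit): the only change in this hunk is the new `from threading import Thread` import, which goes together with the later change that wraps generation in a nested `generate_text()` function so the SocketIO handler returns quickly. Below is a minimal sketch of the two usual ways to do that with Flask-SocketIO; the app and the `run_generation` helper are illustrative placeholders, not lollms code.

    from threading import Thread

    from flask import Flask
    from flask_socketio import SocketIO

    app = Flask(__name__)
    socketio = SocketIO(app)

    def run_generation():
        # stand-in for the real model.generate(...) loop
        pass

    @socketio.on('generate_text')
    def handle_generate_text(data):
        # Option 1: a plain daemon thread (what the Thread import allows)
        Thread(target=run_generation, daemon=True).start()
        # Option 2: Flask-SocketIO's own helper, which this commit also calls
        # socketio.start_background_task(target=run_generation)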
@@ -323,110 +324,125 @@ class LoLLMsServer:
         @self.socketio.on('cancel_generation')
         def cancel_generation(data):
             client_id = request.sid
-            self.clients[client_id]["requested_stop"]=False
+            self.clients[client_id]["requested_stop"]=True
             print(f"Client {client_id} requested canceling generation")
             emit("generation_canceled", {"message":"Generation is canceled."})
+            self.socketio.sleep(0)
 
 
         @self.socketio.on('generate_text')
         def handle_generate_text(data):
+            client_id = request.sid
             if not self.is_ready:
                 emit("buzzy", {"message":"I am buzzy. Come back later."})
+                self.socketio.sleep(0)
                 return
-            model = self.current_model
-            client_id = request.sid
+            def generate_text():
+                model = self.current_model
                 self.clients[client_id]["is_generating"]=True
                 self.clients[client_id]["requested_stop"]=False
                 prompt = data['prompt']
                 personality_id = data['personality']
                 n_predicts = data["n_predicts"]
                 if personality_id==-1:
                     # Raw text generation
                     print(f"Text generation requested by client: {client_id}")
-                    self.answer[0] = ''
+                    self.answer = {"full_text":""}
                     def callback(text, message_type: MSG_TYPE):
                         if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
-                            self.answer[0] = self.answer[0] + text
-                            emit('text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, room=client_id)
-                        if self.clients[client_id]["requested_stop"]:
-                            return False
-                        else:
-                            return True
+                            print(f"generated:{len(self.answer['full_text'])} words", end='\r')
+                            self.answer["full_text"] = self.answer["full_text"] + text
+                            self.socketio.emit('text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, room=client_id)
+                            self.socketio.sleep(0)
+                        if client_id in self.clients:# Client disconnected
+                            if self.clients[client_id]["requested_stop"]:
+                                return False
+                            else:
+                                return True
+                        else:
+                            return False
 
                     tk = model.tokenize(prompt)
                     n_tokens = len(tk)
                     fd = model.detokenize(tk[-min(self.config.ctx_size,n_tokens):])
 
-                    print("generating...", end="", flush=True)
+                    ASCIIColors.print("warm up", ASCIIColors.color_bright_cyan)
                     generated_text = model.generate(fd, n_predict=n_predicts, callback=callback)
-                    ASCIIColors.success(f"ok")
-
-                    # Emit the generated text to the client
-                    emit('text_generated', {'text': generated_text}, room=client_id)
+                    ASCIIColors.success(f"\ndone")
+                    if client_id in self.clients:
+                        if not self.clients[client_id]["requested_stop"]:
+                            # Emit the generated text to the client
+                            self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
+                            self.socketio.sleep(0)
 
                 else:
                     personality: AIPersonality = self.personalities[personality_id]
                     personality.model = model
                     cond_tk = personality.model.tokenize(personality.personality_conditioning)
                     n_cond_tk = len(cond_tk)
                     # Placeholder code for text generation
                     # Replace this with your actual text generation logic
                     print(f"Text generation requested by client: {client_id}")
 
-                    self.answer[0] = ''
+                    self.answer["full_text"] = ''
                     full_discussion_blocks = self.clients[client_id]["full_discussion_blocks"]
 
                     if prompt != '':
                         if personality.processor is not None and personality.processor_cfg["process_model_input"]:
                             preprocessed_prompt = personality.processor.process_model_input(prompt)
                         else:
                             preprocessed_prompt = prompt
 
                         if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
                             full_discussion_blocks.append(personality.user_message_prefix)
                             full_discussion_blocks.append(preprocessed_prompt)
 
                         else:
 
                             full_discussion_blocks.append(personality.user_message_prefix)
                             full_discussion_blocks.append(preprocessed_prompt)
                             full_discussion_blocks.append(personality.link_text)
                             full_discussion_blocks.append(personality.ai_message_prefix)
 
                     full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
 
                     def callback(text, message_type: MSG_TYPE):
                         if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
-                            self.answer[0] = self.answer[0] + text
-                            emit('text_chunk', {'chunk': text}, room=client_id)
+                            self.answer["full_text"] = self.answer["full_text"] + text
+                            self.socketio.emit('text_chunk', {'chunk': text}, room=client_id)
+                            self.socketio.sleep(0)
                         try:
                             if self.clients[client_id]["requested_stop"]:
                                 return False
                             else:
                                 return True
                         except: # If the client is disconnected then we stop talking to it
                             return False
 
                     tk = personality.model.tokenize(full_discussion)
                     n_tokens = len(tk)
                     fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):])
 
                     if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
                         print("processing...", end="", flush=True)
                         generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
-                        print(generated_text)
                     else:
-                        print("generating...", end="", flush=True)
+                        ASCIIColors.info("generating...", end="", flush=True)
                         generated_text = personality.model.generate(personality.personality_conditioning+fd, n_predict=personality.model_n_predicts, callback=callback)
 
                     if personality.processor is not None and personality.processor_cfg["process_model_output"]:
                         generated_text = personality.processor.process_model_output(generated_text)
 
                     full_discussion_blocks.append(generated_text.strip())
-                    print(f"{ASCIIColors.color_green}ok{ASCIIColors.color_reset}", end="", flush=True)
+                    ASCIIColors.success("\ndone", end="", flush=True)
 
                     # Emit the generated text to the client
-                    emit('text_generated', {'text': generated_text}, room=client_id)
+                    self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
+                    self.socketio.sleep(0)
+
+            # Start the text generation task in a separate thread
+            self.socketio.start_background_task(target=generate_text,)
+            generate_text()
 
     def build_binding(self, bindings_path: Path, cfg: LOLLMSConfig)->LLMBinding:
         binding_path = Path(bindings_path) / cfg["binding_name"]
         # first find out if there is a requirements.txt file
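Aside (not part of the commit): the core of this hunk is the cancellation and streaming pattern: each client gets a `requested_stop` flag, `cancel_generation` now sets it to True (it was mistakenly set to False before), and the generation callback returns False to abort as soon as the flag is set or the client has disconnected, with `socketio.sleep(0)` yielding control so chunks are flushed and incoming cancel events are handled. Below is a minimal, self-contained sketch of that pattern, assuming a bare Flask-SocketIO server; `fake_generate` and the payloads are placeholders, not lollms code.

    from flask import Flask, request
    from flask_socketio import SocketIO, emit

    app = Flask(__name__)
    socketio = SocketIO(app)
    clients = {}

    def fake_generate(prompt, callback):
        # Calls callback(chunk) repeatedly and stops as soon as it returns False,
        # mimicking the callback contract of model.generate(...).
        for word in (prompt + " and so on").split():
            if not callback(word + " "):
                break

    @socketio.on('connect')
    def on_connect():
        clients[request.sid] = {"requested_stop": False}

    @socketio.on('disconnect')
    def on_disconnect():
        clients.pop(request.sid, None)

    @socketio.on('cancel_generation')
    def on_cancel(data):
        # Setting the flag to True is exactly what this commit fixes.
        clients[request.sid]["requested_stop"] = True
        emit("generation_canceled", {"message": "Generation is canceled."})

    @socketio.on('generate_text')
    def on_generate(data):
        client_id = request.sid
        clients[client_id]["requested_stop"] = False
        chunks = []

        def callback(chunk):
            chunks.append(chunk)
            socketio.emit('text_chunk', {'chunk': chunk}, room=client_id)
            socketio.sleep(0)  # yield so the chunk is flushed and cancel events get processed
            # Keep going only while the client is still connected and has not asked to stop.
            return client_id in clients and not clients[client_id]["requested_stop"]

        def generate():
            fake_generate(data.get('prompt', ''), callback)
            if client_id in clients and not clients[client_id]["requested_stop"]:
                socketio.emit('text_generated', {'text': ''.join(chunks)}, room=client_id)

        # Run generation off the handler, as the new generate_text() wrapper does.
        socketio.start_background_task(target=generate)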
@@ -450,7 +466,7 @@ class LoLLMsServer:
         return binding_class
 
 
-    def run(self, host="localhost", port="9600"):
+    def run(self, host="localhost", port="9601"):
         parser = argparse.ArgumentParser()
         parser.add_argument('--host', '-hst', default=host, help='Host name')
         parser.add_argument('--port', '-prt', default=port, help='Port number')
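Aside (not part of the commit): the default port moves from 9600 to 9601, but only as the argparse default, so it can still be overridden on the command line. A small sketch of how the default propagates, assuming the same argparse pattern as `run()` above.

    import argparse

    def run(host="localhost", port="9601"):
        parser = argparse.ArgumentParser()
        parser.add_argument('--host', '-hst', default=host, help='Host name')
        parser.add_argument('--port', '-prt', default=port, help='Port number')
        args = parser.parse_args()
        print(f"Serving on address: http://{args.host}:{args.port}")

    # Launching with no arguments now binds port 9601; passing --port 9600
    # restores the previous default.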
@@ -530,7 +546,7 @@ class LoLLMsServer:
 
         self.menu.show_logo()
         print(f"{ASCIIColors.color_red}Current personality : {ASCIIColors.color_reset}{self.active_personality}")
-        print("running...")
+        ASCIIColors.info(f"Serving on address: http://{args.host}:{args.port}")
 
         self.socketio.run(self.app, host=args.host, port=args.port)
 
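Aside (not part of the commit): the events touched above ('generate_text', 'text_chunk', 'text_generated', 'cancel_generation', 'buzzy') can be exercised end to end with the python-socketio client. This sketch assumes the server from this commit is running locally on the new default port 9601; the prompt and parameter values are arbitrary examples.

    import socketio

    sio = socketio.Client()

    @sio.on('text_chunk')
    def on_chunk(data):
        print(data['chunk'], end='', flush=True)

    @sio.on('text_generated')
    def on_done(data):
        print("\n--- full text ---\n" + data['text'])
        sio.disconnect()

    @sio.on('buzzy')
    def on_busy(data):
        print(data['message'])
        sio.disconnect()

    sio.connect('http://localhost:9601')
    # personality = -1 requests raw text generation, as in the handler above
    sio.emit('generate_text', {'prompt': 'Once upon a time',
                               'personality': -1,
                               'n_predicts': 128})
    # A client can stop a running generation with: sio.emit('cancel_generation', {})
    sio.wait()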
setup.py  (2 changed lines)
@@ -26,7 +26,7 @@ def get_all_files(path):
 
 setuptools.setup(
     name="lollms",
-    version="1.2.6",
+    version="1.2.8",
     author="Saifeddine ALOUI",
     author_email="aloui.saifeddine@gmail.com",
     description="A python library for AI personality definition",