mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-01-19 03:06:37 +00:00
bugfixes
This commit is contained in:
parent
9fe9d4e9c7
commit
ca4a4a7802
@ -1 +1 @@
|
||||
Subproject commit 003043c7e237fb6ce952c326df9da1404499d810
|
||||
Subproject commit 02543f5b23ad31cd587dffd4b8b8f2623a7ea3a2
|
188
lollms/server.py
188
lollms/server.py
@ -17,6 +17,7 @@ import logging
|
||||
import shutil
|
||||
import yaml
|
||||
import copy
|
||||
from threading import Thread
|
||||
|
||||
class LoLLMsServer:
|
||||
def __init__(self):
|
||||
@ -323,110 +324,125 @@ class LoLLMsServer:
|
||||
@self.socketio.on('cancel_generation')
|
||||
def cancel_generation(data):
|
||||
client_id = request.sid
|
||||
self.clients[client_id]["requested_stop"]=False
|
||||
self.clients[client_id]["requested_stop"]=True
|
||||
print(f"Client {client_id} requested canceling generation")
|
||||
emit("generation_canceled", {"message":"Generation is canceled."})
|
||||
self.socketio.sleep(0)
|
||||
|
||||
|
||||
@self.socketio.on('generate_text')
|
||||
def handle_generate_text(data):
|
||||
client_id = request.sid
|
||||
if not self.is_ready:
|
||||
emit("buzzy", {"message":"I am buzzy. Come back later."})
|
||||
self.socketio.sleep(0)
|
||||
return
|
||||
model = self.current_model
|
||||
client_id = request.sid
|
||||
self.clients[client_id]["is_generating"]=True
|
||||
self.clients[client_id]["requested_stop"]=False
|
||||
prompt = data['prompt']
|
||||
personality_id = data['personality']
|
||||
n_predicts = data["n_predicts"]
|
||||
if personality_id==-1:
|
||||
# Raw text generation
|
||||
print(f"Text generation requested by client: {client_id}")
|
||||
self.answer[0] = ''
|
||||
def callback(text, message_type: MSG_TYPE):
|
||||
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
|
||||
self.answer[0] = self.answer[0] + text
|
||||
emit('text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, room=client_id)
|
||||
if self.clients[client_id]["requested_stop"]:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
def generate_text():
|
||||
model = self.current_model
|
||||
self.clients[client_id]["is_generating"]=True
|
||||
self.clients[client_id]["requested_stop"]=False
|
||||
prompt = data['prompt']
|
||||
personality_id = data['personality']
|
||||
n_predicts = data["n_predicts"]
|
||||
if personality_id==-1:
|
||||
# Raw text generation
|
||||
print(f"Text generation requested by client: {client_id}")
|
||||
self.answer = {"full_text":""}
|
||||
def callback(text, message_type: MSG_TYPE):
|
||||
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
|
||||
print(f"generated:{len(self.answer['full_text'])} words", end='\r')
|
||||
self.answer["full_text"] = self.answer["full_text"] + text
|
||||
self.socketio.emit('text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, room=client_id)
|
||||
self.socketio.sleep(0)
|
||||
if client_id in self.clients:# Client disconnected
|
||||
if self.clients[client_id]["requested_stop"]:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
tk = model.tokenize(prompt)
|
||||
n_tokens = len(tk)
|
||||
fd = model.detokenize(tk[-min(self.config.ctx_size,n_tokens):])
|
||||
tk = model.tokenize(prompt)
|
||||
n_tokens = len(tk)
|
||||
fd = model.detokenize(tk[-min(self.config.ctx_size,n_tokens):])
|
||||
|
||||
print("generating...", end="", flush=True)
|
||||
generated_text = model.generate(fd, n_predict=n_predicts, callback=callback)
|
||||
ASCIIColors.success(f"ok")
|
||||
ASCIIColors.print("warm up", ASCIIColors.color_bright_cyan)
|
||||
generated_text = model.generate(fd, n_predict=n_predicts, callback=callback)
|
||||
ASCIIColors.success(f"\ndone")
|
||||
if client_id in self.clients:
|
||||
if not self.clients[client_id]["requested_stop"]:
|
||||
# Emit the generated text to the client
|
||||
self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
|
||||
self.socketio.sleep(0)
|
||||
|
||||
# Emit the generated text to the client
|
||||
emit('text_generated', {'text': generated_text}, room=client_id)
|
||||
else:
|
||||
personality: AIPersonality = self.personalities[personality_id]
|
||||
personality.model = model
|
||||
cond_tk = personality.model.tokenize(personality.personality_conditioning)
|
||||
n_cond_tk = len(cond_tk)
|
||||
# Placeholder code for text generation
|
||||
# Replace this with your actual text generation logic
|
||||
print(f"Text generation requested by client: {client_id}")
|
||||
else:
|
||||
personality: AIPersonality = self.personalities[personality_id]
|
||||
personality.model = model
|
||||
cond_tk = personality.model.tokenize(personality.personality_conditioning)
|
||||
n_cond_tk = len(cond_tk)
|
||||
# Placeholder code for text generation
|
||||
# Replace this with your actual text generation logic
|
||||
print(f"Text generation requested by client: {client_id}")
|
||||
|
||||
self.answer[0] = ''
|
||||
full_discussion_blocks = self.clients[client_id]["full_discussion_blocks"]
|
||||
self.answer["full_text"] = ''
|
||||
full_discussion_blocks = self.clients[client_id]["full_discussion_blocks"]
|
||||
|
||||
if prompt != '':
|
||||
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
|
||||
preprocessed_prompt = personality.processor.process_model_input(prompt)
|
||||
else:
|
||||
preprocessed_prompt = prompt
|
||||
if prompt != '':
|
||||
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
|
||||
preprocessed_prompt = personality.processor.process_model_input(prompt)
|
||||
else:
|
||||
preprocessed_prompt = prompt
|
||||
|
||||
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
|
||||
full_discussion_blocks.append(personality.user_message_prefix)
|
||||
full_discussion_blocks.append(preprocessed_prompt)
|
||||
|
||||
else:
|
||||
|
||||
full_discussion_blocks.append(personality.user_message_prefix)
|
||||
full_discussion_blocks.append(preprocessed_prompt)
|
||||
full_discussion_blocks.append(personality.link_text)
|
||||
full_discussion_blocks.append(personality.ai_message_prefix)
|
||||
|
||||
full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
|
||||
|
||||
def callback(text, message_type: MSG_TYPE):
|
||||
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
|
||||
self.answer["full_text"] = self.answer["full_text"] + text
|
||||
self.socketio.emit('text_chunk', {'chunk': text}, room=client_id)
|
||||
self.socketio.sleep(0)
|
||||
try:
|
||||
if self.clients[client_id]["requested_stop"]:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
except: # If the client is disconnected then we stop talking to it
|
||||
return False
|
||||
|
||||
tk = personality.model.tokenize(full_discussion)
|
||||
n_tokens = len(tk)
|
||||
fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):])
|
||||
|
||||
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
|
||||
full_discussion_blocks.append(personality.user_message_prefix)
|
||||
full_discussion_blocks.append(preprocessed_prompt)
|
||||
|
||||
print("processing...", end="", flush=True)
|
||||
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
|
||||
else:
|
||||
ASCIIColors.info("generating...", end="", flush=True)
|
||||
generated_text = personality.model.generate(personality.personality_conditioning+fd, n_predict=personality.model_n_predicts, callback=callback)
|
||||
|
||||
full_discussion_blocks.append(personality.user_message_prefix)
|
||||
full_discussion_blocks.append(preprocessed_prompt)
|
||||
full_discussion_blocks.append(personality.link_text)
|
||||
full_discussion_blocks.append(personality.ai_message_prefix)
|
||||
if personality.processor is not None and personality.processor_cfg["process_model_output"]:
|
||||
generated_text = personality.processor.process_model_output(generated_text)
|
||||
|
||||
full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
|
||||
|
||||
def callback(text, message_type: MSG_TYPE):
|
||||
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
|
||||
self.answer[0] = self.answer[0] + text
|
||||
emit('text_chunk', {'chunk': text}, room=client_id)
|
||||
try:
|
||||
if self.clients[client_id]["requested_stop"]:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
except: # If the client is disconnected then we stop talking to it
|
||||
return False
|
||||
|
||||
tk = personality.model.tokenize(full_discussion)
|
||||
n_tokens = len(tk)
|
||||
fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):])
|
||||
|
||||
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
|
||||
print("processing...", end="", flush=True)
|
||||
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
|
||||
print(generated_text)
|
||||
else:
|
||||
print("generating...", end="", flush=True)
|
||||
generated_text = personality.model.generate(personality.personality_conditioning+fd, n_predict=personality.model_n_predicts, callback=callback)
|
||||
|
||||
if personality.processor is not None and personality.processor_cfg["process_model_output"]:
|
||||
generated_text = personality.processor.process_model_output(generated_text)
|
||||
|
||||
full_discussion_blocks.append(generated_text.strip())
|
||||
print(f"{ASCIIColors.color_green}ok{ASCIIColors.color_reset}", end="", flush=True)
|
||||
|
||||
# Emit the generated text to the client
|
||||
emit('text_generated', {'text': generated_text}, room=client_id)
|
||||
full_discussion_blocks.append(generated_text.strip())
|
||||
ASCIIColors.success("\ndone", end="", flush=True)
|
||||
|
||||
# Emit the generated text to the client
|
||||
self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
|
||||
self.socketio.sleep(0)
|
||||
# Start the text generation task in a separate thread
|
||||
self.socketio.start_background_task(target=generate_text,)
|
||||
generate_text()
|
||||
|
||||
def build_binding(self, bindings_path: Path, cfg: LOLLMSConfig)->LLMBinding:
|
||||
binding_path = Path(bindings_path) / cfg["binding_name"]
|
||||
# first find out if there is a requirements.txt file
|
||||
@ -450,7 +466,7 @@ class LoLLMsServer:
|
||||
return binding_class
|
||||
|
||||
|
||||
def run(self, host="localhost", port="9600"):
|
||||
def run(self, host="localhost", port="9601"):
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--host', '-hst', default=host, help='Host name')
|
||||
parser.add_argument('--port', '-prt', default=port, help='Port number')
|
||||
@ -530,7 +546,7 @@ class LoLLMsServer:
|
||||
|
||||
self.menu.show_logo()
|
||||
print(f"{ASCIIColors.color_red}Current personality : {ASCIIColors.color_reset}{self.active_personality}")
|
||||
print("running...")
|
||||
ASCIIColors.info(f"Serving on address: http://{args.host}:{args.port}")
|
||||
|
||||
self.socketio.run(self.app, host=args.host, port=args.port)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user