diff --git a/lollms/server.py b/lollms/server.py
index 10edc4e..d3acb2d 100644
--- a/lollms/server.py
+++ b/lollms/server.py
@@ -458,15 +458,19 @@ class LoLLMsServer:
                     fd = model.detokenize(tk[-min(self.config.ctx_size,n_tokens):])
 
                     ASCIIColors.print("warm up", ASCIIColors.color_bright_cyan)
-                    generated_text = model.generate(fd, n_predict=n_predicts, callback=callback,
-                                                    temperature = parameters["temperature"],
-                                                    top_k = parameters["top_k"],
-                                                    top_p = parameters["top_p"],
-                                                    repeat_penalty = parameters["repeat_penalty"],
-                                                    repeat_last_n = parameters["repeat_last_n"],
-                                                    seed = parameters["seed"]
-                                                    )
-                    ASCIIColors.success(f"\ndone")
+                    try:
+                        generated_text = model.generate(fd, n_predict=n_predicts, callback=callback,
+                                                        temperature = parameters["temperature"],
+                                                        top_k = parameters["top_k"],
+                                                        top_p = parameters["top_p"],
+                                                        repeat_penalty = parameters["repeat_penalty"],
+                                                        repeat_last_n = parameters["repeat_last_n"],
+                                                        seed = parameters["seed"]
+                                                        )
+                        ASCIIColors.success(f"\ndone")
+                    except Exception as ex:
+                        self.socketio.emit('generation_error', {'error': str(ex)}, room=client_id)
+                        ASCIIColors.error(f"\ndone")
                     if client_id in self.clients:
                         if not self.clients[client_id]["requested_stop"]:
                             # Emit the generated text to the client
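Review note on the first hunk: an exception inside the warm-up model.generate() call used to escape the background task, so the client was never told that generation had died; wrapping the call and emitting a generation_error event closes that gap. Both generation paths stream through the same callback contract (visible in the next hunk): the model keeps producing tokens while the callback returns True. Below is a minimal, self-contained sketch of that contract; the factory name make_stream_callback is hypothetical, and the MSG_TYPE enum is a stand-in for the one lollms imports in server.py.

    from enum import Enum

    class MSG_TYPE(Enum):
        # Stand-in for the lollms enum referenced in the diff
        MSG_TYPE_CHUNK = 0

    def make_stream_callback(clients, client_id, emit):
        # Build the callback that model.generate() drives for each new chunk
        # (hypothetical factory; the diff defines the callback inline).
        def callback(text, message_type):
            if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
                # Relay the chunk to the requesting client as it arrives
                emit('text_chunk', {'chunk': text}, room=client_id)
            try:
                # Returning False tells the model to stop generating
                return not clients[client_id]["requested_stop"]
            except KeyError:
                # Client record is gone (disconnect): abort the generation
                return False
        return callback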
self.clients[client_id]["requested_stop"]: + return False + else: + return True + except: # If the client is disconnected then we stop talking to it + return False + + tk = personality.model.tokenize(full_discussion) + n_tokens = len(tk) + fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):]) if personality.processor is not None and personality.processor_cfg["custom_workflow"]: - full_discussion_blocks.append(personality.user_message_prefix) - full_discussion_blocks.append(preprocessed_prompt) - + print("processing...", end="", flush=True) + generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback) else: + ASCIIColors.info("generating...", end="", flush=True) + generated_text = personality.model.generate( + personality.personality_conditioning+fd, + n_predict=personality.model_n_predicts, + callback=callback) - full_discussion_blocks.append(personality.user_message_prefix) - full_discussion_blocks.append(preprocessed_prompt) - full_discussion_blocks.append(personality.link_text) - full_discussion_blocks.append(personality.ai_message_prefix) + if personality.processor is not None and personality.processor_cfg["process_model_output"]: + generated_text = personality.processor.process_model_output(generated_text) - full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks) + full_discussion_blocks.append(generated_text.strip()) + ASCIIColors.success("\ndone", end="", flush=True) - def callback(text, message_type: MSG_TYPE): - if message_type == MSG_TYPE.MSG_TYPE_CHUNK: - self.answer["full_text"] = self.answer["full_text"] + text - self.socketio.emit('text_chunk', {'chunk': text}, room=client_id) - self.socketio.sleep(0) - try: - if self.clients[client_id]["requested_stop"]: - return False - else: - return True - except: # If the client is disconnected then we stop talking to it - return False - - tk = personality.model.tokenize(full_discussion) - n_tokens = len(tk) - fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):]) - - if personality.processor is not None and personality.processor_cfg["custom_workflow"]: - print("processing...", end="", flush=True) - generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback) - else: - ASCIIColors.info("generating...", end="", flush=True) - generated_text = personality.model.generate( - personality.personality_conditioning+fd, - n_predict=personality.model_n_predicts, - callback=callback) - - if personality.processor is not None and personality.processor_cfg["process_model_output"]: - generated_text = personality.processor.process_model_output(generated_text) - - full_discussion_blocks.append(generated_text.strip()) - ASCIIColors.success("\ndone", end="", flush=True) - - # Emit the generated text to the client - self.socketio.emit('text_generated', {'text': generated_text}, room=client_id) - self.socketio.sleep(0) + # Emit the generated text to the client + self.socketio.emit('text_generated', {'text': generated_text}, room=client_id) + self.socketio.sleep(0) + except Exception as ex: + self.socketio.emit('generation_error', {'error': str(ex)}, room=client_id) + ASCIIColors.error(f"\ndone") self.is_ready = True # Start the text generation task in a separate thread self.socketio.start_background_task(target=generate_text, once=True) diff --git a/setup.py b/setup.py index 8b073c7..6451ebb 100644 
diff --git a/setup.py b/setup.py
index 8b073c7..6451ebb 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@ def get_all_files(path):
 
 setuptools.setup(
     name="lollms",
-    version="1.2.10",
+    version="1.2.11",
     author="Saifeddine ALOUI",
     author_email="aloui.saifeddine@gmail.com",
     description="A python library for AI personality definition",
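The setup.py change only bumps the package version so the fix can ship. Once 1.2.11 is published to PyPI, users can pick it up with:

    pip install --upgrade lollms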