Mirror of https://github.com/ParisNeo/lollms.git (synced 2024-12-19 20:57:58 +00:00)
bugfixes
This commit is contained in:
parent 9fe9d4e9c7
commit ca4a4a7802
@@ -1 +1 @@
-Subproject commit 003043c7e237fb6ce952c326df9da1404499d810
+Subproject commit 02543f5b23ad31cd587dffd4b8b8f2623a7ea3a2
lollms/server.py: 188 changed lines
@@ -17,6 +17,7 @@ import logging
 import shutil
 import yaml
+import copy
 from threading import Thread

 class LoLLMsServer:
     def __init__(self):
@@ -323,110 +324,125 @@ class LoLLMsServer:
@self.socketio.on('cancel_generation')
def cancel_generation(data):
client_id = request.sid
self.clients[client_id]["requested_stop"]=False
self.clients[client_id]["requested_stop"]=True
print(f"Client {client_id} requested canceling generation")
emit("generation_canceled", {"message":"Generation is canceled."})
self.socketio.sleep(0)
@self.socketio.on('generate_text')
def handle_generate_text(data):
client_id = request.sid
if not self.is_ready:
emit("buzzy", {"message":"I am buzzy. Come back later."})
self.socketio.sleep(0)
return
model = self.current_model
client_id = request.sid
self.clients[client_id]["is_generating"]=True
self.clients[client_id]["requested_stop"]=False
prompt = data['prompt']
personality_id = data['personality']
n_predicts = data["n_predicts"]
if personality_id==-1:
# Raw text generation
print(f"Text generation requested by client: {client_id}")
self.answer[0] = ''
def callback(text, message_type: MSG_TYPE):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
self.answer[0] = self.answer[0] + text
emit('text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, room=client_id)
if self.clients[client_id]["requested_stop"]:
return False
else:
return True
def generate_text():
model = self.current_model
self.clients[client_id]["is_generating"]=True
self.clients[client_id]["requested_stop"]=False
prompt = data['prompt']
personality_id = data['personality']
n_predicts = data["n_predicts"]
if personality_id==-1:
# Raw text generation
print(f"Text generation requested by client: {client_id}")
self.answer = {"full_text":""}
def callback(text, message_type: MSG_TYPE):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
print(f"generated:{len(self.answer['full_text'])} words", end='\r')
self.answer["full_text"] = self.answer["full_text"] + text
self.socketio.emit('text_chunk', {'chunk': text, 'type':MSG_TYPE.MSG_TYPE_CHUNK.value}, room=client_id)
self.socketio.sleep(0)
if client_id in self.clients:# Client disconnected
if self.clients[client_id]["requested_stop"]:
return False
else:
return True
else:
return False
tk = model.tokenize(prompt)
n_tokens = len(tk)
fd = model.detokenize(tk[-min(self.config.ctx_size,n_tokens):])
tk = model.tokenize(prompt)
n_tokens = len(tk)
fd = model.detokenize(tk[-min(self.config.ctx_size,n_tokens):])
print("generating...", end="", flush=True)
generated_text = model.generate(fd, n_predict=n_predicts, callback=callback)
ASCIIColors.success(f"ok")
ASCIIColors.print("warm up", ASCIIColors.color_bright_cyan)
generated_text = model.generate(fd, n_predict=n_predicts, callback=callback)
ASCIIColors.success(f"\ndone")
if client_id in self.clients:
if not self.clients[client_id]["requested_stop"]:
# Emit the generated text to the client
self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
self.socketio.sleep(0)
# Emit the generated text to the client
emit('text_generated', {'text': generated_text}, room=client_id)
else:
personality: AIPersonality = self.personalities[personality_id]
personality.model = model
cond_tk = personality.model.tokenize(personality.personality_conditioning)
n_cond_tk = len(cond_tk)
# Placeholder code for text generation
# Replace this with your actual text generation logic
print(f"Text generation requested by client: {client_id}")
else:
personality: AIPersonality = self.personalities[personality_id]
personality.model = model
cond_tk = personality.model.tokenize(personality.personality_conditioning)
n_cond_tk = len(cond_tk)
# Placeholder code for text generation
# Replace this with your actual text generation logic
print(f"Text generation requested by client: {client_id}")
self.answer[0] = ''
full_discussion_blocks = self.clients[client_id]["full_discussion_blocks"]
self.answer["full_text"] = ''
full_discussion_blocks = self.clients[client_id]["full_discussion_blocks"]
if prompt != '':
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
preprocessed_prompt = personality.processor.process_model_input(prompt)
else:
preprocessed_prompt = prompt
if prompt != '':
if personality.processor is not None and personality.processor_cfg["process_model_input"]:
preprocessed_prompt = personality.processor.process_model_input(prompt)
else:
preprocessed_prompt = prompt
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
full_discussion_blocks.append(personality.user_message_prefix)
full_discussion_blocks.append(preprocessed_prompt)
else:
full_discussion_blocks.append(personality.user_message_prefix)
full_discussion_blocks.append(preprocessed_prompt)
full_discussion_blocks.append(personality.link_text)
full_discussion_blocks.append(personality.ai_message_prefix)
full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
def callback(text, message_type: MSG_TYPE):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
self.answer["full_text"] = self.answer["full_text"] + text
self.socketio.emit('text_chunk', {'chunk': text}, room=client_id)
self.socketio.sleep(0)
try:
if self.clients[client_id]["requested_stop"]:
return False
else:
return True
except: # If the client is disconnected then we stop talking to it
return False
tk = personality.model.tokenize(full_discussion)
n_tokens = len(tk)
fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):])
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
full_discussion_blocks.append(personality.user_message_prefix)
full_discussion_blocks.append(preprocessed_prompt)
print("processing...", end="", flush=True)
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
else:
ASCIIColors.info("generating...", end="", flush=True)
generated_text = personality.model.generate(personality.personality_conditioning+fd, n_predict=personality.model_n_predicts, callback=callback)
full_discussion_blocks.append(personality.user_message_prefix)
full_discussion_blocks.append(preprocessed_prompt)
full_discussion_blocks.append(personality.link_text)
full_discussion_blocks.append(personality.ai_message_prefix)
if personality.processor is not None and personality.processor_cfg["process_model_output"]:
generated_text = personality.processor.process_model_output(generated_text)
full_discussion = personality.personality_conditioning + ''.join(full_discussion_blocks)
def callback(text, message_type: MSG_TYPE):
if message_type == MSG_TYPE.MSG_TYPE_CHUNK:
self.answer[0] = self.answer[0] + text
emit('text_chunk', {'chunk': text}, room=client_id)
try:
if self.clients[client_id]["requested_stop"]:
return False
else:
return True
except: # If the client is disconnected then we stop talking to it
return False
tk = personality.model.tokenize(full_discussion)
n_tokens = len(tk)
fd = personality.model.detokenize(tk[-min(self.config.ctx_size-n_cond_tk,n_tokens):])
if personality.processor is not None and personality.processor_cfg["custom_workflow"]:
print("processing...", end="", flush=True)
generated_text = personality.processor.run_workflow(prompt, previous_discussion_text=personality.personality_conditioning+fd, callback=callback)
print(generated_text)
else:
print("generating...", end="", flush=True)
generated_text = personality.model.generate(personality.personality_conditioning+fd, n_predict=personality.model_n_predicts, callback=callback)
if personality.processor is not None and personality.processor_cfg["process_model_output"]:
generated_text = personality.processor.process_model_output(generated_text)
full_discussion_blocks.append(generated_text.strip())
print(f"{ASCIIColors.color_green}ok{ASCIIColors.color_reset}", end="", flush=True)
# Emit the generated text to the client
emit('text_generated', {'text': generated_text}, room=client_id)
full_discussion_blocks.append(generated_text.strip())
ASCIIColors.success("\ndone", end="", flush=True)
# Emit the generated text to the client
self.socketio.emit('text_generated', {'text': generated_text}, room=client_id)
self.socketio.sleep(0)
# Start the text generation task in a separate thread
self.socketio.start_background_task(target=generate_text,)
generate_text()
def build_binding(self, bindings_path: Path, cfg: LOLLMSConfig)->LLMBinding:
binding_path = Path(bindings_path) / cfg["binding_name"]
# first find out if there is a requirements.txt file
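The hunk above routes generation through self.socketio.start_background_task(target=generate_text,) and streams chunks with self.socketio.emit(..., room=client_id) followed by self.socketio.sleep(0). The following minimal Flask-SocketIO sketch is not part of the commit; the app and the event names are illustrative, only the flask_socketio calls mirror what the diff does.

# Sketch of the pattern used in the hunk above: capture the client's sid in
# the handler, hand the blocking work to start_background_task, and emit
# through the SocketIO server object (plus sleep(0) so packets get flushed).
# The app and the 'start_job'/'progress' event names are illustrative.
from flask import Flask, request
from flask_socketio import SocketIO

app = Flask(__name__)
socketio = SocketIO(app)

@socketio.on('start_job')
def handle_start_job(data):
    client_id = request.sid          # only valid inside the request context

    def job():
        for i in range(5):
            socketio.emit('progress', {'step': i}, room=client_id)
            socketio.sleep(0)        # yield so the queued packet is sent

    socketio.start_background_task(target=job)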
@@ -450,7 +466,7 @@ class LoLLMsServer:
         return binding_class


-    def run(self, host="localhost", port="9600"):
+    def run(self, host="localhost", port="9601"):
         parser = argparse.ArgumentParser()
         parser.add_argument('--host', '-hst', default=host, help='Host name')
         parser.add_argument('--port', '-prt', default=port, help='Port number')
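The run() change above only touches the argparse fallback; an explicit --port (or -prt) flag still overrides it. A small illustrative sketch of that behaviour, using the same argument definitions as the diff:

# Sketch (not part of the commit): the default port is just the argparse
# fallback, so a command-line flag still wins. Override values are illustrative.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--host', '-hst', default="localhost", help='Host name')
parser.add_argument('--port', '-prt', default="9601", help='Port number')

print(parser.parse_args([]).port)                  # 9601 (the fallback)
print(parser.parse_args(['--port', '9602']).port)  # 9602 (flag overrides)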
@@ -530,7 +546,7 @@ class LoLLMsServer:

         self.menu.show_logo()
         print(f"{ASCIIColors.color_red}Current personality : {ASCIIColors.color_reset}{self.active_personality}")
-        print("running...")
+        ASCIIColors.info(f"Serving on address: http://{args.host}:{args.port}")

         self.socketio.run(self.app, host=args.host, port=args.port)
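For reference, a client-side sketch that exercises the events appearing in this diff ('generate_text', 'text_chunk', 'text_generated', 'buzzy'). It assumes the python-socketio client package and the default address from run() above; the prompt, the n_predicts value, and the handler names are illustrative.

# Client-side sketch (not part of the commit). Event names and payload keys
# ('prompt', 'personality', 'n_predicts', 'chunk', 'text', 'message') come
# from the handlers in the diff; everything else is illustrative.
import socketio

sio = socketio.Client()

@sio.on('text_chunk')
def on_chunk(data):
    print(data['chunk'], end='', flush=True)   # streamed while generating

@sio.on('text_generated')
def on_done(data):
    print("\n--- full text ---")
    print(data['text'])
    sio.disconnect()

@sio.on('buzzy')
def on_busy(data):
    print(data['message'])                     # server is still busy
    sio.disconnect()

sio.connect('http://localhost:9601')
sio.emit('generate_text', {
    'prompt': 'Once upon a time',
    'personality': -1,     # -1 selects the raw text generation path
    'n_predicts': 128,
})
sio.wait()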
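Cancellation goes through the 'cancel_generation' and 'generation_canceled' events from the first hunk. A short sketch, reusing the sio client object from the sketch above:

# Sketch (not part of the commit): ask the server to stop an in-flight
# generation. Reuses the `sio` client defined above; the empty payload is
# illustrative, the handler does not read it.
@sio.on('generation_canceled')
def on_canceled(data):
    print(data['message'])

sio.emit('cancel_generation', {})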