Moved message generation from ThreadPoolExecutor to a plain threading.Thread

2025-03-01 03:36:18 +00:00 · 2023-04-24 22:02:50 +02:00 · 2023-04-24 22:02:50 +02:00 · 79426ce9d0
commit 79426ce9d0
parent 917600f412
2 changed files with 24 additions and 18 deletions
--- a/app.py
+++ b/app.py
@ -20,7 +20,7 @@ import argparse
 import json
 import re
 import traceback
-from concurrent.futures import ThreadPoolExecutor
+import threading
 import sys
 from pyGpt4All.db import DiscussionsDB, Discussion
 from flask import (
@ -263,8 +263,10 @@ class Gpt4AllWebUI(GPT4AllAPI):
        self.discussion_messages = self.prepare_query(message_id)
        self.prepare_reception()
        self.generating = True
-        app.config['executor'] = ThreadPoolExecutor(max_workers=1)
-        app.config['executor'].submit(self.generate_message)
+        # app.config['executor'] = ThreadPoolExecutor(max_workers=1)
+        # app.config['executor'].submit(self.generate_message)
+        tpe = threading.Thread(target=self.generate_message)
+        tpe.start()
        while self.generating:
            try:
                while not self.text_queue.empty():
@ -279,7 +281,8 @@ class Gpt4AllWebUI(GPT4AllAPI):
                time.sleep(0.1)
            if self.cancel_gen:
                self.generating = False
-        app.config['executor'].shutdown(True)
+        tpe = None
+        gc.collect()
        print("## Done ##")
        self.current_discussion.update_message(response_id, self.bot_says)
        self.full_message_list.append(self.bot_says)
@ -583,8 +586,8 @@ if __name__ == "__main__":

    personality = load_config(f"personalities/{config['personality_language']}/{config['personality_category']}/{config['personality']}.yaml")

-    executor = ThreadPoolExecutor(max_workers=1)
-    app.config['executor'] = executor
+    # executor = ThreadPoolExecutor(max_workers=1)
+    # app.config['executor'] = executor

    bot = Gpt4AllWebUI(app, config, personality, config_file_path)

--- a/backends/llama_cpp/init.py
+++ b/backends/llama_cpp/init.py
@ -50,15 +50,18 @@ class LLAMACPP(GPTBackend):
            new_text_callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
            verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
        """
-        self.model.generate(
-            prompt,
-            new_text_callback=new_text_callback,
-            n_predict=n_predict,
-            temp=self.config['temp'],
-            top_k=self.config['top_k'],
-            top_p=self.config['top_p'],
-            repeat_penalty=self.config['repeat_penalty'],
-            repeat_last_n = self.config['repeat_last_n'],
-            n_threads=self.config['n_threads'],
-            verbose=verbose
-        )
+        try:
+            self.model.generate(
+                prompt,
+                new_text_callback=new_text_callback,
+                n_predict=n_predict,
+                temp=self.config['temp'],
+                top_k=self.config['top_k'],
+                top_p=self.config['top_p'],
+                repeat_penalty=self.config['repeat_penalty'],
+                repeat_last_n = self.config['repeat_last_n'],
+                n_threads=self.config['n_threads'],
+                verbose=verbose
+            )
+        except Exception as ex:
+            print(ex)