From 79426ce9d06cde450a7f0c47f80a016169b9add0 Mon Sep 17 00:00:00 2001
From: ParisNeo <aloui.seifeddine@gmail.com>
Date: Mon, 24 Apr 2023 22:02:50 +0200
Subject: [PATCH] moved to threading instead of threadpools

---
 app.py                         | 15 +++++++++------
 backends/llama_cpp/__init__.py | 27 +++++++++++++++------------
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/app.py b/app.py
index 42deb15f..929772ce 100644
--- a/app.py
+++ b/app.py
@@ -20,7 +20,7 @@ import argparse
 import json
 import re
 import traceback
-from concurrent.futures import ThreadPoolExecutor
+import threading
 import sys
 from pyGpt4All.db import DiscussionsDB, Discussion
 from flask import (
@@ -263,8 +263,10 @@ class Gpt4AllWebUI(GPT4AllAPI):
         self.discussion_messages = self.prepare_query(message_id)
         self.prepare_reception()
         self.generating = True
-        app.config['executor'] = ThreadPoolExecutor(max_workers=1)
-        app.config['executor'].submit(self.generate_message)
+        # app.config['executor'] = ThreadPoolExecutor(max_workers=1)
+        # app.config['executor'].submit(self.generate_message)
+        tpe = threading.Thread(target=self.generate_message)
+        tpe.start()
         while self.generating:
             try:
                 while not self.text_queue.empty():
@@ -279,7 +281,8 @@ class Gpt4AllWebUI(GPT4AllAPI):
                 time.sleep(0.1)
             if self.cancel_gen:
                 self.generating = False
-        app.config['executor'].shutdown(True)
+        tpe = None
+        gc.collect()
         print("## Done ##")
         self.current_discussion.update_message(response_id, self.bot_says)
         self.full_message_list.append(self.bot_says)
@@ -583,8 +586,8 @@ if __name__ == "__main__":
 
     personality = load_config(f"personalities/{config['personality_language']}/{config['personality_category']}/{config['personality']}.yaml")
 
-    executor = ThreadPoolExecutor(max_workers=1)
-    app.config['executor'] = executor
+    # executor = ThreadPoolExecutor(max_workers=1)
+    # app.config['executor'] = executor
 
     bot = Gpt4AllWebUI(app, config, personality, config_file_path)
 
diff --git a/backends/llama_cpp/__init__.py b/backends/llama_cpp/__init__.py
index f84b3e4b..3aec6563 100644
--- a/backends/llama_cpp/__init__.py
+++ b/backends/llama_cpp/__init__.py
@@ -50,15 +50,18 @@ class LLAMACPP(GPTBackend):
             new_text_callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None.
             verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
         """
-        self.model.generate(
-            prompt,
-            new_text_callback=new_text_callback,
-            n_predict=n_predict,
-            temp=self.config['temp'],
-            top_k=self.config['top_k'],
-            top_p=self.config['top_p'],
-            repeat_penalty=self.config['repeat_penalty'],
-            repeat_last_n = self.config['repeat_last_n'],
-            n_threads=self.config['n_threads'],
-            verbose=verbose
-        )
+        try:
+            self.model.generate(
+                prompt,
+                new_text_callback=new_text_callback,
+                n_predict=n_predict,
+                temp=self.config['temp'],
+                top_k=self.config['top_k'],
+                top_p=self.config['top_p'],
+                repeat_penalty=self.config['repeat_penalty'],
+                repeat_last_n = self.config['repeat_last_n'],
+                n_threads=self.config['n_threads'],
+                verbose=verbose
+            )
+        except Exception as ex:
+            print(ex)
\ No newline at end of file