From 79426ce9d06cde450a7f0c47f80a016169b9add0 Mon Sep 17 00:00:00 2001 From: ParisNeo Date: Mon, 24 Apr 2023 22:02:50 +0200 Subject: [PATCH] moved to threading instead of threadpools --- app.py | 15 +++++++++------ backends/llama_cpp/__init__.py | 27 +++++++++++++++------------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/app.py b/app.py index 42deb15f..929772ce 100644 --- a/app.py +++ b/app.py @@ -20,7 +20,7 @@ import argparse import json import re import traceback -from concurrent.futures import ThreadPoolExecutor +import threading import sys from pyGpt4All.db import DiscussionsDB, Discussion from flask import ( @@ -263,8 +263,10 @@ class Gpt4AllWebUI(GPT4AllAPI): self.discussion_messages = self.prepare_query(message_id) self.prepare_reception() self.generating = True - app.config['executor'] = ThreadPoolExecutor(max_workers=1) - app.config['executor'].submit(self.generate_message) + # app.config['executor'] = ThreadPoolExecutor(max_workers=1) + # app.config['executor'].submit(self.generate_message) + tpe = threading.Thread(target=self.generate_message) + tpe.start() while self.generating: try: while not self.text_queue.empty(): @@ -279,7 +281,8 @@ class Gpt4AllWebUI(GPT4AllAPI): time.sleep(0.1) if self.cancel_gen: self.generating = False - app.config['executor'].shutdown(True) + tpe = None + gc.collect() print("## Done ##") self.current_discussion.update_message(response_id, self.bot_says) self.full_message_list.append(self.bot_says) @@ -583,8 +586,8 @@ if __name__ == "__main__": personality = load_config(f"personalities/{config['personality_language']}/{config['personality_category']}/{config['personality']}.yaml") - executor = ThreadPoolExecutor(max_workers=1) - app.config['executor'] = executor + # executor = ThreadPoolExecutor(max_workers=1) + # app.config['executor'] = executor bot = Gpt4AllWebUI(app, config, personality, config_file_path) diff --git a/backends/llama_cpp/__init__.py b/backends/llama_cpp/__init__.py index f84b3e4b..3aec6563 100644 --- a/backends/llama_cpp/__init__.py +++ b/backends/llama_cpp/__init__.py @@ -50,15 +50,18 @@ class LLAMACPP(GPTBackend): new_text_callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None. verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False. """ - self.model.generate( - prompt, - new_text_callback=new_text_callback, - n_predict=n_predict, - temp=self.config['temp'], - top_k=self.config['top_k'], - top_p=self.config['top_p'], - repeat_penalty=self.config['repeat_penalty'], - repeat_last_n = self.config['repeat_last_n'], - n_threads=self.config['n_threads'], - verbose=verbose - ) + try: + self.model.generate( + prompt, + new_text_callback=new_text_callback, + n_predict=n_predict, + temp=self.config['temp'], + top_k=self.config['top_k'], + top_p=self.config['top_p'], + repeat_penalty=self.config['repeat_penalty'], + repeat_last_n = self.config['repeat_last_n'], + n_threads=self.config['n_threads'], + verbose=verbose + ) + except Exception as ex: + print(ex) \ No newline at end of file