From 2d598cc5fe9c5e8956cdf5e508f4d8f412326da7 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Thu, 20 Apr 2023 19:30:03 +0200 Subject: [PATCH] New model format --- app.py | 55 ++++++++++- configs/default.yaml | 6 +- models/README.md | 5 +- personalities/gpt4all_chatbot.yaml | 47 --------- pyGpt4All/api.py | 17 ++-- pyGpt4All/backends/__init__.py | 6 ++ pyGpt4All/backends/backend.py | 37 ++++++++ pyGpt4All/backends/gpt_j.py | 60 ++++++++++++ pyGpt4All/backends/llamacpp.py | 62 ++++++++++++ pyGpt4All/config.py | 5 + pyGpt4All/db.py | 18 ++++ pyGpt4All/extension.py | 5 + requirements.txt | 5 +- requirements_dev.txt | 7 ++ setup.py | 35 +++++++ static/js/main.js | 2 +- static/js/settings.js | 148 +++++++++++++++++++++++++---- templates/settings.html | 15 +++ webui.bat | 9 +- 19 files changed, 453 insertions(+), 91 deletions(-) delete mode 100644 personalities/gpt4all_chatbot.yaml create mode 100644 pyGpt4All/backends/__init__.py create mode 100644 pyGpt4All/backends/backend.py create mode 100644 pyGpt4All/backends/gpt_j.py create mode 100644 pyGpt4All/backends/llamacpp.py create mode 100644 requirements_dev.txt create mode 100644 setup.py diff --git a/app.py b/app.py index c8f5e580..e48581d1 100644 --- a/app.py +++ b/app.py @@ -9,6 +9,13 @@ # Made by the community for the community ###### +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" + + + import argparse import json import re @@ -40,9 +47,18 @@ class Gpt4AllWebUI(GPT4AllAPI): self.app = _app + self.add_endpoint( + "/list_backends", "list_backends", self.list_backends, methods=["GET"] + ) self.add_endpoint( "/list_models", "list_models", self.list_models, methods=["GET"] ) + self.add_endpoint( + "/list_personalities_languages", "list_personalities_languages", self.list_personalities_languages, methods=["GET"] + ) + self.add_endpoint( + "/list_personalities_categories", "list_personalities_categories", self.list_personalities_categories, methods=["GET"] + ) self.add_endpoint( "/list_personalities", "list_personalities", self.list_personalities, methods=["GET"] ) @@ -55,6 +71,9 @@ class Gpt4AllWebUI(GPT4AllAPI): "/list_discussions", "list_discussions", self.list_discussions, methods=["GET"] ) + self.add_endpoint("/set_personality_language", "set_personality_language", self.set_personality_language, methods=["GET"]) + self.add_endpoint("/set_personality_category", "set_personality_category", self.set_personality_category, methods=["GET"]) + self.add_endpoint("/", "", self.index, methods=["GET"]) self.add_endpoint("/export_discussion", "export_discussion", self.export_discussion, methods=["GET"]) @@ -116,14 +135,30 @@ class Gpt4AllWebUI(GPT4AllAPI): "/help", "help", self.help, methods=["GET"] ) + def list_backends(self): + backends_dir = Path('./pyGpt4All/backends') # replace with the actual path to the models folder + backends = [f.stem for f in backends_dir.glob('*.py') if f.name!="backend" and f.stem!="__init__"] + return jsonify(backends) + def list_models(self): - models_dir = Path('./models') # replace with the actual path to the models folder + models_dir = Path('./models')/self.config["backend"] # replace with the actual path to the models folder models = [f.name for f in models_dir.glob('*.bin')] return jsonify(models) + + def list_personalities_languages(self): + personalities_languages_dir = Path(f'./personalities') # replace with the actual path to the models folder + personalities_languages = [f.stem for f in personalities_languages_dir.iterdir() if f.is_dir()] + return jsonify(personalities_languages) + + def list_personalities_categories(self): + personalities_categories_dir = Path(f'./personalities/{self.config["personality_language"]}') # replace with the actual path to the models folder + personalities_categories = [f.stem for f in personalities_categories_dir.iterdir() if f.is_dir()] + return jsonify(personalities_categories) + def list_personalities(self): - personalities_dir = Path('./personalities') # replace with the actual path to the models folder + personalities_dir = Path(f'./personalities/{self.config["personality_language"]}/{self.config["personality_category"]}') # replace with the actual path to the models folder personalities = [f.stem for f in personalities_dir.glob('*.yaml')] return jsonify(personalities) @@ -145,6 +180,16 @@ class Gpt4AllWebUI(GPT4AllAPI): return jsonify(discussions) + def set_personality_language(self): + lang = request.args.get('language') + self.config['personality_language'] = lang + return jsonify({'success':True}) + + def set_personality_category(self): + category = request.args.get('category') + self.config['personality_category'] = category + return jsonify({'success':True}) + def add_endpoint( self, endpoint=None, @@ -247,7 +292,7 @@ class Gpt4AllWebUI(GPT4AllAPI): return Response( stream_with_context( self.parse_to_prompt_stream(message, message_id) - ) + ), content_type='text/plain; charset=utf-8' ) @@ -284,7 +329,7 @@ class Gpt4AllWebUI(GPT4AllAPI): for message in messages: message["content"] = markdown.markdown(message["content"]) - return jsonify(messages) + return jsonify(messages), {'Content-Type': 'application/json; charset=utf-8'} def delete_discussion(self): data = request.get_json() @@ -470,7 +515,7 @@ if __name__ == "__main__": if arg_value is not None: config[arg_name] = arg_value - personality = load_config(f"personalities/{config['personality']}.yaml") + personality = load_config(f"personalities/{config['personality_language']}/{config['personality_category']}/{config['personality']}.yaml") executor = ThreadPoolExecutor(max_workers=2) app.config['executor'] = executor diff --git a/configs/default.yaml b/configs/default.yaml index 2d637530..9c353b2a 100644 --- a/configs/default.yaml +++ b/configs/default.yaml @@ -5,9 +5,13 @@ debug: false n_threads: 8 host: localhost language: en-US +# Supported backends are llamacpp and gpt-j +backend: llama_cpp model: gpt4all-lora-quantized-ggml.bin n_predict: 1024 nb_messages_to_remember: 5 +personality_language: english +personality_category: general personality: gpt4all_chatbot port: 9600 repeat_last_n: 40 @@ -18,4 +22,4 @@ top_k: 50 top_p: 0.95 voice: "" use_gpu: false # Not active yet -auto_read: false \ No newline at end of file +auto_read: false diff --git a/models/README.md b/models/README.md index 35146387..a5415ec9 100644 --- a/models/README.md +++ b/models/README.md @@ -1 +1,4 @@ -Here you can drop your models \ No newline at end of file +Here you can drop your models depending on the selected backend +Currently, supported backends are: +- llamacpp +- gpt-j \ No newline at end of file diff --git a/personalities/gpt4all_chatbot.yaml b/personalities/gpt4all_chatbot.yaml deleted file mode 100644 index 7e339571..00000000 --- a/personalities/gpt4all_chatbot.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# GPT4All Chatbot conditionning file -# Author : @ParisNeo -# Version : 1.0 -# Description : -# An NLP needs conditionning to instruct it to be whatever we want it to be. -# This file is used by the GPT4All web ui to condition the personality of the model you are -# talking to. - -# Name of the personality -name: gpt4all - -# Name of the user -user_name: user - -# Language (see the list of supported languages here : https://github.com/ParisNeo/GPT4All_Personalities/blob/main/README.md) -language: "en_XX" - -# Category -category: "General" - -# Personality description: -personality_description: | - This personality is a helpful and Kind AI ready to help you solve your problems - -# The conditionning instructions sent to eh model at the start of the discussion -personality_conditionning: | - GPT4All is a smart and helpful AI chat bot built by Nomic-AI. It can generate stories on demand. - -#Welcome message to be sent to the user when a new discussion is started -welcome_message: "Welcome! I am GPT4All A free and open discussion AI. What can I do for you today?" - -# This prefix is added at the beginning of any message input by the user -user_message_prefix: "user: " - -# A text to put between user and chatbot messages -link_text: "\n" - -# This prefix is added at the beginning of any message output by the ai -ai_message_prefix: "gpt4all: " - -# Here is the list of extensions this personality requires -dependencies: [] - -# Some personalities need a disclaimer to warn the user of potential harm that can be caused by the AI -# for example, for medical assistants, it is important to tell the user to be careful and not use medication -# without advise from a real docor. -disclaimer: "" diff --git a/pyGpt4All/api.py b/pyGpt4All/api.py index a72ac0b8..63035883 100644 --- a/pyGpt4All/api.py +++ b/pyGpt4All/api.py @@ -11,11 +11,15 @@ import gc import sys from queue import Queue from datetime import datetime -from pyllamacpp.model import Model from pyGpt4All.db import DiscussionsDB +from pyGpt4All.backends import BACKENDS_LIST +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" class GPT4AllAPI(): - def __init__(self, config:dict, personality:dict, config_file_path) -> None: + def __init__(self, config:dict, personality:dict, config_file_path:str) -> None: self.config = config self.personality = personality self.config_file_path = config_file_path @@ -38,6 +42,9 @@ class GPT4AllAPI(): # This is used to keep track of messages self.full_message_list = [] + # Select backend + self.backend = BACKENDS_LIST[self.config["backend"]] + # Build chatbot self.chatbot_bindings = self.create_chatbot() print("Chatbot created successfully") @@ -66,11 +73,7 @@ class GPT4AllAPI(): def create_chatbot(self): try: - return Model( - ggml_model=f"./models/{self.config['model']}", - n_ctx=self.config['ctx_size'], - seed=self.config['seed'], - ) + return self.backend(self.config) except Exception as ex: print(f"Exception {ex}") return None diff --git a/pyGpt4All/backends/__init__.py b/pyGpt4All/backends/__init__.py new file mode 100644 index 00000000..92bf3d3d --- /dev/null +++ b/pyGpt4All/backends/__init__.py @@ -0,0 +1,6 @@ +from pyGpt4All.backends.llamacpp import LLAMACPP +from pyGpt4All.backends.gpt_j import GPT_J +BACKENDS_LIST={ + "llama_cpp":LLAMACPP, + "gpt_j":GPT_J +} diff --git a/pyGpt4All/backends/backend.py b/pyGpt4All/backends/backend.py new file mode 100644 index 00000000..781513ae --- /dev/null +++ b/pyGpt4All/backends/backend.py @@ -0,0 +1,37 @@ +###### +# Project : GPT4ALL-UI +# File : backend.py +# Author : ParisNeo with the help of the community +# Supported by Nomic-AI +# Licence : Apache 2.0 +# Description : +# This is an interface class for GPT4All-ui backends. +###### +from pathlib import Path +from typing import Callable + +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" + + +class GPTBackend: + def __init__(self, config:dict) -> None: + self.config = config + def generate(self, + prompt:str, + n_predict: int = 128, + new_text_callback: Callable[[str], None] = None, + verbose: bool = False, + **gpt_params ): + """Generates text out of a prompt + This should ber implemented by child class + + Args: + prompt (str): The prompt to use for generation + n_predict (int, optional): Number of tokens to prodict. Defaults to 128. + new_text_callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None. + verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False. + """ + pass \ No newline at end of file diff --git a/pyGpt4All/backends/gpt_j.py b/pyGpt4All/backends/gpt_j.py new file mode 100644 index 00000000..c94c99c6 --- /dev/null +++ b/pyGpt4All/backends/gpt_j.py @@ -0,0 +1,60 @@ +###### +# Project : GPT4ALL-UI +# File : backend.py +# Author : ParisNeo with the help of the community +# Supported by Nomic-AI +# Licence : Apache 2.0 +# Description : +# This is an interface class for GPT4All-ui backends. +###### +from pathlib import Path +from typing import Callable +from gpt4allj import Model +from pyGpt4All.backends.backend import GPTBackend + +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" + + +class GPT_J(GPTBackend): + def __init__(self, config:dict) -> None: + """Builds a GPT-J backend + + Args: + config (dict): The configuration file + """ + super().__init__(config) + self.config = config + self.model = Model( + ggml_model=f"./models/gptj/{self.config['model']}" + ) + + + def generate(self, + prompt:str, + n_predict: int = 128, + new_text_callback: Callable[[str], None] = bool, + verbose: bool = False, + **gpt_params ): + """Generates text out of a prompt + + Args: + prompt (str): The prompt to use for generation + n_predict (int, optional): Number of tokens to prodict. Defaults to 128. + new_text_callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None. + verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False. + """ + self.model.generate( + prompt, + new_text_callback=new_text_callback, + n_predict=n_predict, + temp=self.config['temp'], + top_k=self.config['top_k'], + top_p=self.config['top_p'], + repeat_penalty=self.config['repeat_penalty'], + repeat_last_n = self.config['repeat_last_n'], + n_threads=self.config['n_threads'], + verbose=verbose + ) diff --git a/pyGpt4All/backends/llamacpp.py b/pyGpt4All/backends/llamacpp.py new file mode 100644 index 00000000..b0a271a1 --- /dev/null +++ b/pyGpt4All/backends/llamacpp.py @@ -0,0 +1,62 @@ +###### +# Project : GPT4ALL-UI +# File : backend.py +# Author : ParisNeo with the help of the community +# Supported by Nomic-AI +# Licence : Apache 2.0 +# Description : +# This is an interface class for GPT4All-ui backends. +###### +from pathlib import Path +from typing import Callable +from pyllamacpp.model import Model +from pyGpt4All.backends.backend import GPTBackend + +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" + + +class LLAMACPP(GPTBackend): + def __init__(self, config:dict) -> None: + """Builds a LLAMACPP backend + + Args: + config (dict): The configuration file + """ + super().__init__(config) + + self.model = Model( + ggml_model=f"./models/llamacpp/{self.config['model']}", + n_ctx=self.config['ctx_size'], + seed=self.config['seed'], + ) + + + def generate(self, + prompt:str, + n_predict: int = 128, + new_text_callback: Callable[[str], None] = bool, + verbose: bool = False, + **gpt_params ): + """Generates text out of a prompt + + Args: + prompt (str): The prompt to use for generation + n_predict (int, optional): Number of tokens to prodict. Defaults to 128. + new_text_callback (Callable[[str], None], optional): A callback function that is called everytime a new text element is generated. Defaults to None. + verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False. + """ + self.model.generate( + prompt, + new_text_callback=new_text_callback, + n_predict=n_predict, + temp=self.config['temp'], + top_k=self.config['top_k'], + top_p=self.config['top_p'], + repeat_penalty=self.config['repeat_penalty'], + repeat_last_n = self.config['repeat_last_n'], + n_threads=self.config['n_threads'], + verbose=verbose + ) diff --git a/pyGpt4All/config.py b/pyGpt4All/config.py index 8f211d8e..9cb27841 100644 --- a/pyGpt4All/config.py +++ b/pyGpt4All/config.py @@ -11,6 +11,11 @@ ###### import yaml +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" + def load_config(file_path): with open(file_path, 'r') as stream: config = yaml.safe_load(stream) diff --git a/pyGpt4All/db.py b/pyGpt4All/db.py index 63444ff0..ae100eae 100644 --- a/pyGpt4All/db.py +++ b/pyGpt4All/db.py @@ -1,5 +1,12 @@ import sqlite3 + +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" + + # =================================== Database ================================================================== class DiscussionsDB: MSG_TYPE_NORMAL = 0 @@ -13,6 +20,17 @@ class DiscussionsDB: create database schema """ db_version = 2 + # Verify encoding and change it if it is not complient + with sqlite3.connect(self.db_path) as conn: + # Execute a PRAGMA statement to get the current encoding of the database + cur = conn.execute('PRAGMA encoding') + current_encoding = cur.fetchone()[0] + + if current_encoding != 'UTF-8': + # The current encoding is not UTF-8, so we need to change it + print(f"The current encoding is {current_encoding}, changing to UTF-8...") + conn.execute('PRAGMA encoding = "UTF-8"') + conn.commit() print("Checking discussions database...") with sqlite3.connect(self.db_path) as conn: diff --git a/pyGpt4All/extension.py b/pyGpt4All/extension.py index 4bf00c23..db0b0d32 100644 --- a/pyGpt4All/extension.py +++ b/pyGpt4All/extension.py @@ -5,6 +5,11 @@ # it gives your code access to the model, the callback functions, the model conditionning etc from config import load_config, save_config +__author__ = "parisneo" +__github__ = "https://github.com/nomic-ai/gpt4all-ui" +__copyright__ = "Copyright 2023, " +__license__ = "Apache 2.0" + class Extension(): def __init__(self, metadata_file_path:str, app) -> None: self.app = app diff --git a/requirements.txt b/requirements.txt index 50f6b1e1..04c60984 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ flask nomic pytest -pyllamacpp==1.0.6 pyyaml -markdown \ No newline at end of file +markdown +pyllamacpp==1.0.6 +gpt4allj diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 00000000..04c60984 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,7 @@ +flask +nomic +pytest +pyyaml +markdown +pyllamacpp==1.0.6 +gpt4allj diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..d662bef4 --- /dev/null +++ b/setup.py @@ -0,0 +1,35 @@ +from pathlib import Path +from typing import Union + +import setuptools + +with open("README.md", "r") as fh: + long_description = fh.read() + + +def read_requirements(path: Union[str, Path]): + with open(path, "r") as file: + return file.read().splitlines() + + +requirements = read_requirements("requirements.txt") +requirements_dev = read_requirements("requirements_dev.txt") + +setuptools.setup( + name="GPT4Allui", + version="0.0.5", + author="Saifeddine ALOUI", + author_email="aloui.saifeddine@gmail.com", + description="A web ui for running chat models with different backends. Supports multiple personalities and extensions.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/nomic-ai/gpt4all-ui", + packages=setuptools.find_packages(), + install_requires=requirements, + extras_require={"dev": requirements_dev}, + classifiers=[ + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: Apache 2.0 License", + "Operating System :: OS Independent", + ], +) diff --git a/static/js/main.js b/static/js/main.js index 7dd1c837..fa8ca27c 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -93,7 +93,7 @@ function update_main(){ else{ // For the other enrtries, these are just the text of the chatbot for (const char of text) { - txt = hiddenElement.innerHTML; + txt = bot_msg.hiddenElement.innerHTML; if (char != '\f') { txt += char bot_msg.hiddenElement.innerHTML = txt; diff --git a/static/js/settings.js b/static/js/settings.js index 2f2e5fde..022c1ce6 100644 --- a/static/js/settings.js +++ b/static/js/settings.js @@ -3,7 +3,7 @@ fetch('/settings') .then(response => response.text()) .then(html => { document.getElementById('settings').innerHTML = html; - + backendInput = document.getElementById('backend'); modelInput = document.getElementById('model'); personalityInput = document.getElementById('personalities'); languageInput = document.getElementById('language'); @@ -54,6 +54,7 @@ fetch('/settings') .then((response) => response.json()) .then((data) => { console.log(data); + backendInput.value = data["backend"] modelInput.value = data["model"] personalityInput.value = data["personality"] languageInput.value = data["language"] @@ -89,6 +90,7 @@ fetch('/settings') // Get form values and put them in an object const formValues = { seed: seedInput.value, + backend: backendInput.value, model: modelInput.value, personality: personalityInput.value, language: languageInput.value, @@ -129,10 +131,16 @@ fetch('/settings') function populate_models(){ // Get a reference to the + +
+
+ + +
+
+ + +