Upgraded models

This commit is contained in:
saloui 2023-05-17 17:38:40 +02:00
parent 55fc07b072
commit 7102a3f8c2
18 changed files with 271 additions and 158 deletions

View File

@ -0,0 +1,106 @@
######
# Project : GPT4ALL-UI
# File : backend.py
# Author : ParisNeo with the help of the community
# Underlying backend : marella's ctransformers backend
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
# This backend is a wrapper to marella's backend
# Check out his GitHub project : https://github.com/marella/ctransformers
######
from pathlib import Path
from typing import Callable
from gpt4all_api.backend import GPTBackend
import yaml
from ctransformers import AutoModelForCausalLM
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
backend_name = "GPTJ"
class GPTJ(GPTBackend):
    file_extension='*.bin'
    def __init__(self, config:dict) -> None:
        """Builds a GPTJ (ctransformers) backend

        Args:
            config (dict): The configuration file
        """
        super().__init__(config, False)
        # Infer the ctransformers model type from the model file name
        if 'gpt2' in self.config['model']:
            model_type='gpt2'
        elif 'gptj' in self.config['model']:
            model_type='gptj'
        elif 'gpt_neox' in self.config['model']:
            model_type='gpt_neox'
        elif 'dolly-v2' in self.config['model']:
            model_type='dolly-v2'
        elif 'starcoder' in self.config['model']:
            model_type='starcoder'
        else:
            print("The model you are using is not supported by this backend")
            return

        if self.config["use_avx2"]:
            self.model = AutoModelForCausalLM.from_pretrained(
                    f"./models/c_transformers/{self.config['model']}", model_type=model_type
                    )
        else:
            # Fall back to the AVX-only build of the library when AVX2 is unavailable
            self.model = AutoModelForCausalLM.from_pretrained(
                    f"./models/c_transformers/{self.config['model']}", model_type=model_type, lib = "avx"
                    )

    def generate(self,
                 prompt:str,
                 n_predict: int = 128,
                 new_text_callback: Callable[[str], bool] = bool,
                 verbose: bool = False,
                 **gpt_params ):
        """Generates text out of a prompt

        Args:
            prompt (str): The prompt to use for generation
            n_predict (int, optional): Number of tokens to predict. Defaults to 128.
            new_text_callback (Callable[[str], bool], optional): A callback function that is called every time a new text element is generated; returning False stops generation. Defaults to bool, so generation continues when no callback is supplied.
            verbose (bool, optional): If true, the code will print detailed information about the generation process. Defaults to False.
        """
        try:
            self.model.reset()
            tokens = self.model.tokenize(prompt.encode())
            for tok in self.model.generate(
                        tokens,
                        seed=self.config['seed'],
                        n_threads=self.config['n_threads'],
                        n_predict=n_predict,
                        top_k=self.config['top_k'],
                        top_p=self.config['top_p'],
                        temp=self.config['temperature'],
                        repeat_penalty=self.config['repeat_penalty'],
                        repeat_last_n=self.config['repeat_last_n'],
                        n_batch=8,
                        reset=True,
                        ):
                # Stop streaming as soon as the callback returns False
                if not new_text_callback(self.model.detokenize(tok)):
                    return
        except Exception as ex:
            print(ex)

    @staticmethod
    def get_available_models():
        # Create the file path relative to the child class's directory
        backend_path = Path(__file__).parent
        file_path = backend_path/"models.yaml"

        with open(file_path, 'r') as file:
            yaml_data = yaml.safe_load(file)

        return yaml_data
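
For context, a minimal usage sketch of the class above. The import path, model file name, and config values are illustrative assumptions, not part of the commit:

from backend import GPTJ  # hypothetical import path for the file above

config = {
    'model': 'my-gptj-model.bin',  # hypothetical file under ./models/c_transformers/; name must contain 'gptj'
    'use_avx2': True,
    'seed': -1,
    'n_threads': 8,
    'top_k': 40,
    'top_p': 0.95,
    'temperature': 0.7,
    'repeat_penalty': 1.3,
    'repeat_last_n': 64,
}

def on_token(text: str) -> bool:
    print(text, end='', flush=True)
    return True  # return False to stop generation early

backend = GPTJ(config)
backend.generate("Once upon a time", n_predict=64, new_text_callback=on_token)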

View File

@ -0,0 +1,7 @@
- bestGPTJ: 'true'
  description: Current best commercially licensable model based on GPT-J and trained
    by Nomic AI on the latest curated GPT4All dataset.
  filename: ggml-gpt4all-j-v1.3-groovy.bin
  filesize: '3785248281'
  isDefault: 'true'
  md5sum: 81a09a0ddf89690372fc296ff7f625af
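
Since each entry carries filesize and md5sum fields, a downloaded model can be sanity-checked before use. A small sketch; the verify_model helper is ours, not part of the commit:

import hashlib
from pathlib import Path

def verify_model(path: str, expected_md5: str, expected_size: int) -> bool:
    p = Path(path)
    if p.stat().st_size != expected_size:
        return False
    digest = hashlib.md5()
    with open(p, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest() == expected_md5

# verify_model("models/c_transformers/ggml-gpt4all-j-v1.3-groovy.bin",
#              "81a09a0ddf89690372fc296ff7f625af", 3785248281)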

View File

@ -6,6 +6,11 @@
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
# This backend is a wrapper to gpt4all's official backend
# Check out the official GitHub project : https://github.com/nomic-ai/gpt4all
######
from pathlib import Path
from typing import Callable

View File

@ -7,6 +7,10 @@
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
# This backend is a wrapper to abdeladim's backend
# Check out his GitHub project : https://github.com/abdeladim-s/pygptj
######
from pathlib import Path
from typing import Callable

View File

@ -7,6 +7,10 @@
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
# This backend is a wrapper to marella's backend
# Check out his GitHub project : https://github.com/marella/gpt4all-j
######
from pathlib import Path
from typing import Callable

View File

@ -1,72 +0,0 @@
- bestGPTJ: 'true'
  description: Current best commercially licensable model based on GPT-J and trained
    by Nomic AI on the latest curated GPT4All dataset.
  filename: ggml-gpt4all-j-v1.3-groovy.bin
  filesize: '3785248281'
  isDefault: 'true'
  md5sum: 81a09a0ddf89690372fc296ff7f625af
- bestLlama: 'true'
  description: Current best non-commercially licensable model based on Llama 13b and
    trained by Nomic AI on the latest curated GPT4All dataset.
  filename: ggml-gpt4all-l13b-snoozy.bin
  filesize: '8136770688'
  md5sum: 91f886b68fbce697e9a3cd501951e455
- bestMPT: 'true'
  description: Current best non-commercially licensable chat model based on MPT and
    trained by Mosaic ML.
  filename: ggml-mpt-7b-chat.bin
  filesize: '4854401050'
  isDefault: 'true'
  md5sum: 756249d3d6abe23bde3b1ae272628640
  requires: 2.4.1
- description: A commercially licensable model based on GPT-J and trained by Nomic
    AI on the v2 GPT4All dataset.
  filename: ggml-gpt4all-j-v1.2-jazzy.bin
  filesize: '3785248281'
  md5sum: 879344aaa9d62fdccbda0be7a09e7976
- description: A commercially licensable model based on GPT-J and trained by Nomic
    AI on the v1 GPT4All dataset.
  filename: ggml-gpt4all-j-v1.1-breezy.bin
  filesize: '3785248281'
  md5sum: 61d48a82cb188cceb14ebb8082bfec37
- description: A commercially licensable model based on GPT-J and trained by Nomic
    AI on the v0 GPT4All dataset.
  filename: ggml-gpt4all-j.bin
  filesize: '3785248281'
  md5sum: 5b5a3f9b858d33b29b52b89692415595
- description: A non-commercially licensable model based on Llama 7b and trained by
    teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
  filename: ggml-vicuna-7b-1.1-q4_2.bin
  filesize: '4212859520'
  md5sum: 29119f8fa11712704c6b22ac5ab792ea
- description: A non-commercially licensable model based on Llama 13b and trained
    by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
  filename: ggml-vicuna-13b-1.1-q4_2.bin
  filesize: '8136770688'
  md5sum: 95999b7b0699e2070af63bf5d34101a8
- description: A non-commercially licensable model based on Llama 7b and trained by
    Microsoft and Peking University.
  filename: ggml-wizardLM-7B.q4_2.bin
  filesize: '4212864640'
  md5sum: 99e6d129745a3f1fb1121abed747b05a
- description: A non-commercially licensable model based on Llama 13b and RLHF trained
    by Stable AI.
  filename: ggml-stable-vicuna-13B.q4_2.bin
  filesize: '8136777088'
  md5sum: 6cb4ee297537c9133bddab9692879de0
- description: A commercially licensable model base pre-trained by Mosaic ML.
  filename: ggml-mpt-7b-base.bin
  filesize: '4854401028'
  md5sum: 120c32a51d020066288df045ef5d52b9
  requires: 2.4.1
- description: A non-commercially licensable model based on Vicuna 13b, fine-tuned
    on ~180,000 instructions, trained by Nous Research.
  filename: ggml-nous-gpt4-vicuna-13b.bin
  filesize: '8136777088'
  md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe
- description: A commercially licensable instruct model based on MPT and trained
    by Mosaic ML.
  filename: ggml-mpt-7b-instruct.bin
  filesize: '4854401028'
  md5sum: 1cfa4958f489f0a0d1ffdf6b37322809
  requires: 2.4.1

View File

@ -1,47 +0,0 @@
- bestLlama: 'false'
  description: The model that started it all
  filename: gpt4all-lora-quantized-ggml.new.bin
  md5sum: 91f886b68fbce697e9a3cd501951e455
  server: https://huggingface.co/ParisNeo/GPT4All/resolve/main/
- bestLlama: 'false'
  description: The model that started it all (uncensored version)
  filename: gpt4all-lora-unfiltered-quantized.new.bin
  md5sum: 91f886b68fbce697e9a3cd501951e455
  server: https://huggingface.co/ParisNeo/GPT4All/resolve/main/
- bestLlama: 'true'
  description: Current best non-commercially licensable model based on Llama 13b and
    trained by Nomic AI on the latest curated GPT4All dataset.
  filename: ggml-gpt4all-l13b-snoozy.bin
  filesize: '8136770688'
  md5sum: 91f886b68fbce697e9a3cd501951e455
  server: https://gpt4all.io/models/
- description: A non-commercially licensable model based on Llama 7b and trained by
    teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
  filename: ggml-vicuna-7b-1.1-q4_2.bin
  filesize: '4212859520'
  md5sum: 29119f8fa11712704c6b22ac5ab792ea
  server: https://gpt4all.io/models/
- description: A non-commercially licensable model based on Llama 13b and trained
    by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
  filename: ggml-vicuna-13b-1.1-q4_2.bin
  filesize: '8136770688'
  md5sum: 95999b7b0699e2070af63bf5d34101a8
  server: https://gpt4all.io/models/
- description: A non-commercially licensable model based on Llama 7b and trained by
    Microsoft and Peking University.
  filename: ggml-wizardLM-7B.q4_2.bin
  filesize: '4212864640'
  md5sum: 99e6d129745a3f1fb1121abed747b05a
  server: https://gpt4all.io/models/
- description: A non-commercially licensable model based on Llama 13b and RLHF trained
    by Stable AI.
  filename: ggml-stable-vicuna-13B.q4_2.bin
  filesize: '8136777088'
  md5sum: 6cb4ee297537c9133bddab9692879de0
  server: https://gpt4all.io/models/
- description: A non-commercially licensable model based on Vicuna 13b, fine-tuned
    on ~180,000 instructions, trained by Nous Research.
  filename: ggml-nous-gpt4-vicuna-13b.bin
  filesize: '8136777088'
  md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe
  server: https://gpt4all.io/models/

View File

@ -6,38 +6,39 @@
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
# This backend is a wrapper to the official llamacpp python bindings
# Check out the GitHub project : https://github.com/abetlen/llama-cpp-python
######
from pathlib import Path
from typing import Callable
from accelerate import init_empty_weights
from accelerate import load_checkpoint_and_dispatch
from transformers import AutoTokenizer
from transformers import AutoConfig, AutoModelForCausalLM
from llama_cpp import Llama
from gpt4all_api.backend import GPTBackend
import torch
import yaml
import random
__author__ = "parisneo"
__github__ = "https://github.com/ParisNeo/GPTQ_backend"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
backend_name = "HuggingFace"
backend_name = "LLAMACPP"
class HuggingFace(GPTBackend):
    file_extension='*'
class LLAMACPP(GPTBackend):
    file_extension='*.bin'
    def __init__(self, config:dict) -> None:
        """Builds a HuggingFace backend
        """Builds a LLAMACPP backend

        Args:
            config (dict): The configuration file
        """
        super().__init__(config, False)

        # load quantized model, currently only support cpu or single gpu
        config_path = AutoConfig.from_pretrained(config["model"])
        self.tokenizer = AutoTokenizer.from_pretrained(config["model"])
        self.model = AutoModelForCausalLM.from_pretrained(config["model"], load_in_8bit=True, device_map='auto')
        seed = config["seed"]
        if seed <=0:
            seed = random.randint(1, 2**31)

        self.model = Llama(model_path=f"./models/llama_cpp_official/{self.config['model']}", n_gpu_layers=40, seed=seed)

    def generate(self,
                 prompt:str,
@ -54,30 +55,31 @@ class HuggingFace(GPTBackend):
            verbose (bool, optional): If true, the code will print detailed information about the generation process. Defaults to False.
        """
        try:
            tok = self.tokenizer.decode(self.model.generate(**self.tokenizer(prompt, return_tensors="pt").to("cuda:0"))[0])
            new_text_callback(tok)
            """
            self.model.reset()
            for tok in self.model.generate(prompt,
                                           n_predict=n_predict,
                                           temp=self.config['temp'],
            tokens = self.model.tokenize(prompt.encode())
            count = 0
            for tok in self.model.generate(tokens,
                                           temp=self.config['temperature'],
                                           top_k=self.config['top_k'],
                                           top_p=self.config['top_p'],
                                           repeat_penalty=self.config['repeat_penalty'],
                                           repeat_last_n = self.config['repeat_last_n'],
                                           n_threads=self.config['n_threads'],
                                           ):
                if not new_text_callback(tok):
                if count >= n_predict or (tok == self.model.token_eos()):
                    break
                word = self.model.detokenize([tok]).decode()
                if not new_text_callback(word):
                    return
            """
                count += 1
        except Exception as ex:
            print(ex)
    @staticmethod
    def list_models(config:dict):
        """Lists the models for this backend
        """
    def get_available_models():
        # Create the file path relative to the child class's directory
        backend_path = Path(__file__).parent
        file_path = backend_path/"models.yaml"

        with open(file_path, 'r') as file:
            yaml_data = yaml.safe_load(file)

        return [
            "EleutherAI/gpt-j-6B"
        ]
        return yaml_data
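
For reference, a standalone sketch of the low-level llama-cpp-python flow that the rewritten generate() relies on; the model path and sampling values are illustrative assumptions:

from llama_cpp import Llama

llm = Llama(model_path="./models/llama_cpp_official/ggml-model.q4_0.bin",  # hypothetical file
            n_gpu_layers=40, seed=1234)
tokens = llm.tokenize(b"Once upon a time")
count = 0
for tok in llm.generate(tokens, top_k=40, top_p=0.95, temp=0.7, repeat_penalty=1.3):
    # Stop at the token budget or the end-of-sequence token
    if count >= 64 or tok == llm.token_eos():
        break
    print(llm.detokenize([tok]).decode(), end='', flush=True)
    count += 1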

View File

@ -0,0 +1,29 @@
- bestLlama: 'true'
  license: Non commercial
  description: The official Open Assistant 30B model, finally here
  filename: OpenAssistant-SFT-7-Llama-30B.ggml.q4_0.bin
  sha256: 32fd44c685fbf429810db593e2db8aa42a7e1be2cd3571b6005d53b029acfcf5
  server: https://huggingface.co/TheBloke/OpenAssistant-SFT-7-Llama-30B-GGML/resolve/main/
- bestLlama: 'true'
  license: Non commercial
  description: The wizardVicuna model 13B
  filename: wizard-vicuna-13B.ggml.q4_0.bin
  sha256: 32fd44c685fbf429810db593e2db8aa42a7e1be2cd3571b6005d53b029acfcf5
  server: https://huggingface.co/TheBloke/wizard-vicuna-13B-GGML/resolve/main/
- bestLlama: 'true'
  license: Non commercial
  description: The wizardLM model 7B
  filename: WizardLM-7B-uncensored.ggml.q4_0.bin
  sha256: b1e53a3c3a9389b9c5d81e0813cfb90ebaff6acad1733fad08cd28974fa3ac30
  server: https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/
- bestLlama: 'true'
  license: Non commercial
  description: The wizardLM model uncensored
  filename: WizardLM-7B-uncensored.ggml.q4_0.bin
  sha256: b1e53a3c3a9389b9c5d81e0813cfb90ebaff6acad1733fad08cd28974fa3ac30
  server: https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/
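
Each entry pairs a server URL with a filename and a sha256 digest, which is enough to fetch and verify a model. A hedged sketch; the helper below is ours, not part of the commit:

import hashlib
import urllib.request
from pathlib import Path

def download_and_verify(server: str, filename: str, sha256: str,
                        dest: str = "models/llama_cpp_official") -> Path:
    target = Path(dest) / filename
    target.parent.mkdir(parents=True, exist_ok=True)
    urllib.request.urlretrieve(server + filename, target)
    digest = hashlib.sha256()
    with open(target, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    if digest.hexdigest() != sha256:
        raise ValueError(f"Checksum mismatch for {filename}")
    return target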

View File

@ -0,0 +1 @@
llama-cpp-python

View File

@ -6,6 +6,10 @@
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
# This backend is a wrapper to abdeladim's backend
# Check out his GitHub project : https://github.com/abdeladim-s/pyllamacpp
######
from pathlib import Path
from typing import Callable
@ -18,9 +22,9 @@ __github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
backend_name = "LLAMACPP"
backend_name = "PyLLAMACPP"
class LLAMACPP(GPTBackend):
class PyLLAMACPP(GPTBackend):
    file_extension='*.bin'
    def __init__(self, config:dict) -> None:
        """Builds a LLAMACPP backend
@ -31,7 +35,7 @@ class LLAMACPP(GPTBackend):
        super().__init__(config, False)
        self.model = Model(
                model_path=f"./models/llama_cpp/{self.config['model']}",
                model_path=f"./models/py_llama_cpp/{self.config['model']}",
                prompt_context="", prompt_prefix="", prompt_suffix="",
                n_ctx=self.config['ctx_size'],
                seed=self.config['seed'],

View File

@ -0,0 +1,11 @@
- bestLlama: 'true'
  description: The official Open Assistant 30B model, finally here
  filename: OpenAssistant-SFT-7-Llama-30B.ggml.q4_0.bin
  md5sum: 91f886b68fbce697e9a3cd501951e455
  server: https://huggingface.co/TheBloke/OpenAssistant-SFT-7-Llama-30B-GGML/resolve/main/
- bestLlama: 'true'
  description: Stable vicuna 13B
  filename: stable-vicuna-13B.ggml.q5_1.bin
  md5sum: 91f886b68fbce697e9a3cd501951e455
  server: https://huggingface.co/TheBloke/stable-vicuna-13B-GGML/resolve/main/

View File

@ -0,0 +1 @@
pyllamacpp

View File

@ -7,7 +7,7 @@ n_threads: 8
host: localhost
language: en-US
# Supported backends are the folders under the backends directory
backend: llama_cpp
backend: llama_cpp_official
model: null
n_predict: 1024
nb_messages_to_remember: 5
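
For reference, the backend value selects both the backend module folder and the models subfolder the backends read from (e.g. ./models/llama_cpp_official/ in the code above). A minimal sketch of that mapping; the resolve_paths helper is ours:

from pathlib import Path
import yaml

def resolve_paths(config_path: str):
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)
    backend_dir = Path("backends") / config["backend"]  # e.g. backends/llama_cpp_official
    models_dir = Path("models") / config["backend"]     # e.g. models/llama_cpp_official
    return backend_dir, models_dir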

View File

@ -28,6 +28,58 @@ __license__ = "Apache 2.0"
import subprocess
import pkg_resources
# ===========================================================
# Manage automatic install scripts
def is_package_installed(package_name):
    try:
        dist = pkg_resources.get_distribution(package_name)
        return True
    except pkg_resources.DistributionNotFound:
        return False

def install_package(package_name):
    try:
        # Check if the package is already installed
        __import__(package_name)
        print(f"{package_name} is already installed.")
    except ImportError:
        print(f"{package_name} is not installed. Installing...")
        # Install the package using pip
        subprocess.check_call(["pip", "install", package_name])
        print(f"{package_name} has been successfully installed.")

def parse_requirements_file(requirements_path):
    with open(requirements_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                # Skip empty and commented lines
                continue
            # Split the requirement into name and version specifier, keeping
            # the separator so it can be handed back to pip verbatim
            if '==' in line:
                package_name, sep, version = line.partition('==')
            elif '>=' in line:
                package_name, sep, version = line.partition('>=')
            else:
                package_name, sep, version = line, '', ''
            version_specifier = sep + version
            if is_package_installed(package_name):
                # The package is already installed
                print(f"{package_name} is already installed.")
            else:
                # The package is not installed, install it
                if version_specifier:
                    install_package(f"{package_name}{version_specifier}")
                else:
                    install_package(package_name)
# ===========================================================
class ModelProcess:
    def __init__(self, config=None):
        self.config = config
@ -42,7 +94,11 @@ class ModelProcess:
        self.model_ready = mp.Value('i', 0)
        self.ready = False

    def load_backend(self, backend_path):
    def load_backend(self, backend_path:Path):
        # first find out if there is a requirements.txt file
        requirements_file = backend_path/"requirements.txt"
        if requirements_file.exists():
            parse_requirements_file(requirements_file)

        # define the full absolute path to the module
        absolute_path = backend_path.resolve()
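
The rest of load_backend is elided by the diff; it presumably imports backend.py from that absolute path. A typical importlib pattern for doing so, shown as an assumption rather than the commit's exact code:

import importlib.util
from pathlib import Path

def import_backend_module(absolute_path: Path):
    # Load backend.py from the backend folder as a module object
    spec = importlib.util.spec_from_file_location("backend", absolute_path / "backend.py")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
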
@ -88,6 +144,7 @@ class ModelProcess:
    def rebuild_backend(self, config):
        try:
            backend = self.load_backend(Path("backends")/config["backend"])
            print("Backend loaded successfully")
        except Exception as ex:
@ -239,6 +296,7 @@ class ModelProcess:
        while not self.set_config_queue.empty():
            config = self.set_config_queue.get()
            if config is not None:
                print("Inference process : Setting configuration")
                self._set_config(config)

    def _cancel_generation(self):

View File

View File

@ -4,7 +4,6 @@ nomic
pytest
pyyaml
markdown
pyllamacpp==2.1.1
gpt4all-j
pygptj
gpt4all
@ -16,4 +15,5 @@ transformers
accelerate
gevent
gevent-websocket
pyaipersonality>=0.0.12
pyaipersonality>=0.0.12
ctransformers