From 7102a3f8c2a0442986bd2664277c02b772fa625e Mon Sep 17 00:00:00 2001
From: saloui <saifeddine.aloui@cea.fr>
Date: Wed, 17 May 2023 17:38:40 +0200
Subject: [PATCH] Upgraded models

---
 backends/c_transformers/__init__.py           | 106 ++++++++++++++++++
 backends/c_transformers/models.yaml           |   7 ++
 backends/gpt_4all/__init__.py                 |   5 +
 backends/gpt_j_a/__init__.py                  |   4 +
 backends/gpt_j_m/__init__.py                  |   4 +
 backends/hugging_face/models.yaml             |  72 ------------
 backends/llama_cpp/models.yaml                |  47 --------
 .../__init__.py                               |  66 +++++------
 backends/llama_cpp_official/models.yaml       |  29 +++++
 backends/llama_cpp_official/requirements.txt  |   1 +
 .../{llama_cpp => py_llama_cpp}/__init__.py   |  10 +-
 backends/py_llama_cpp/models.yaml             |  11 ++
 backends/py_llama_cpp/requirements.txt        |   1 +
 configs/default.yaml                          |   2 +-
 gpt4all_api/api.py                            |  60 +++++++++-
 models/llama_cpp/.keep                        |   0
 .../.keep                                     |   0
 requirements.txt                              |   4 +-
 18 files changed, 271 insertions(+), 158 deletions(-)
 create mode 100644 backends/c_transformers/__init__.py
 create mode 100644 backends/c_transformers/models.yaml
 delete mode 100644 backends/hugging_face/models.yaml
 delete mode 100644 backends/llama_cpp/models.yaml
 rename backends/{hugging_face => llama_cpp_official}/__init__.py (56%)
 create mode 100644 backends/llama_cpp_official/models.yaml
 create mode 100644 backends/llama_cpp_official/requirements.txt
 rename backends/{llama_cpp => py_llama_cpp}/__init__.py (90%)
 create mode 100644 backends/py_llama_cpp/models.yaml
 create mode 100644 backends/py_llama_cpp/requirements.txt
 delete mode 100644 models/llama_cpp/.keep
 rename models/{hugging_face => llama_cpp_official}/.keep (100%)

diff --git a/backends/c_transformers/__init__.py b/backends/c_transformers/__init__.py
new file mode 100644
index 00000000..ef1f989c
--- /dev/null
+++ b/backends/c_transformers/__init__.py
@@ -0,0 +1,106 @@
+######
+# Project       : GPT4ALL-UI
+# File          : backend.py
+# Author        : ParisNeo with the help of the community
+# Underlying backend : marella's ctransformers backend
+# Supported by Nomic-AI
+# Licence       : Apache 2.0
+# Description   : 
+# This is an interface class for GPT4All-ui backends.
+
+# This backend is a wrapper to marella's backend
+# Follow him on his github project : https://github.com/marella/ctransformers
+
+######
+from pathlib import Path
+from typing import Callable
+from gpt4all_api.backend import GPTBackend
+import yaml
+from ctransformers import AutoModelForCausalLM
+
+__author__ = "parisneo"
+__github__ = "https://github.com/nomic-ai/gpt4all-ui"
+__copyright__ = "Copyright 2023, "
+__license__ = "Apache 2.0"
+
+backend_name = "GPTJ"
+
+class GPTJ(GPTBackend):
+    """Backend that loads models through marella's ctransformers library."""
+    file_extension='*.bin'
+    # (substring looked for in the model file name, ctransformers model_type);
+    # entries are tried in order and the first match wins.
+    _MODEL_TYPES = (
+        ('gpt2', 'gpt2'), ('gptj', 'gptj'), ('gpt_neox', 'gpt_neox'),
+        ('dolly-v2', 'dolly-v2'), ('starcoder', 'starcoder'),
+        ('gpt4all-j', 'gptj'),  # naming used by this backend's models.yaml
+    )
+
+    def __init__(self, config:dict) -> None:
+        """Builds a c_transformers backend
+
+        Args:
+            config (dict): The configuration file
+        """
+        super().__init__(config, False)
+        name = self.config['model']
+        model_type = next((mt for key, mt in self._MODEL_TYPES if key in name), None)
+        if model_type is None:
+            print("The model you are using is not supported by this backend")
+            return
+
+        # Without AVX2, ask ctransformers for its plain AVX library build.
+        extra = {} if self.config["use_avx2"] else {"lib": "avx"}
+        self.model = AutoModelForCausalLM.from_pretrained(
+            f"./models/c_transformers/{name}", model_type=model_type, **extra
+        )
+
+    def generate(self, 
+                 prompt:str,                  
+                 n_predict: int = 128,
+                 new_text_callback: Callable[[str], None] = bool,
+                 verbose: bool = False,
+                 **gpt_params ):
+        """Generates text out of a prompt
+
+        Args:
+            prompt (str): The prompt to use for generation
+            n_predict (int, optional): Maximum number of tokens to predict. Defaults to 128.
+            new_text_callback (Callable[[str], None], optional): Called with every new piece of text; generation stops as soon as it returns a falsy value. Defaults to bool.
+            verbose (bool, optional): Accepted for interface compatibility; not used by this backend. Defaults to False.
+        """
+        try:
+            self.model.reset()
+            # ctransformers' tokenize() takes the text itself, not encoded bytes.
+            tokens = self.model.tokenize(prompt)
+            stream = self.model.generate(
+                tokens,
+                seed=self.config['seed'],
+                threads=self.config['n_threads'],
+                top_k=self.config['top_k'],
+                top_p=self.config['top_p'],
+                temperature=self.config['temperature'],
+                repetition_penalty=self.config['repeat_penalty'],
+                last_n_tokens=self.config['repeat_last_n'],
+                batch_size=8,
+                reset=True,
+            )
+            # generate() takes no token limit, so n_predict is enforced here.
+            for count, tok in enumerate(stream):
+                if count >= n_predict:
+                    break
+                if not new_text_callback(self.model.detokenize([tok])):
+                    return
+        except Exception as ex:
+            # Best effort: report the failure instead of crashing the caller.
+            print(ex)
+
+    @staticmethod
+    def get_available_models():
+        """Returns the content of the models.yaml file located next to this file"""
+        # Create the file path relative to the child class's directory
+        backend_path = Path(__file__).parent
+        file_path = backend_path/"models.yaml"
+
+        with open(file_path, 'r') as file:
+            return yaml.safe_load(file)
\ No newline at end of file
diff --git a/backends/c_transformers/models.yaml b/backends/c_transformers/models.yaml
new file mode 100644
index 00000000..dc7468c2
--- /dev/null
+++ b/backends/c_transformers/models.yaml
@@ -0,0 +1,7 @@
+- bestGPTJ: 'true'
+  description: Current best commercially licensable model based on GPT-J and trained
+    by Nomic AI on the latest curated GPT4All dataset.
+  filename: ggml-gpt4all-j-v1.3-groovy.bin
+  filesize: '3785248281'
+  isDefault: 'true'
+  md5sum: 81a09a0ddf89690372fc296ff7f625af
diff --git a/backends/gpt_4all/__init__.py b/backends/gpt_4all/__init__.py
index 37eafcff..34aa5421 100644
--- a/backends/gpt_4all/__init__.py
+++ b/backends/gpt_4all/__init__.py
@@ -6,6 +6,11 @@
 # Licence       : Apache 2.0
 # Description   : 
 # This is an interface class for GPT4All-ui backends.
+
+
+# This backend is a wrapper to gpt4all's official backend
+# Follow the project on github : https://github.com/nomic-ai/gpt4all
+
 ######
 from pathlib import Path
 from typing import Callable
diff --git a/backends/gpt_j_a/__init__.py b/backends/gpt_j_a/__init__.py
index 4eac6ef1..4129d7b9 100644
--- a/backends/gpt_j_a/__init__.py
+++ b/backends/gpt_j_a/__init__.py
@@ -7,6 +7,10 @@
 # Licence       : Apache 2.0
 # Description   : 
 # This is an interface class for GPT4All-ui backends.
+
+# This backend is a wrapper to abdeladim's backend
+# Follow him on his github project : https://github.com/abdeladim-s/pygptj
+
 ######
 from pathlib import Path
 from typing import Callable
diff --git a/backends/gpt_j_m/__init__.py b/backends/gpt_j_m/__init__.py
index 366cc1de..4cc43eb0 100644
--- a/backends/gpt_j_m/__init__.py
+++ b/backends/gpt_j_m/__init__.py
@@ -7,6 +7,10 @@
 # Licence       : Apache 2.0
 # Description   : 
 # This is an interface class for GPT4All-ui backends.
+
+# This backend is a wrapper to marella's backend
+# Follow him on his github project : https://github.com/marella/gpt4all-j 
+
 ######
 from pathlib import Path
 from typing import Callable
diff --git a/backends/hugging_face/models.yaml b/backends/hugging_face/models.yaml
deleted file mode 100644
index 76b88d88..00000000
--- a/backends/hugging_face/models.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-- bestGPTJ: 'true'
-  description: Current best commercially licensable model based on GPT-J and trained
-    by Nomic AI on the latest curated GPT4All dataset.
-  filename: ggml-gpt4all-j-v1.3-groovy.bin
-  filesize: '3785248281'
-  isDefault: 'true'
-  md5sum: 81a09a0ddf89690372fc296ff7f625af
-- bestLlama: 'true'
-  description: Current best non-commercially licensable model based on Llama 13b and
-    trained by Nomic AI on the latest curated GPT4All dataset.
-  filename: ggml-gpt4all-l13b-snoozy.bin
-  filesize: '8136770688'
-  md5sum: 91f886b68fbce697e9a3cd501951e455
-- bestMPT: 'true'
-  description: Current best non-commercially licensable chat model based on MPT and
-    trained by Mosaic ML.
-  filename: ggml-mpt-7b-chat.bin
-  filesize: '4854401050'
-  isDefault: 'true'
-  md5sum: 756249d3d6abe23bde3b1ae272628640
-  requires: 2.4.1
-- description: A commercially licensable model based on GPT-J and trained by Nomic
-    AI on the v2 GPT4All dataset.
-  filename: ggml-gpt4all-j-v1.2-jazzy.bin
-  filesize: '3785248281'
-  md5sum: 879344aaa9d62fdccbda0be7a09e7976
-- description: A commercially licensable model based on GPT-J and trained by Nomic
-    AI on the v1 GPT4All dataset.
-  filename: ggml-gpt4all-j-v1.1-breezy.bin
-  filesize: '3785248281'
-  md5sum: 61d48a82cb188cceb14ebb8082bfec37
-- description: A commercially licensable model based on GPT-J and trained by Nomic
-    AI on the v0 GPT4All dataset.
-  filename: ggml-gpt4all-j.bin
-  filesize: '3785248281'
-  md5sum: 5b5a3f9b858d33b29b52b89692415595
-- description: A non-commercially licensable model based on Llama 7b and trained by
-    teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
-  filename: ggml-vicuna-7b-1.1-q4_2.bin
-  filesize: '4212859520'
-  md5sum: 29119f8fa11712704c6b22ac5ab792ea
-- description: A non-commercially licensable model based on Llama 13b and trained
-    by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
-  filename: ggml-vicuna-13b-1.1-q4_2.bin
-  filesize: '8136770688'
-  md5sum: 95999b7b0699e2070af63bf5d34101a8
-- description: A non-commercially licensable model based on Llama 7b and trained by
-    Microsoft and Peking University.
-  filename: ggml-wizardLM-7B.q4_2.bin
-  filesize: '4212864640'
-  md5sum: 99e6d129745a3f1fb1121abed747b05a
-- description: A non-commercially licensable model based on Llama 13b and RLHF trained
-    by Stable AI.
-  filename: ggml-stable-vicuna-13B.q4_2.bin
-  filesize: '8136777088'
-  md5sum: 6cb4ee297537c9133bddab9692879de0
-- description: A commercially licensable model base pre-trained by Mosaic ML.
-  filename: ggml-mpt-7b-base.bin
-  filesize: '4854401028'
-  md5sum: 120c32a51d020066288df045ef5d52b9
-  requires: 2.4.1
-- description: A non-commercially licensable model based on Vicuna 13b, fine-tuned
-    on ~180,000 instructions, trained by Nous Research.
-  filename: ggml-nous-gpt4-vicuna-13b.bin
-  filesize: '8136777088'
-  md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe
-- description: A commericially licensable instruct model based on MPT and trained
-    by Mosaic ML.
-  filename: ggml-mpt-7b-instruct.bin
-  filesize: '4854401028'
-  md5sum: 1cfa4958f489f0a0d1ffdf6b37322809
-  requires: 2.4.1
diff --git a/backends/llama_cpp/models.yaml b/backends/llama_cpp/models.yaml
deleted file mode 100644
index 475cbcbb..00000000
--- a/backends/llama_cpp/models.yaml
+++ /dev/null
@@ -1,47 +0,0 @@
-- bestLlama: 'false'
-  description: The model who started it all
-  filename: gpt4all-lora-quantized-ggml.new.bin
-  md5sum: 91f886b68fbce697e9a3cd501951e455
-  server: https://huggingface.co/ParisNeo/GPT4All/resolve/main/
-- bestLlama: 'false'
-  description: The model who started it all (uncensored version)
-  filename: gpt4all-lora-unfiltered-quantized.new.bin
-  md5sum: 91f886b68fbce697e9a3cd501951e455
-  server: https://huggingface.co/ParisNeo/GPT4All/resolve/main/
-- bestLlama: 'true'
-  description: Current best non-commercially licensable model based on Llama 13b and
-    trained by Nomic AI on the latest curated GPT4All dataset.
-  filename: ggml-gpt4all-l13b-snoozy.bin
-  filesize: '8136770688'
-  md5sum: 91f886b68fbce697e9a3cd501951e455
-  server: https://gpt4all.io/models/
-- description: A non-commercially licensable model based on Llama 7b and trained by
-    teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
-  filename: ggml-vicuna-7b-1.1-q4_2.bin
-  filesize: '4212859520'
-  md5sum: 29119f8fa11712704c6b22ac5ab792ea
-  server: https://gpt4all.io/models/
-- description: A non-commercially licensable model based on Llama 13b and trained
-    by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
-  filename: ggml-vicuna-13b-1.1-q4_2.bin
-  filesize: '8136770688'
-  md5sum: 95999b7b0699e2070af63bf5d34101a8
-  server: https://gpt4all.io/models/
-- description: A non-commercially licensable model based on Llama 7b and trained by
-    Microsoft and Peking University.
-  filename: ggml-wizardLM-7B.q4_2.bin
-  filesize: '4212864640'
-  md5sum: 99e6d129745a3f1fb1121abed747b05a
-  server: https://gpt4all.io/models/
-- description: A non-commercially licensable model based on Llama 13b and RLHF trained
-    by Stable AI.
-  filename: ggml-stable-vicuna-13B.q4_2.bin
-  filesize: '8136777088'
-  md5sum: 6cb4ee297537c9133bddab9692879de0
-  server: https://gpt4all.io/models/
-- description: A non-commercially licensable model based on Vicuna 13b, fine-tuned
-    on ~180,000 instructions, trained by Nous Research.
-  filename: ggml-nous-gpt4-vicuna-13b.bin
-  filesize: '8136777088'
-  md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe
-  server: https://gpt4all.io/models/
diff --git a/backends/hugging_face/__init__.py b/backends/llama_cpp_official/__init__.py
similarity index 56%
rename from backends/hugging_face/__init__.py
rename to backends/llama_cpp_official/__init__.py
index e9630fc7..2584c5fc 100644
--- a/backends/hugging_face/__init__.py
+++ b/backends/llama_cpp_official/__init__.py
@@ -6,38 +6,39 @@
 # Licence       : Apache 2.0
 # Description   : 
 # This is an interface class for GPT4All-ui backends.
+
+# This backend is a wrapper to the official llamacpp python bindings
+# Follow him on his github project : https://github.com/abetlen/llama-cpp-python
+
 ######
 from pathlib import Path
 from typing import Callable
-from accelerate import init_empty_weights
-from accelerate import load_checkpoint_and_dispatch
-from transformers import AutoTokenizer
-from transformers import AutoConfig, AutoModelForCausalLM
+from llama_cpp import Llama
 from gpt4all_api.backend import GPTBackend
-import torch
+import yaml
+import random
 
 __author__ = "parisneo"
-__github__ = "https://github.com/ParisNeo/GPTQ_backend"
+__github__ = "https://github.com/nomic-ai/gpt4all-ui"
 __copyright__ = "Copyright 2023, "
 __license__ = "Apache 2.0"
 
-backend_name = "HuggingFace"
+backend_name = "LLAMACPP"
 
-class HuggingFace(GPTBackend):
-    file_extension='*'
+class LLAMACPP(GPTBackend):
+    file_extension='*.bin'
     def __init__(self, config:dict) -> None:
-        """Builds a HuggingFace backend
+        """Builds a LLAMACPP backend
 
         Args:
             config (dict): The configuration file
         """
         super().__init__(config, False)
-
-
-        # load quantized model, currently only support cpu or single gpu
-        config_path = AutoConfig.from_pretrained(config["model"])
-        self.tokenizer = AutoTokenizer.from_pretrained(config["model"])
-        self.model = AutoModelForCausalLM.from_pretrained(config["model"], load_in_8bit=True, device_map='auto')
+        seed = config["seed"]
+        if seed <=0:
+            seed = random.randint(1, 2**31)
+            
+        self.model = Llama(model_path=f"./models/llama_cpp_official/{self.config['model']}", n_gpu_layers=40, seed=seed)
 
     def generate(self, 
                  prompt:str,                  
@@ -54,30 +55,31 @@ class HuggingFace(GPTBackend):
             verbose (bool, optional): If true, the code will spit many informations about the generation process. Defaults to False.
         """
         try:
-            tok = self.tokenizer.decode(self.model.generate(**self.tokenizer(prompt, return_tensors="pt").to("cuda:0"))[0])
-            new_text_callback(tok)
-            """
             self.model.reset()
-            for tok in self.model.generate(prompt, 
-                                           n_predict=n_predict,                                           
-                                            temp=self.config['temp'],
+            tokens = self.model.tokenize(prompt.encode())
+            count = 0
+            for tok in self.model.generate(tokens, 
+                                            temp=self.config['temperature'],
                                             top_k=self.config['top_k'],
                                             top_p=self.config['top_p'],
                                             repeat_penalty=self.config['repeat_penalty'],
-                                            repeat_last_n = self.config['repeat_last_n'],
-                                            n_threads=self.config['n_threads'],
                                            ):
-                if not new_text_callback(tok):
+                if count >= n_predict or (tok == self.model.token_eos()):
+                    break
+                word = self.model.detokenize([tok]).decode()
+                if not new_text_callback(word):
                     return
-            """
+                count += 1
         except Exception as ex:
             print(ex)
             
     @staticmethod
-    def list_models(config:dict):
-        """Lists the models for this backend
-        """
+    def get_available_models():
+        # Create the file path relative to the child class's directory
+        backend_path = Path(__file__).parent
+        file_path = backend_path/"models.yaml"
+
+        with open(file_path, 'r') as file:
+            yaml_data = yaml.safe_load(file)
         
-        return [
-            "EleutherAI/gpt-j-6B"
-        ]
+        return yaml_data
\ No newline at end of file
diff --git a/backends/llama_cpp_official/models.yaml b/backends/llama_cpp_official/models.yaml
new file mode 100644
index 00000000..a5d2a42c
--- /dev/null
+++ b/backends/llama_cpp_official/models.yaml
@@ -0,0 +1,29 @@
+- bestLlama: 'true'
+  license: Non commercial
+  description: The official open assistant 30B model finally here
+  filename: OpenAssistant-SFT-7-Llama-30B.ggml.q4_0.bin
+  sha256: 32fd44c685fbf429810db593e2db8aa42a7e1be2cd3571b6005d53b029acfcf5
+  server: https://huggingface.co/TheBloke/OpenAssistant-SFT-7-Llama-30B-GGML/resolve/main/
+
+- bestLlama: 'true'
+  license: Non commercial
+  description: The wizardVicuna model 13B
+  filename: wizard-vicuna-13B.ggml.q4_0.bin
+  sha256: 32fd44c685fbf429810db593e2db8aa42a7e1be2cd3571b6005d53b029acfcf5
+  server: https://huggingface.co/TheBloke/wizard-vicuna-13B-GGML/resolve/main/
+
+- bestLlama: 'true'
+  license: Non commercial
+  description: The wizardLM model 7B
+  filename: WizardLM-7B-uncensored.ggml.q4_0.bin
+  sha256: b1e53a3c3a9389b9c5d81e0813cfb90ebaff6acad1733fad08cd28974fa3ac30
+  server: https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/
+
+- bestLlama: 'true'
+  license: Non commercial
+  description: The wizardLM model uncensored
+  filename: WizardLM-7B-uncensored.ggml.q4_0.bin
+  sha256: b1e53a3c3a9389b9c5d81e0813cfb90ebaff6acad1733fad08cd28974fa3ac30
+  server: https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/
+
+
diff --git a/backends/llama_cpp_official/requirements.txt b/backends/llama_cpp_official/requirements.txt
new file mode 100644
index 00000000..f442f3f6
--- /dev/null
+++ b/backends/llama_cpp_official/requirements.txt
@@ -0,0 +1 @@
+llama-cpp-python
\ No newline at end of file
diff --git a/backends/llama_cpp/__init__.py b/backends/py_llama_cpp/__init__.py
similarity index 90%
rename from backends/llama_cpp/__init__.py
rename to backends/py_llama_cpp/__init__.py
index 769f307f..37b89f1b 100644
--- a/backends/llama_cpp/__init__.py
+++ b/backends/py_llama_cpp/__init__.py
@@ -6,6 +6,10 @@
 # Licence       : Apache 2.0
 # Description   : 
 # This is an interface class for GPT4All-ui backends.
+
+# This backend is a wrapper to abdeladim's backend
+# Follow him on his github project : https://github.com/abdeladim-s/pyllamacpp
+
 ######
 from pathlib import Path
 from typing import Callable
@@ -18,9 +22,9 @@ __github__ = "https://github.com/nomic-ai/gpt4all-ui"
 __copyright__ = "Copyright 2023, "
 __license__ = "Apache 2.0"
 
-backend_name = "LLAMACPP"
+backend_name = "PyLLAMACPP"
 
-class LLAMACPP(GPTBackend):
+class PyLLAMACPP(GPTBackend):
     file_extension='*.bin'
     def __init__(self, config:dict) -> None:
         """Builds a LLAMACPP backend
@@ -31,7 +35,7 @@ class LLAMACPP(GPTBackend):
         super().__init__(config, False)
         
         self.model = Model(
-                model_path=f"./models/llama_cpp/{self.config['model']}",
+                model_path=f"./models/py_llama_cpp/{self.config['model']}",
                 prompt_context="", prompt_prefix="", prompt_suffix="",
                 n_ctx=self.config['ctx_size'], 
                 seed=self.config['seed'],
diff --git a/backends/py_llama_cpp/models.yaml b/backends/py_llama_cpp/models.yaml
new file mode 100644
index 00000000..c8ed6a2b
--- /dev/null
+++ b/backends/py_llama_cpp/models.yaml
@@ -0,0 +1,11 @@
+- bestLlama: 'true'
+  description: The official open assistant 30B model finally here
+  filename: OpenAssistant-SFT-7-Llama-30B.ggml.q4_0.bin
+  md5sum: 91f886b68fbce697e9a3cd501951e455
+  server: https://huggingface.co/TheBloke/OpenAssistant-SFT-7-Llama-30B-GGML/resolve/main/
+
+- bestLlama: 'true'
+  description: Stable vicuna 13B
+  filename: stable-vicuna-13B.ggml.q5_1.bin
+  md5sum: 91f886b68fbce697e9a3cd501951e455
+  server: https://huggingface.co/TheBloke/stable-vicuna-13B-GGML/resolve/main/
diff --git a/backends/py_llama_cpp/requirements.txt b/backends/py_llama_cpp/requirements.txt
new file mode 100644
index 00000000..aa3c1719
--- /dev/null
+++ b/backends/py_llama_cpp/requirements.txt
@@ -0,0 +1 @@
+pyllamacpp
\ No newline at end of file
diff --git a/configs/default.yaml b/configs/default.yaml
index acafb587..eb656a3f 100644
--- a/configs/default.yaml
+++ b/configs/default.yaml
@@ -7,7 +7,7 @@ n_threads: 8
 host: localhost
 language: en-US
 # Supported backends are llamacpp and gpt-j
-backend: llama_cpp
+backend: llama_cpp_official
 model: null
 n_predict: 1024
 nb_messages_to_remember: 5
diff --git a/gpt4all_api/api.py b/gpt4all_api/api.py
index 8f2dc817..15d178e3 100644
--- a/gpt4all_api/api.py
+++ b/gpt4all_api/api.py
@@ -28,6 +28,58 @@ __license__ = "Apache 2.0"
 
 
 
+import subprocess
+import pkg_resources
+
+
+# ===========================================================
+# Manage automatic install scripts
+
+def is_package_installed(package_name):
+    """Return True when pkg_resources finds a distribution for package_name."""
+    try:
+        pkg_resources.get_distribution(package_name)
+    except pkg_resources.DistributionNotFound:
+        return False
+    return True
+
+def install_package(package_name):
+    """Install a pip requirement (e.g. "pkg" or "pkg>=1.0") unless it is already satisfied."""
+    import sys  # local import: only needed to locate the running interpreter
+    try:
+        # Query the packaging metadata rather than __import__: distribution names
+        # (llama-cpp-python) can differ from module names (llama_cpp).
+        pkg_resources.get_distribution(package_name)
+        print(f"{package_name} is already installed.")
+    except pkg_resources.ResolutionError:
+        print(f"{package_name} is not installed. Installing...")
+        # Run pip with the current interpreter so the package lands in this environment.
+        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+        print(f"{package_name} has been successfully installed.")
+
+def parse_requirements_file(requirements_path):
+    """Install every requirement of a requirements.txt file that is not installed yet.
+
+    Blank lines and lines starting with '#' are skipped. Only the '==' and '>='
+    specifiers are recognised when extracting the package name.
+    """
+    with open(requirements_path, 'r') as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            # Cut at whichever specifier is present; the name is what precedes it.
+            package_name = line.partition('==')[0].partition('>=')[0].strip()
+            if is_package_installed(package_name):
+                print(f"{package_name} is already installed.")
+            else:
+                # Pass the whole line so pip also receives the version constraint.
+                install_package(line)
+
+
+# ===========================================================
+
+
 class ModelProcess:
     def __init__(self, config=None):
         self.config = config
@@ -42,7 +94,11 @@ class ModelProcess:
         self.model_ready  = mp.Value('i', 0)
         self.ready = False
             
-    def load_backend(self, backend_path):
+    def load_backend(self, backend_path:Path):
+        # first find out if there is a requirements.txt file
+        requirements_file = backend_path/"requirements.txt"
+        if requirements_file.exists():
+            parse_requirements_file(requirements_file)        
 
         # define the full absolute path to the module
         absolute_path = backend_path.resolve()
@@ -88,6 +144,7 @@ class ModelProcess:
     
     def rebuild_backend(self, config):
         try:
+            
             backend = self.load_backend(Path("backends")/config["backend"])
             print("Backend loaded successfully")
         except Exception as ex:
@@ -239,6 +296,7 @@ class ModelProcess:
         while not self.set_config_queue.empty():
             config = self.set_config_queue.get()
             if config is not None:
+                print("Inference process : Setting configuration")
                 self._set_config(config)
 
     def _cancel_generation(self):
diff --git a/models/llama_cpp/.keep b/models/llama_cpp/.keep
deleted file mode 100644
index e69de29b..00000000
diff --git a/models/hugging_face/.keep b/models/llama_cpp_official/.keep
similarity index 100%
rename from models/hugging_face/.keep
rename to models/llama_cpp_official/.keep
diff --git a/requirements.txt b/requirements.txt
index 2599052d..666c7727 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,6 @@ nomic
 pytest
 pyyaml
 markdown
-pyllamacpp==2.1.1
 gpt4all-j
 pygptj
 gpt4all
@@ -16,4 +15,5 @@ transformers
 accelerate
 gevent
 gevent-websocket
-pyaipersonality>=0.0.12
\ No newline at end of file
+pyaipersonality>=0.0.12
+ctransformers
\ No newline at end of file