upgraded code

2024-12-20 21:03:07 +00:00 · 2023-05-19 22:21:13 +02:00 · 2023-05-19 22:21:13 +02:00 · e13b0d0531
commit e13b0d0531
parent 5a05ab79aa
7 changed files with 187 additions and 5 deletions
--- a/backends/fix_yaml.py
+++ b/backends/fix_yaml.py
@ -0,0 +1,53 @@
+import argparse
+import yaml
+from urllib.parse import urlparse
+
+from pathlib import Path
+
+def process_yaml(input_file):
+    """Fill in missing 'owner', 'link' and 'license' fields of a models YAML list.
+
+    Reads ``input_file`` (str or Path), completes each entry in place and writes
+    the result to ``<stem>_processed.yaml`` in the current working directory.
+    """
+    input_file = Path(input_file)  # accept plain strings as well as Path objects
+    with open(input_file, 'r', encoding='utf-8') as file:
+        models = yaml.safe_load(file) or []  # an empty file loads as None
+
+    for model in models:
+        server_url = model['server']
+        parsed_url = urlparse(server_url)
+        if 'owner' not in model:
+            path_parts = [part for part in parsed_url.path.split('/') if part]
+            if 'huggingface.co' in parsed_url.netloc and path_parts:
+                # Hugging Face URL: the first path segment is the owner
+                model['owner'] = path_parts[0]
+            else:
+                # Anything else (or no path at all): fall back to the domain name
+                model['owner'] = parsed_url.netloc
+        model.setdefault('link', server_url)
+        model.setdefault('license', 'Non commercial')
+
+    output_file = input_file.stem + '_processed.yaml'
+    with open(output_file, 'w', encoding='utf-8') as file:
+        yaml.dump(models, file)
+
+    print(f"Processed YAML file saved as {output_file}")
+
+def main():
+    """Command-line entry point: process the YAML file given as the sole argument."""
+    arg_parser = argparse.ArgumentParser(description='Process YAML file')
+    arg_parser.add_argument('input_file', type=str, help='Input YAML file')
+    cli_args = arg_parser.parse_args()
+
+    yaml_path = Path(cli_args.input_file)
+
+    # Only hand the path to process_yaml when it points at something on disk;
+    # otherwise report the problem and return without processing.
+    if yaml_path.exists():
+        process_yaml(yaml_path)
+    else:
+        print('Input file does not exist.')
+
+if __name__ == '__main__':
+    main()
--- a/backends/gpt_4all/init.py
+++ b/backends/gpt_4all/init.py
@ -37,7 +37,7 @@ class GPT4ALL(GPTBackend):
        super().__init__(config, False)
        self.model = GPT4All.get_model_from_name(self.config['model'])
        self.model.load_model(
-                model_path=f"./models/gpt_4all/{self.config['model']}",
+                model_path=f"./models/gpt_4all/{self.config['model']}"
        )


@ -88,7 +88,7 @@ class GPT4ALL(GPTBackend):
                                            repeat_penalty=self.config['repeat_penalty'],
                                            repeat_last_n = self.config['repeat_last_n'],
                                            # n_threads=self.config['n_threads'],
-                                            streaming=True
+                                            streaming=True,
                                           ):
                output += tok
                if new_text_callback is not None:
--- a/backends/gpt_4all/models.yaml
+++ b/backends/gpt_4all/models.yaml
@ -4,14 +4,20 @@
  filename: ggml-gpt4all-j-v1.3-groovy.bin
  filesize: '3785248281'
  isDefault: 'true'
+  license: Apache 2.0
+  link: https://gpt4all.io
  md5sum: 81a09a0ddf89690372fc296ff7f625af
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - bestLlama: 'true'
  description: Current best non-commercially licensable model based on Llama 13b and
    trained by Nomic AI on the latest curated GPT4All dataset.
  filename: ggml-gpt4all-l13b-snoozy.bin
  filesize: '8136770688'
+  license: Non commercial
+  link: https://gpt4all.io
  md5sum: 91f886b68fbce697e9a3cd501951e455
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - bestMPT: 'true'
  description: Current best non-commercially licensable chat model based on MPT and
@ -19,67 +25,99 @@
  filename: ggml-mpt-7b-chat.bin
  filesize: '4854401050'
  isDefault: 'true'
+  license: Non commercial
+  link: https://gpt4all.io
  md5sum: 756249d3d6abe23bde3b1ae272628640
+  owner: Nomic AI
  requires: 2.4.1
  server: https://gpt4all.io/models/
 - description: A commercially licensable model based on GPT-J and trained by Nomic
    AI on the v2 GPT4All dataset.
  filename: ggml-gpt4all-j-v1.2-jazzy.bin
  filesize: '3785248281'
+  license: Apache 2.0
+  link: https://gpt4all.io
  md5sum: 879344aaa9d62fdccbda0be7a09e7976
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - description: A commercially licensable model based on GPT-J and trained by Nomic
    AI on the v1 GPT4All dataset.
  filename: ggml-gpt4all-j-v1.1-breezy.bin
  filesize: '3785248281'
+  license: Apache 2.0
+  link: https://gpt4all.io
  md5sum: 61d48a82cb188cceb14ebb8082bfec37
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - description: A commercially licensable model based on GPT-J and trained by Nomic
    AI on the v0 GPT4All dataset.
  filename: ggml-gpt4all-j.bin
  filesize: '3785248281'
+  license: Apache 2.0
+  link: https://gpt4all.io
  md5sum: 5b5a3f9b858d33b29b52b89692415595
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - description: A non-commercially licensable model based on Llama 7b and trained by
    teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
  filename: ggml-vicuna-7b-1.1-q4_2.bin
  filesize: '4212859520'
+  license: Non commercial
+  link: https://gpt4all.io
  md5sum: 29119f8fa11712704c6b22ac5ab792ea
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - description: A non-commercially licensable model based on Llama 13b and trained
    by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
  filename: ggml-vicuna-13b-1.1-q4_2.bin
  filesize: '8136770688'
+  license: Non commercial
+  link: https://gpt4all.io
  md5sum: 95999b7b0699e2070af63bf5d34101a8
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - description: A non-commercially licensable model based on Llama 7b and trained by
    Microsoft and Peking University.
  filename: ggml-wizardLM-7B.q4_2.bin
  filesize: '4212864640'
+  license: Non commercial
+  link: https://gpt4all.io
  md5sum: 99e6d129745a3f1fb1121abed747b05a
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - description: A non-commercially licensable model based on Llama 13b and RLHF trained
    by Stable AI.
  filename: ggml-stable-vicuna-13B.q4_2.bin
  filesize: '8136777088'
+  license: Non commercial
+  link: https://gpt4all.io
  md5sum: 6cb4ee297537c9133bddab9692879de0
+  owner: Nomic AI
  server: https://gpt4all.io/models/
 - description: A commercially licensable model base pre-trained by Mosaic ML.
  filename: ggml-mpt-7b-base.bin
  filesize: '4854401028'
+  license: Non commercial
+  link: https://gpt4all.io
  md5sum: 120c32a51d020066288df045ef5d52b9
+  owner: Nomic AI
  requires: 2.4.1
  server: https://gpt4all.io/models/
 - description: A non-commercially licensable model based on Vicuna 13b, fine-tuned
    on ~180,000 instructions, trained by Nous Research.
  filename: ggml-nous-gpt4-vicuna-13b.bin
  filesize: '8136777088'
+  license: Non commercial
+  link: https://gpt4all.io/models/
  md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe
+  owner: gpt4all.io
  server: https://gpt4all.io/models/
 - description: A commericially licensable instruct model based on MPT and trained
    by Mosaic ML.
  filename: ggml-mpt-7b-instruct.bin
  filesize: '4854401028'
+  license: Apache 2.0
+  link: https://gpt4all.io
  md5sum: 1cfa4958f489f0a0d1ffdf6b37322809
-  requires: 2.4.1
+  owner: Nomic AI
  server: https://gpt4all.io/models/
--- a/backends/gpt_4all/requirements.txt
+++ b/backends/gpt_4all/requirements.txt
@ -1 +1 @@
-gpt4all
+gpt4all>=0.2.3
--- a/backends/gpt_j_a/models.yaml
+++ b/backends/gpt_j_a/models.yaml
@ -0,0 +1,89 @@
+- bestGPTJ: 'true'
+  owner: Nomic AI
+  link: https://gpt4all.io
+  description: Current best commercially licensable model based on GPT-J and trained
+    by Nomic AI on the latest curated GPT4All dataset.
+  filename: ggml-gpt4all-j-v1.3-groovy.bin
+  filesize: '3785248281'
+  isDefault: 'true'
+  md5sum: 81a09a0ddf89690372fc296ff7f625af
+  server: https://gpt4all.io/models/
+- description: A commercially licensable model based on GPT-J and trained by Nomic
+    AI on the v2 GPT4All dataset.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-gpt4all-j-v1.2-jazzy.bin
+  filesize: '3785248281'
+  md5sum: 879344aaa9d62fdccbda0be7a09e7976
+  server: https://gpt4all.io/models/
+- description: A commercially licensable model based on GPT-J and trained by Nomic
+    AI on the v1 GPT4All dataset.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-gpt4all-j-v1.1-breezy.bin
+  filesize: '3785248281'
+  md5sum: 61d48a82cb188cceb14ebb8082bfec37
+  server: https://gpt4all.io/models/
+- description: A commercially licensable model based on GPT-J and trained by Nomic
+    AI on the v0 GPT4All dataset.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-gpt4all-j.bin
+  filesize: '3785248281'
+  md5sum: 5b5a3f9b858d33b29b52b89692415595
+  server: https://gpt4all.io/models/
+- description: A non-commercially licensable model based on Llama 7b and trained by
+    teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-vicuna-7b-1.1-q4_2.bin
+  filesize: '4212859520'
+  md5sum: 29119f8fa11712704c6b22ac5ab792ea
+  server: https://gpt4all.io/models/
+- description: A non-commercially licensable model based on Llama 13b and trained
+    by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-vicuna-13b-1.1-q4_2.bin
+  filesize: '8136770688'
+  md5sum: 95999b7b0699e2070af63bf5d34101a8
+  server: https://gpt4all.io/models/
+- description: A non-commercially licensable model based on Llama 7b and trained by
+    Microsoft and Peking University.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-wizardLM-7B.q4_2.bin
+  filesize: '4212864640'
+  md5sum: 99e6d129745a3f1fb1121abed747b05a
+  server: https://gpt4all.io/models/
+- description: A non-commercially licensable model based on Llama 13b and RLHF trained
+    by Stable AI.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-stable-vicuna-13B.q4_2.bin
+  filesize: '8136777088'
+  md5sum: 6cb4ee297537c9133bddab9692879de0
+  server: https://gpt4all.io/models/
+- description: A commercially licensable model base pre-trained by Mosaic ML.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-mpt-7b-base.bin
+  filesize: '4854401028'
+  md5sum: 120c32a51d020066288df045ef5d52b9
+  requires: 2.4.1
+  server: https://gpt4all.io/models/
+- description: A non-commercially licensable model based on Vicuna 13b, fine-tuned
+    on ~180,000 instructions, trained by Nous Research.
+  filename: ggml-nous-gpt4-vicuna-13b.bin
+  filesize: '8136777088'
+  md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe
+  server: https://gpt4all.io/models/
+- description: A commercially licensable instruct model based on MPT and trained
+    by Mosaic ML.
+  owner: Nomic AI
+  link: https://gpt4all.io
+  filename: ggml-mpt-7b-instruct.bin
+  filesize: '4854401028'
+  md5sum: 1cfa4958f489f0a0d1ffdf6b37322809
+  requires: 2.4.1
+  server: https://gpt4all.io/models/
--- a/backends/llama_cpp_official/models.yaml
+++ b/backends/llama_cpp_official/models.yaml
@ -1,5 +1,7 @@
 - bestLlama: 'true'
  license: Non commercial
+  owner: TheBloke
+  link: https://huggingface.co/TheBloke
  description: The official open assistant 30B model finally here
  filename: OpenAssistant-SFT-7-Llama-30B.ggml.q4_0.bin
  sha256: 32fd44c685fbf429810db593e2db8aa42a7e1be2cd3571b6005d53b029acfcf5
--- a/gpt4all_api/api.py
+++ b/gpt4all_api/api.py
@ -183,7 +183,7 @@ class ModelProcess:
                print(f"Loading model : {model_file}")
                self.model = self.backend(self.config)
                self.model_ready.value = 1
-                print("Model created successfully\ntesting the model, please wait ...")
+                print("Model created successfully\n")
            except Exception as ex:
                print("Couldn't build model")
                print(ex)