diff --git a/backends/fix_yaml.py b/backends/fix_yaml.py new file mode 100644 index 00000000..e68b1f07 --- /dev/null +++ b/backends/fix_yaml.py @@ -0,0 +1,53 @@ +import argparse +import yaml +from urllib.parse import urlparse + +from pathlib import Path + +def process_yaml(input_file): + # Read YAML file + with open(input_file, 'r') as file: + models = yaml.safe_load(file) + + # Process each model entry + for model in models: + server_url = model['server'] + parsed_url = urlparse(server_url) + if not 'owner' in model: + if 'huggingface.co' in parsed_url.netloc: + # Hugging Face URL, extract owner from server URL + model['owner'] = parsed_url.path.split('/')[1] + else: + # Non-Hugging Face URL, use domain name as owner + model['owner'] = parsed_url.netloc + + # Add additional fields + if not 'link' in model: + model['link'] = server_url + if not 'license' in model: + model['license'] = 'Non commercial' + + # Save processed YAML file + output_file = input_file.stem + '_processed.yaml' + with open(output_file, 'w') as file: + yaml.dump(models, file) + + print(f"Processed YAML file saved as {output_file}") + +def main(): + # Parse command-line arguments + parser = argparse.ArgumentParser(description='Process YAML file') + parser.add_argument('input_file', type=str, help='Input YAML file') + + args = parser.parse_args() + + input_file = Path(args.input_file) + + if not input_file.exists(): + print('Input file does not exist.') + return + + process_yaml(input_file) + +if __name__ == '__main__': + main() diff --git a/backends/gpt_4all/__init__.py b/backends/gpt_4all/__init__.py index 83d208e5..9281ebd5 100644 --- a/backends/gpt_4all/__init__.py +++ b/backends/gpt_4all/__init__.py @@ -37,7 +37,7 @@ class GPT4ALL(GPTBackend): super().__init__(config, False) self.model = GPT4All.get_model_from_name(self.config['model']) self.model.load_model( - model_path=f"./models/gpt_4all/{self.config['model']}", + model_path=f"./models/gpt_4all/{self.config['model']}" ) @@ -88,7 +88,7 @@ class GPT4ALL(GPTBackend): repeat_penalty=self.config['repeat_penalty'], repeat_last_n = self.config['repeat_last_n'], # n_threads=self.config['n_threads'], - streaming=True + streaming=True, ): output += tok if new_text_callback is not None: diff --git a/backends/gpt_4all/models.yaml b/backends/gpt_4all/models.yaml index 18d75008..eabfa2ac 100644 --- a/backends/gpt_4all/models.yaml +++ b/backends/gpt_4all/models.yaml @@ -4,14 +4,20 @@ filename: ggml-gpt4all-j-v1.3-groovy.bin filesize: '3785248281' isDefault: 'true' + license: Apache 2.0 + link: https://gpt4all.io md5sum: 81a09a0ddf89690372fc296ff7f625af + owner: Nomic AI server: https://gpt4all.io/models/ - bestLlama: 'true' description: Current best non-commercially licensable model based on Llama 13b and trained by Nomic AI on the latest curated GPT4All dataset. filename: ggml-gpt4all-l13b-snoozy.bin filesize: '8136770688' + license: Non commercial + link: https://gpt4all.io md5sum: 91f886b68fbce697e9a3cd501951e455 + owner: Nomic AI server: https://gpt4all.io/models/ - bestMPT: 'true' description: Current best non-commercially licensable chat model based on MPT and @@ -19,67 +25,99 @@ filename: ggml-mpt-7b-chat.bin filesize: '4854401050' isDefault: 'true' + license: Non commercial + link: https://gpt4all.io md5sum: 756249d3d6abe23bde3b1ae272628640 + owner: Nomic AI requires: 2.4.1 server: https://gpt4all.io/models/ - description: A commercially licensable model based on GPT-J and trained by Nomic AI on the v2 GPT4All dataset. filename: ggml-gpt4all-j-v1.2-jazzy.bin filesize: '3785248281' + license: Apache 2.0 + link: https://gpt4all.io md5sum: 879344aaa9d62fdccbda0be7a09e7976 + owner: Nomic AI server: https://gpt4all.io/models/ - description: A commercially licensable model based on GPT-J and trained by Nomic AI on the v1 GPT4All dataset. filename: ggml-gpt4all-j-v1.1-breezy.bin filesize: '3785248281' + license: Apache 2.0 + link: https://gpt4all.io md5sum: 61d48a82cb188cceb14ebb8082bfec37 + owner: Nomic AI server: https://gpt4all.io/models/ - description: A commercially licensable model based on GPT-J and trained by Nomic AI on the v0 GPT4All dataset. filename: ggml-gpt4all-j.bin filesize: '3785248281' + license: Apache 2.0 + link: https://gpt4all.io md5sum: 5b5a3f9b858d33b29b52b89692415595 + owner: Nomic AI server: https://gpt4all.io/models/ - description: A non-commercially licensable model based on Llama 7b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego. filename: ggml-vicuna-7b-1.1-q4_2.bin filesize: '4212859520' + license: Non commercial + link: https://gpt4all.io md5sum: 29119f8fa11712704c6b22ac5ab792ea + owner: Nomic AI server: https://gpt4all.io/models/ - description: A non-commercially licensable model based on Llama 13b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego. filename: ggml-vicuna-13b-1.1-q4_2.bin filesize: '8136770688' + license: Non commercial + link: https://gpt4all.io md5sum: 95999b7b0699e2070af63bf5d34101a8 + owner: Nomic AI server: https://gpt4all.io/models/ - description: A non-commercially licensable model based on Llama 7b and trained by Microsoft and Peking University. filename: ggml-wizardLM-7B.q4_2.bin filesize: '4212864640' + license: Non commercial + link: https://gpt4all.io md5sum: 99e6d129745a3f1fb1121abed747b05a + owner: Nomic AI server: https://gpt4all.io/models/ - description: A non-commercially licensable model based on Llama 13b and RLHF trained by Stable AI. filename: ggml-stable-vicuna-13B.q4_2.bin filesize: '8136777088' + license: Non commercial + link: https://gpt4all.io md5sum: 6cb4ee297537c9133bddab9692879de0 + owner: Nomic AI server: https://gpt4all.io/models/ - description: A commercially licensable model base pre-trained by Mosaic ML. filename: ggml-mpt-7b-base.bin filesize: '4854401028' + license: Non commercial + link: https://gpt4all.io md5sum: 120c32a51d020066288df045ef5d52b9 + owner: Nomic AI requires: 2.4.1 server: https://gpt4all.io/models/ - description: A non-commercially licensable model based on Vicuna 13b, fine-tuned on ~180,000 instructions, trained by Nous Research. filename: ggml-nous-gpt4-vicuna-13b.bin filesize: '8136777088' + license: Non commercial + link: https://gpt4all.io/models/ md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe + owner: gpt4all.io server: https://gpt4all.io/models/ - description: A commericially licensable instruct model based on MPT and trained by Mosaic ML. filename: ggml-mpt-7b-instruct.bin filesize: '4854401028' + license: Apache 2.0 + link: https://gpt4all.io md5sum: 1cfa4958f489f0a0d1ffdf6b37322809 - requires: 2.4.1 + owner: Nomic AI server: https://gpt4all.io/models/ diff --git a/backends/gpt_4all/requirements.txt b/backends/gpt_4all/requirements.txt index 461121c3..e97837d1 100644 --- a/backends/gpt_4all/requirements.txt +++ b/backends/gpt_4all/requirements.txt @@ -1 +1 @@ -gpt4all \ No newline at end of file +gpt4all>=0.2.3 \ No newline at end of file diff --git a/backends/gpt_j_a/models.yaml b/backends/gpt_j_a/models.yaml new file mode 100644 index 00000000..6994830e --- /dev/null +++ b/backends/gpt_j_a/models.yaml @@ -0,0 +1,89 @@ +- bestGPTJ: 'true' + owner: Nomic AI + link: https://gpt4all.io + description: Current best commercially licensable model based on GPT-J and trained + by Nomic AI on the latest curated GPT4All dataset. + filename: ggml-gpt4all-j-v1.3-groovy.bin + filesize: '3785248281' + isDefault: 'true' + md5sum: 81a09a0ddf89690372fc296ff7f625af + server: https://gpt4all.io/models/ +- description: A commercially licensable model based on GPT-J and trained by Nomic + AI on the v2 GPT4All dataset. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-gpt4all-j-v1.2-jazzy.bin + filesize: '3785248281' + md5sum: 879344aaa9d62fdccbda0be7a09e7976 + server: https://gpt4all.io/models/ +- description: A commercially licensable model based on GPT-J and trained by Nomic + AI on the v1 GPT4All dataset. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-gpt4all-j-v1.1-breezy.bin + filesize: '3785248281' + md5sum: 61d48a82cb188cceb14ebb8082bfec37 + server: https://gpt4all.io/models/ +- description: A commercially licensable model based on GPT-J and trained by Nomic + AI on the v0 GPT4All dataset. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-gpt4all-j.bin + filesize: '3785248281' + md5sum: 5b5a3f9b858d33b29b52b89692415595 + server: https://gpt4all.io/models/ +- description: A non-commercially licensable model based on Llama 7b and trained by + teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-vicuna-7b-1.1-q4_2.bin + filesize: '4212859520' + md5sum: 29119f8fa11712704c6b22ac5ab792ea + server: https://gpt4all.io/models/ +- description: A non-commercially licensable model based on Llama 13b and trained + by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-vicuna-13b-1.1-q4_2.bin + filesize: '8136770688' + md5sum: 95999b7b0699e2070af63bf5d34101a8 + server: https://gpt4all.io/models/ +- description: A non-commercially licensable model based on Llama 7b and trained by + Microsoft and Peking University. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-wizardLM-7B.q4_2.bin + filesize: '4212864640' + md5sum: 99e6d129745a3f1fb1121abed747b05a + server: https://gpt4all.io/models/ +- description: A non-commercially licensable model based on Llama 13b and RLHF trained + by Stable AI. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-stable-vicuna-13B.q4_2.bin + filesize: '8136777088' + md5sum: 6cb4ee297537c9133bddab9692879de0 + server: https://gpt4all.io/models/ +- description: A commercially licensable model base pre-trained by Mosaic ML. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-mpt-7b-base.bin + filesize: '4854401028' + md5sum: 120c32a51d020066288df045ef5d52b9 + requires: 2.4.1 + server: https://gpt4all.io/models/ +- description: A non-commercially licensable model based on Vicuna 13b, fine-tuned + on ~180,000 instructions, trained by Nous Research. + filename: ggml-nous-gpt4-vicuna-13b.bin + filesize: '8136777088' + md5sum: d5eafd5b0bd0d615cfd5fd763f642dfe + server: https://gpt4all.io/models/ +- description: A commericially licensable instruct model based on MPT and trained + by Mosaic ML. + owner: Nomic AI + link: https://gpt4all.io + filename: ggml-mpt-7b-instruct.bin + filesize: '4854401028' + md5sum: 1cfa4958f489f0a0d1ffdf6b37322809 + requires: 2.4.1 + server: https://gpt4all.io/models/ diff --git a/backends/llama_cpp_official/models.yaml b/backends/llama_cpp_official/models.yaml index a5d2a42c..bd770b7d 100644 --- a/backends/llama_cpp_official/models.yaml +++ b/backends/llama_cpp_official/models.yaml @@ -1,5 +1,7 @@ - bestLlama: 'true' license: Non commercial + owner: TheBloke + link: https://huggingface.co/TheBloke description: The official open assistant 30B model finally here filename: OpenAssistant-SFT-7-Llama-30B.ggml.q4_0.bin sha256: 32fd44c685fbf429810db593e2db8aa42a7e1be2cd3571b6005d53b029acfcf5 diff --git a/gpt4all_api/api.py b/gpt4all_api/api.py index 68ea105a..dd6c85c1 100644 --- a/gpt4all_api/api.py +++ b/gpt4all_api/api.py @@ -183,7 +183,7 @@ class ModelProcess: print(f"Loading model : {model_file}") self.model = self.backend(self.config) self.model_ready.value = 1 - print("Model created successfully\ntesting the model, please wait ...") + print("Model created successfully\n") except Exception as ex: print("Couldn't build model") print(ex)