moved to PyAIPersonality file format

ParisNeo 2023-04-30 22:40:19 +02:00
parent 5c5daf084d
commit b317c3e2aa
14 changed files with 1193 additions and 355 deletions

.gitignore (vendored, 2 changes)

@@ -149,7 +149,7 @@ configs/*
# personalities other than the default one
personalities/*
-!personalities/english/general/gpt4all_chatbot.yaml
+!personalities/english/general/gpt4all_chatbot/config.yaml
# personalities other than the default one
databases/*


@@ -103,7 +103,7 @@ You can also refuse to download the model during the install procedure and downl
## LLama_cpp models
- [GPT4ALL 7B](https://huggingface.co/ParisNeo/GPT4All/resolve/main/gpt4all-lora-quantized-ggml.bin) or visit [repository](https://huggingface.co/ParisNeo/GPT4All)
-- [GPT4ALL 7B unfiltered](https://huggingface.co/ParisNeo/GPT4All/blob/main/gpt4all-lora-unfiltered-quantized.new.bin) or visit [repository](https://huggingface.co/ParisNeo/GPT4All)
+- [GPT4ALL 7B unfiltered](https://huggingface.co/ParisNeo/GPT4All/resolve/main/gpt4all-lora-unfiltered-quantized.new.bin) or visit [repository](https://huggingface.co/ParisNeo/GPT4All)
- [Vicuna 7B rev 1](https://huggingface.co/eachadea/legacy-ggml-vicuna-7b-4bit/resolve/main/ggml-vicuna-7b-4bit-rev1.bin) or visit [repository](https://huggingface.co/eachadea/legacy-ggml-vicuna-7b-4bit)
- [Vicuna 13B rev 1](https://huggingface.co/eachadea/ggml-vicuna-13b-4bit/resolve/main/ggml-vicuna-13b-4bit-rev1.bin) or visit [repository](https://huggingface.co/eachadea/ggml-vicuna-13b-4bit)

app.py (20 changes)

@@ -22,6 +22,7 @@ import re
import traceback
import threading
import sys
+from pyaipersonality import AIPersonality
from pyGpt4All.db import DiscussionsDB, Discussion
from flask import (
Flask,
@@ -222,7 +223,7 @@ class Gpt4AllWebUI(GPT4AllAPI):
def list_personalities(self):
personalities_dir = Path(f'./personalities/{self.config["personality_language"]}/{self.config["personality_category"]}') # path to the personalities folder
-personalities = [f.stem for f in personalities_dir.glob('*.yaml')]
+personalities = [f.stem for f in personalities_dir.iterdir() if f.is_dir()]
return jsonify(personalities)
def list_languages(self):
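Since a personality is now a folder rather than a single .yaml file, discovery switches from globbing for files to iterating over directories. A minimal standalone sketch of the new scheme (the example path is an assumption):

from pathlib import Path

# Each personality now lives in its own folder containing a config.yaml,
# so we list sub-directories instead of *.yaml files.
personalities_dir = Path("./personalities/english/general")  # hypothetical path
personalities = [f.stem for f in personalities_dir.iterdir() if f.is_dir()]
print(personalities)  # e.g. ['gpt4all_chatbot']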
@@ -297,13 +298,13 @@ class Gpt4AllWebUI(GPT4AllAPI):
if self.current_discussion:
# First we need to send the new message ID to the client
response_id = self.current_discussion.add_message(
self.personality["name"], "", parent = message_id
self.personality.name, "", parent = message_id
) # first the content is empty, but we'll fill it at the end
socketio.emit('infos',
{
"type": "input_message_infos",
"bot": self.personality["name"],
"user": self.personality["user_name"],
"bot": self.personality.name,
"user": self.personality.user_name,
"message":message,#markdown.markdown(message),
"id": message_id,
"response_id": response_id,
@@ -407,7 +408,7 @@ class Gpt4AllWebUI(GPT4AllAPI):
# target=self.create_chatbot()
# Return a success response
-return json.dumps({"id": self.current_discussion.discussion_id, "time": timestamp, "welcome_message":self.personality["welcome_message"], "sender":self.personality["name"]})
+return json.dumps({"id": self.current_discussion.discussion_id, "time": timestamp, "welcome_message":self.personality.welcome_message, "sender":self.personality.name})
def set_backend(self):
data = request.get_json()
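The recurring pattern in this file: every dict-style lookup such as self.personality["name"] becomes attribute access on an AIPersonality object, and the constructor now takes the personality folder instead of a .yaml path. A short sketch of the new usage, assuming only the attributes that appear in this diff (name, user_name, welcome_message) are exposed by the package:

from pyaipersonality import AIPersonality

# The constructor now receives the personality folder, not a .yaml file
personality = AIPersonality("personalities/english/general/gpt4all_chatbot")
print(personality.name)             # was personality["name"]
print(personality.user_name)        # was personality["user_name"]
print(personality.welcome_message)  # was personality["welcome_message"]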
@@ -460,9 +461,9 @@ class Gpt4AllWebUI(GPT4AllAPI):
self.config['personality_category'] = personality_category
self.config['personality'] = personality
-personality_fn = f"personalities/{self.config['personality_language']}/{self.config['personality_category']}/{self.config['personality']}.yaml"
+personality_fn = f"personalities/{self.config['personality_language']}/{self.config['personality_category']}/{self.config['personality']}"
print(f"Loading personality : {personality_fn}")
-self.personality = load_config(personality_fn)
+self.personality = AIPersonality(personality_fn)
self.config['n_predict'] = int(data["nPredict"])
self.config['seed'] = int(data["seed"])
@@ -603,7 +604,7 @@ if __name__ == "__main__":
if arg_value is not None:
config[arg_name] = arg_value
-personality = load_config(f"personalities/{config['personality_language']}/{config['personality_category']}/{config['personality']}.yaml")
+personality = AIPersonality(f"personalities/{config['personality_language']}/{config['personality_category']}/{config['personality']}")
# executor = ThreadPoolExecutor(max_workers=1)
# app.config['executor'] = executor
@@ -621,6 +622,9 @@ if __name__ == "__main__":
-http_server = WSGIServer((config["host"], config["port"]), app, handler_class=CustomWebSocketHandler)
+http_server = WSGIServer((config["host"], config["port"]), app, handler_class=WebSocketHandler)
+url = f'http://{config["host"]}:{config["port"]}'
+print(f"Please open your browser and go to {url} to view the ui")
if config["debug"]:
socketio.run(app,debug=True, host=config["host"], port=config["port"])
else:


@@ -1,79 +0,0 @@
######
# Project : GPT4ALL-UI
# File : backend.py
# Author : ParisNeo with the help of the community
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
######
from pathlib import Path
from typing import Callable
from gpt4allj import Model
from pyGpt4All.backend import GPTBackend
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
backend_name = "GPT_J"
class GPT_J(GPTBackend):
file_extension='*'
def __init__(self, config:dict) -> None:
"""Builds a GPT-J backend
Args:
config (dict): The configuration file
"""
super().__init__(config, True)
self.config = config
if "use_avx2" in self.config and not self.config["use_avx2"]:
self.model = Model(
model=f"./models/gpt_j/{self.config['model']}", instructions='avx'
)
else:
self.model = Model(
model=f"./models/gpt_j/{self.config['model']}"
)
def get_num_tokens(self, prompt):
return self.model.num_tokens(prompt)
def generate(self,
prompt:str,
n_predict: int = 128,
new_text_callback: Callable[[str], None] = None,
verbose: bool = False,
**gpt_params ):
"""Generates text out of a prompt
Args:
prompt (str): The prompt to use for generation
n_predict (int, optional): Maximum number of tokens to predict. Defaults to 128.
new_text_callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
verbose (bool, optional): If true, the code prints detailed information about the generation process. Defaults to False.
"""
num_tokens = self.get_num_tokens(prompt)
print(f"Prompt has {num_tokens} tokens")
try:
self.model.generate(
prompt,
callback=new_text_callback,
n_predict=num_tokens + n_predict,
seed=self.config['seed'] if self.config['seed']>0 else -1,
temp=self.config['temp'],
top_k=self.config['top_k'],
top_p=self.config['top_p'],
# repeat_penalty=self.config['repeat_penalty'],
# repeat_last_n = self.config['repeat_last_n'],
n_threads=self.config['n_threads'],
#verbose=verbose
)
except Exception as ex:
print(ex)
#new_text_callback()
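This backend (removed in this commit) streamed tokens through new_text_callback. For reference, a hypothetical usage sketch; the config keys mirror the ones read above, and the model file name is an assumption:

def print_token(text):
    print(text, end="", flush=True)

config = {"model": "ggml-gpt4all-j.bin", "use_avx2": True, "seed": -1,
          "temp": 0.7, "top_k": 40, "top_p": 0.9, "n_threads": 8}
backend = GPT_J(config)
backend.generate("Once upon a time", n_predict=64, new_text_callback=print_token)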


@@ -1,81 +0,0 @@
######
# Project : GPT4ALL-UI
# File : backend.py
# Author : ParisNeo with the help of the community
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
######
from pathlib import Path
from typing import Callable
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from pyGpt4All.backend import GPTBackend
from transformers import AutoTokenizer, pipeline
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from auto_gptq.eval_tasks import LanguageModelingTask
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
backend_name = "GPT-Q"
class GPT_Q(GPTBackend):
file_extension='*'
def __init__(self, config:dict) -> None:
"""Builds a GPT-J backend
Args:
config (dict): The configuration file
"""
super().__init__(config, True)
self.config = config
# path = Path("models/hugging_face")/self.config['model']
path = "TheBloke/vicuna-13B-1.1-GPTQ-4bit-128g"
# load the pre-quantized GPTQ weights; the original code discarded this call's
# result and re-loaded the repo with AutoModelForCausalLM, which cannot read them
self.model = AutoGPTQForCausalLM.from_quantized(path)
self.tokenizer = AutoTokenizer.from_pretrained(path)
self.generator = pipeline(
"text-generation",
model=self.model,
tokenizer=self.tokenizer,
device=0, # run on the first GPU (assumes one is available)
)
def generate(self,
prompt:str,
n_predict: int = 128,
new_text_callback: Callable[[str], None] = None,
verbose: bool = False,
**gpt_params ):
"""Generates text out of a prompt
Args:
prompt (str): The prompt to use for generation
n_predict (int, optional): Maximum number of tokens to predict. Defaults to 128.
new_text_callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
verbose (bool, optional): If true, the code prints detailed information about the generation process. Defaults to False.
"""
inputs = self.tokenizer(prompt, return_tensors="pt").input_ids
while len(inputs<n_predict):
outputs = self.model.generate(
inputs,
max_new_tokens=1,
#new_text_callback=new_text_callback,
temp=self.config['temp'],
top_k=self.config['top_k'],
top_p=self.config['top_p'],
repeat_penalty=self.config['repeat_penalty'],
repeat_last_n = self.config['repeat_last_n'],
n_threads=self.config['n_threads'],
verbose=verbose
)
inputs += outputs
new_text_callback(self.tokenizer.batch_decode(outputs, skip_special_tokens=True))
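The loop above generates one token per iteration so each piece of text can be streamed to a callback. A self-contained sketch of the same pattern with plain transformers and a tiny test model (the model name is an assumption, chosen only so the snippet runs quickly):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "sshleifer/tiny-gpt2"  # tiny model, for illustration only
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

inputs = tokenizer("Hello", return_tensors="pt").input_ids
n_predict = 16
while inputs.shape[1] < n_predict:
    outputs = model.generate(inputs, max_new_tokens=1, do_sample=True, top_k=40, top_p=0.9)
    # emit only the newly generated token, then continue from the full sequence
    print(tokenizer.decode(outputs[0, inputs.shape[1]:], skip_special_tokens=True), end="", flush=True)
    inputs = outputs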


@@ -1,82 +0,0 @@
######
# Project : GPT4ALL-UI
# File : backend.py
# Author : ParisNeo with the help of the community
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
######
from pathlib import Path
from typing import Callable
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from pyGpt4All.backend import GPTBackend
import torch
import time
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
backend_name = "HuggingFace"
class HuggingFace(GPTBackend):
file_extension='*'
def __init__(self, config:dict) -> None:
"""Builds a Hugging face backend
Args:
config (dict): The configuration file
"""
super().__init__(config, True)
self.config = config
path = self.config['model']
self.model = AutoModelForCausalLM.from_pretrained(Path("models/hugging_face")/path, low_cpu_mem_usage=True)
self.tokenizer = AutoTokenizer.from_pretrained(Path("models/hugging_face")/path)
self.generator = pipeline(
"text-generation",
model=self.model,
tokenizer=self.tokenizer,
device=0, # run on the first GPU (assumes one is available)
)
def generate_callback(self, text, new_text_callback):
def callback(outputs):
generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
new_text_callback(generated_text)
print(text + generated_text, end="\r")
time.sleep(0.5)
return callback
def generate(self,
prompt:str,
n_predict: int = 128,
new_text_callback: Callable[[str], None] = None,
verbose: bool = False,
**gpt_params ):
"""Generates text out of a prompt
Args:
prompt (str): The prompt to use for generation
n_predict (int, optional): Maximum number of tokens to predict. Defaults to 128.
new_text_callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
verbose (bool, optional): If true, the code prints detailed information about the generation process. Defaults to False.
"""
# transformers pipelines do not support a streaming callback; generate the
# full completion, then hand it to new_text_callback in one piece
outputs = self.generator(
prompt,
max_new_tokens=n_predict,
do_sample=True,
num_beams=5,
temperature=self.config['temp'],
top_k=self.config['top_k'],
top_p=self.config['top_p'],
repetition_penalty=self.config['repeat_penalty'],
# repeat_last_n has no pipeline equivalent
)
new_text_callback(outputs[0]["generated_text"])
print(outputs)


@@ -12,7 +12,7 @@ n_predict: 1024
nb_messages_to_remember: 5
personality_language: english
personality_category: general
-personality: gpt4all_chatbot
+personality: gpt4all
port: 9600
repeat_last_n: 40
repeat_penalty: 1.2

convert.py (new file, 1152 lines; diff suppressed because it is too large)


@@ -1,47 +0,0 @@
# GPT4All Chatbot conditioning file
# Author : @ParisNeo
# Version : 1.1
# Description :
# A language model needs conditioning to instruct it to be whatever we want it to be.
# This file is used by the GPT4All web UI to condition the personality of the model you are
# talking to.
# Name of the personality
name: gpt4all
# Name of the user
user_name: user
# Language (see the list of supported languages here : https://github.com/ParisNeo/GPT4All_Personalities/blob/main/README.md)
language: "en_XX"
# Category
category: "General"
# Personality description:
personality_description: |
This personality is a helpful and kind AI ready to help you solve your problems
# The conditioning instructions sent to the model at the start of the discussion
personality_conditionning: |
GPT4All is a smart and helpful Assistant built by Nomic-AI. It can discuss with humans and assist them.
#Welcome message to be sent to the user when a new discussion is started
welcome_message: "Welcome! I am GPT4All A free and open assistant. What can I do for you today?"
# This prefix is added at the beginning of any message input by the user
user_message_prefix: "### Human:\n"
# A text to put between user and chatbot messages
link_text: "\n"
# This prefix is added at the beginning of any message output by the ai
ai_message_prefix: "### Assistant:\n"
# Here is the list of extensions this personality requires
dependencies: []
# Some personalities need a disclaimer to warn the user of potential harm that can be caused by the AI
# for example, for medical assistants, it is important to tell the user to be careful and not use medication
# without advice from a real doctor.
disclaimer: ""


@@ -13,6 +13,7 @@ from datetime import datetime
from pyGpt4All.db import DiscussionsDB
from pathlib import Path
import importlib
+from pyaipersonality import AIPersonality
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
@@ -20,7 +21,7 @@ __copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
class GPT4AllAPI():
-def __init__(self, config:dict, personality:dict, config_file_path:str) -> None:
+def __init__(self, config:dict, personality:AIPersonality, config_file_path:str) -> None:
self.config = config
self.personality = personality
self.config_file_path = config_file_path
@@ -49,27 +50,6 @@ class GPT4AllAPI():
# Build chatbot
self.chatbot_bindings = self.create_chatbot()
print("Chatbot created successfully")
-# tests the model
-"""
-self.prepare_reception()
-self.discussion_messages = "Instruction: Act as gpt4all. A kind and helpful AI bot built to help users solve problems.\nuser: how to build a water rocket?\ngpt4all:"
-text = self.chatbot_bindings.generate(
-self.discussion_messages,
-new_text_callback=self.new_text_callback,
-n_predict=372,
-temp=self.config['temp'],
-top_k=self.config['top_k'],
-top_p=self.config['top_p'],
-repeat_penalty=self.config['repeat_penalty'],
-repeat_last_n = self.config['repeat_last_n'],
-#seed=self.config['seed'],
-n_threads=self.config['n_threads']
-)
-print(text)
-"""
# generation status
self.generating=False
@@ -102,10 +82,10 @@ class GPT4AllAPI():
0
)
self.current_message_id = message_id
if self.personality["welcome_message"]!="":
if self.personality["welcome_message"]!="":
if self.personality.welcome_message!="":
if self.personality.welcome_message!="":
message_id = self.current_discussion.add_message(
self.personality["name"], self.personality["welcome_message"],
self.personality.name, self.personality.welcome_message,
DiscussionsDB.MSG_TYPE_NORMAL,
0,
self.current_message_id
@@ -126,7 +106,7 @@ class GPT4AllAPI():
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Chatbot conditioning
-self.condition_chatbot(self.personality["personality_conditionning"])
+self.condition_chatbot(self.personality.personality_conditionning)
return timestamp
def prepare_query(self, message_id=-1):
@@ -135,19 +115,19 @@
for message in messages:
if message["id"]<= message_id or message_id==-1:
if message["type"]!=self.db.MSG_TYPE_CONDITIONNING:
if message["sender"]==self.personality["name"]:
self.full_message_list.append(self.personality["ai_message_prefix"]+message["content"])
if message["sender"]==self.personality.name:
self.full_message_list.append(self.personality.ai_message_prefix+message["content"])
else:
self.full_message_list.append(self.personality["user_message_prefix"] + message["content"])
self.full_message_list.append(self.personality.user_message_prefix + message["content"])
link_text = self.personality["link_text"]
link_text = self.personality.link_text
if len(self.full_message_list) > self.config["nb_messages_to_remember"]:
-discussion_messages = self.personality["personality_conditionning"]+ link_text.join(self.full_message_list[-self.config["nb_messages_to_remember"]:])
+discussion_messages = self.personality.personality_conditioning+ link_text.join(self.full_message_list[-self.config["nb_messages_to_remember"]:])
else:
-discussion_messages = self.personality["personality_conditionning"]+ link_text.join(self.full_message_list)
+discussion_messages = self.personality.personality_conditioning+ link_text.join(self.full_message_list)
-discussion_messages += link_text + self.personality["ai_message_prefix"]
+discussion_messages += link_text + self.personality.ai_message_prefix
return discussion_messages # Removes the last return
def get_discussion_to(self, message_id=-1):
@@ -156,17 +136,17 @@
for message in messages:
if message["id"]<= message_id or message_id==-1:
if message["type"]!=self.db.MSG_TYPE_CONDITIONNING:
if message["sender"]==self.personality["name"]:
self.full_message_list.append(self.personality["ai_message_prefix"]+message["content"])
if message["sender"]==self.personality.name:
self.full_message_list.append(self.personality.ai_message_prefix+message["content"])
else:
self.full_message_list.append(self.personality["user_message_prefix"] + message["content"])
self.full_message_list.append(self.personality.user_message_prefix + message["content"])
link_text = self.personality["link_text"]
link_text = self.personality.link_text
if len(self.full_message_list) > self.config["nb_messages_to_remember"]:
-discussion_messages = self.personality["personality_conditionning"]+ link_text.join(self.full_message_list[-self.config["nb_messages_to_remember"]:])
+discussion_messages = self.personality.personality_conditionning+ link_text.join(self.full_message_list[-self.config["nb_messages_to_remember"]:])
else:
-discussion_messages = self.personality["personality_conditionning"]+ link_text.join(self.full_message_list)
+discussion_messages = self.personality.personality_conditionning+ link_text.join(self.full_message_list)
return discussion_messages # Removes the last return
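Both prepare_query and get_discussion_to implement the same windowing: keep only the last nb_messages_to_remember entries, join them with link_text, and prepend the conditioning text. A toy illustration with made-up values:

conditioning = "GPT4All is a smart and helpful Assistant built by Nomic-AI.\n"
link_text = "\n"
nb_messages_to_remember = 2
full_message_list = ["### Human:\nhi", "### Assistant:\nhello", "### Human:\nhow are you?"]

# keep only the most recent messages, as in prepare_query above
discussion_messages = conditioning + link_text.join(full_message_list[-nb_messages_to_remember:])
discussion_messages += link_text + "### Assistant:\n"
print(discussion_messages)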
@@ -197,7 +177,7 @@ class GPT4AllAPI():
sys.stdout.flush()
self.bot_says += text
if not self.personality["user_message_prefix"].strip().lower() in self.bot_says.lower():
if not self.personality.user_message_prefix.strip().lower() in self.bot_says.lower():
self.socketio.emit('message', {'data': self.bot_says});
if self.cancel_gen:
print("Generation canceled")
@@ -205,7 +185,7 @@ class GPT4AllAPI():
else:
return True
else:
-self.bot_says = self.remove_text_from_string(self.bot_says, self.personality["user_message_prefix"].strip())
+self.bot_says = self.remove_text_from_string(self.bot_says, self.personality.user_message_prefix.strip())
print("The model is halucinating")
return False
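The check above treats the appearance of the user prefix inside the bot's output as a sign the model has started to hallucinate a user turn, and truncates there. A hedged sketch of that logic (remove_text_from_string is assumed to cut the string at the first occurrence of the marker):

def remove_text_from_string(text, marker):
    # assumed behaviour: truncate at the first occurrence of the marker
    index = text.lower().find(marker.lower())
    return text if index == -1 else text[:index]

bot_says = "Sure, here you go!\n### Human:\nfabricated user turn"
user_message_prefix = "### Human:\n"
if user_message_prefix.strip().lower() in bot_says.lower():
    bot_says = remove_text_from_string(bot_says, user_message_prefix.strip())
    print("The model is hallucinating; kept:", repr(bot_says))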


@@ -14,3 +14,4 @@ transformers
accelerate
gevent
gevent-websocket
+pyaipersonality


@@ -4,6 +4,7 @@ nomic
pytest
pyyaml
markdown
-pyllamacpp==1.0.7
+pyllamacpp==2.0.0
gpt4all-j
-transformers
+transformers
+pyaipersonality


@@ -104,7 +104,7 @@ if exist ".git" (
:PULL_CHANGES
echo Pulling latest changes
git pull origin main
-goto :GET_PERSONALITIES
+goto :CHECK_PYTHON_INSTALL
:CLONE_REPO
REM Check if repository exists
@@ -122,12 +122,6 @@ if exist GPT4All (
git pull
)
-:GET_PERSONALITIES
-REM Download latest personalities
-if not exist tmp\personalities git clone https://github.com/ParisNeo/GPT4All_Personalities.git tmp\personalities
-xcopy /s tmp\personalities\* personalities /Y
-goto :CHECK_PYTHON_INSTALL
:CHECK_PYTHON_INSTALL
REM Check if Python is installed
set /p="Checking for python..." <nul


@@ -68,11 +68,6 @@ if ping -q -c 1 google.com >/dev/null 2>&1; then
fi
echo Pulling latest version...
git pull
-# Download latest personalities
-if ! test -d ./tmp/personalities; then
-git clone https://github.com/ParisNeo/GPT4All_Personalities.git ./tmp/personalities
-fi
-cp ./tmp/personalities/* ./personalities/
# Install Python 3.10 and pip
echo -n "Checking for python3.10..."