New model format

This commit is contained in:
Saifeddine ALOUI 2023-04-20 19:30:03 +02:00
parent b05dde5477
commit 2d598cc5fe
19 changed files with 453 additions and 91 deletions

55
app.py
View File

@@ -9,6 +9,13 @@
# Made by the community for the community
######
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
import argparse
import json
import re
@@ -40,9 +47,18 @@ class Gpt4AllWebUI(GPT4AllAPI):
self.app = _app
self.add_endpoint(
"/list_backends", "list_backends", self.list_backends, methods=["GET"]
)
self.add_endpoint(
"/list_models", "list_models", self.list_models, methods=["GET"]
)
self.add_endpoint(
"/list_personalities_languages", "list_personalities_languages", self.list_personalities_languages, methods=["GET"]
)
self.add_endpoint(
"/list_personalities_categories", "list_personalities_categories", self.list_personalities_categories, methods=["GET"]
)
self.add_endpoint(
"/list_personalities", "list_personalities", self.list_personalities, methods=["GET"]
)
@@ -55,6 +71,9 @@ class Gpt4AllWebUI(GPT4AllAPI):
"/list_discussions", "list_discussions", self.list_discussions, methods=["GET"]
)
self.add_endpoint("/set_personality_language", "set_personality_language", self.set_personality_language, methods=["GET"])
self.add_endpoint("/set_personality_category", "set_personality_category", self.set_personality_category, methods=["GET"])
self.add_endpoint("/", "", self.index, methods=["GET"])
self.add_endpoint("/export_discussion", "export_discussion", self.export_discussion, methods=["GET"])
@@ -116,14 +135,30 @@ class Gpt4AllWebUI(GPT4AllAPI):
"/help", "help", self.help, methods=["GET"]
)
def list_backends(self):
backends_dir = Path('./pyGpt4All/backends') # path to the backends folder
backends = [f.stem for f in backends_dir.glob('*.py') if f.name!="backend" and f.stem!="__init__"]
return jsonify(backends)
def list_models(self):
models_dir = Path('./models') # replace with the actual path to the models folder
models_dir = Path('./models')/self.config["backend"] # models are grouped in a subfolder per backend
models = [f.name for f in models_dir.glob('*.bin')]
return jsonify(models)
def list_personalities_languages(self):
personalities_languages_dir = Path('./personalities') # path to the personalities folder
personalities_languages = [f.stem for f in personalities_languages_dir.iterdir() if f.is_dir()]
return jsonify(personalities_languages)
def list_personalities_categories(self):
personalities_categories_dir = Path(f'./personalities/{self.config["personality_language"]}') # path to the selected personality language folder
personalities_categories = [f.stem for f in personalities_categories_dir.iterdir() if f.is_dir()]
return jsonify(personalities_categories)
def list_personalities(self):
personalities_dir = Path('./personalities') # replace with the actual path to the models folder
personalities_dir = Path(f'./personalities/{self.config["personality_language"]}/{self.config["personality_category"]}') # path to the selected language/category folder
personalities = [f.stem for f in personalities_dir.glob('*.yaml')]
return jsonify(personalities)
@@ -145,6 +180,16 @@ class Gpt4AllWebUI(GPT4AllAPI):
return jsonify(discussions)
def set_personality_language(self):
lang = request.args.get('language')
self.config['personality_language'] = lang
return jsonify({'success':True})
def set_personality_category(self):
category = request.args.get('category')
self.config['personality_category'] = category
return jsonify({'success':True})
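Together, these GET endpoints let a client walk the language → category → personality cascade. A minimal sketch using the requests library (host and port are assumptions taken from the default config):
import requests

base = "http://localhost:9600"  # assumed default host/port
requests.get(f"{base}/set_personality_language", params={"language": "english"})
categories = requests.get(f"{base}/list_personalities_categories").json()
requests.get(f"{base}/set_personality_category", params={"category": categories[0]})
print(requests.get(f"{base}/list_personalities").json())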
def add_endpoint(
self,
endpoint=None,
@@ -247,7 +292,7 @@ class Gpt4AllWebUI(GPT4AllAPI):
return Response(
stream_with_context(
self.parse_to_prompt_stream(message, message_id)
)
), content_type='text/plain; charset=utf-8'
)
@@ -284,7 +329,7 @@ class Gpt4AllWebUI(GPT4AllAPI):
for message in messages:
message["content"] = markdown.markdown(message["content"])
return jsonify(messages)
return jsonify(messages), {'Content-Type': 'application/json; charset=utf-8'}
def delete_discussion(self):
data = request.get_json()
@@ -470,7 +515,7 @@ if __name__ == "__main__":
if arg_value is not None:
config[arg_name] = arg_value
personality = load_config(f"personalities/{config['personality']}.yaml")
personality = load_config(f"personalities/{config['personality_language']}/{config['personality_category']}/{config['personality']}.yaml")
executor = ThreadPoolExecutor(max_workers=2)
app.config['executor'] = executor

View File

@@ -5,9 +5,13 @@ debug: false
n_threads: 8
host: localhost
language: en-US
# Supported backends are llama_cpp and gpt_j
backend: llama_cpp
model: gpt4all-lora-quantized-ggml.bin
n_predict: 1024
nb_messages_to_remember: 5
personality_language: english
personality_category: general
personality: gpt4all_chatbot
port: 9600
repeat_last_n: 40
@@ -18,4 +22,4 @@ top_k: 50
top_p: 0.95
voice: ""
use_gpu: false # Not active yet
auto_read: false
auto_read: false
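With the values above, the personality file is now resolved from three keys instead of one. A minimal sketch of the lookup app.py performs:
from pathlib import Path

config = {"personality_language": "english",
          "personality_category": "general",
          "personality": "gpt4all_chatbot"}
path = Path("personalities") / config["personality_language"] \
       / config["personality_category"] / (config["personality"] + ".yaml")
print(path)  # personalities/english/general/gpt4all_chatbot.yaml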

View File

@@ -1 +1,4 @@
Here you can drop your models
Here you can drop your models, in a subfolder matching the selected backend
Currently, supported backends are:
- llamacpp (folder: models/llamacpp)
- gpt-j (folder: models/gptj)
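A minimal sketch of how the per-backend folders are scanned (folder names are assumptions drawn from the backend code, models/llamacpp and models/gptj):
from pathlib import Path

for backend in ("llamacpp", "gptj"):
    models = [f.name for f in (Path("models") / backend).glob("*.bin")]
    print(backend, models)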

View File

@@ -1,47 +0,0 @@
# GPT4All Chatbot conditioning file
# Author : @ParisNeo
# Version : 1.0
# Description :
# An NLP model needs conditioning to instruct it to be whatever we want it to be.
# This file is used by the GPT4All web ui to condition the personality of the model you are
# talking to.
# Name of the personality
name: gpt4all
# Name of the user
user_name: user
# Language (see the list of supported languages here : https://github.com/ParisNeo/GPT4All_Personalities/blob/main/README.md)
language: "en_XX"
# Category
category: "General"
# Personality description:
personality_description: |
This personality is a helpful and kind AI ready to help you solve your problems
# The conditioning instructions sent to the model at the start of the discussion
personality_conditionning: |
GPT4All is a smart and helpful AI chat bot built by Nomic-AI. It can generate stories on demand.
#Welcome message to be sent to the user when a new discussion is started
welcome_message: "Welcome! I am GPT4All A free and open discussion AI. What can I do for you today?"
# This prefix is added at the beginning of any message input by the user
user_message_prefix: "user: "
# A text to put between user and chatbot messages
link_text: "\n"
# This prefix is added at the beginning of any message output by the ai
ai_message_prefix: "gpt4all: "
# Here is the list of extensions this personality requires
dependencies: []
# Some personalities need a disclaimer to warn the user of potential harm that can be caused by the AI
# for example, for medical assistants, it is important to tell the user to be careful and not use medication
# without advice from a real doctor.
disclaimer: ""

View File

@@ -11,11 +11,15 @@ import gc
import sys
from queue import Queue
from datetime import datetime
from pyllamacpp.model import Model
from pyGpt4All.db import DiscussionsDB
from pyGpt4All.backends import BACKENDS_LIST
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
class GPT4AllAPI():
def __init__(self, config:dict, personality:dict, config_file_path) -> None:
def __init__(self, config:dict, personality:dict, config_file_path:str) -> None:
self.config = config
self.personality = personality
self.config_file_path = config_file_path
@@ -38,6 +42,9 @@ class GPT4AllAPI():
# This is used to keep track of messages
self.full_message_list = []
# Select backend
self.backend = BACKENDS_LIST[self.config["backend"]]
# Build chatbot
self.chatbot_bindings = self.create_chatbot()
print("Chatbot created successfully")
@@ -66,11 +73,7 @@ class GPT4AllAPI():
def create_chatbot(self):
try:
return Model(
ggml_model=f"./models/{self.config['model']}",
n_ctx=self.config['ctx_size'],
seed=self.config['seed'],
)
return self.backend(self.config)
except Exception as ex:
print(f"Exception {ex}")
return None
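Because create_chatbot catches the exception and returns None, callers should verify the binding before generating. A hypothetical guard (caller code assumed, not part of this commit):
# hypothetical caller; config, personality and config_file_path loaded elsewhere
api = GPT4AllAPI(config, personality, config_file_path)
if api.chatbot_bindings is None:
    raise RuntimeError(f"Backend {config['backend']} failed to load its model")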

View File

@@ -0,0 +1,6 @@
from pyGpt4All.backends.llamacpp import LLAMACPP
from pyGpt4All.backends.gpt_j import GPT_J
BACKENDS_LIST={
"llama_cpp":LLAMACPP,
"gpt_j":GPT_J
}
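The registry reduces backend selection to a dictionary lookup on the config's backend key, which is exactly what api.py does. A short sketch (config is assumed to be the parsed YAML dict):
from pyGpt4All.backends import BACKENDS_LIST

backend_class = BACKENDS_LIST["llama_cpp"]  # "gpt_j" selects GPT_J instead
chatbot = backend_class(config)             # config: parsed configuration dict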

View File

@@ -0,0 +1,37 @@
######
# Project : GPT4ALL-UI
# File : backend.py
# Author : ParisNeo with the help of the community
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is an interface class for GPT4All-ui backends.
######
from pathlib import Path
from typing import Callable
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
class GPTBackend:
def __init__(self, config:dict) -> None:
self.config = config
def generate(self,
prompt:str,
n_predict: int = 128,
new_text_callback: Callable[[str], None] = None,
verbose: bool = False,
**gpt_params ):
"""Generates text out of a prompt
This should be implemented by the child class
Args:
prompt (str): The prompt to use for generation
n_predict (int, optional): Number of tokens to predict. Defaults to 128.
new_text_callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
verbose (bool, optional): If true, the code will print detailed information about the generation process. Defaults to False.
"""
pass
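A new backend only has to subclass GPTBackend and implement generate. A hypothetical minimal example (EchoBackend is illustrative only, not part of this commit):
from typing import Callable
from pyGpt4All.backends.backend import GPTBackend

class EchoBackend(GPTBackend):
    """Hypothetical backend that streams the prompt back, character by character."""
    def __init__(self, config: dict) -> None:
        super().__init__(config)

    def generate(self,
                 prompt: str,
                 n_predict: int = 128,
                 new_text_callback: Callable[[str], None] = None,
                 verbose: bool = False,
                 **gpt_params):
        # stream at most n_predict characters through the callback
        for char in prompt[:n_predict]:
            if new_text_callback is not None:
                new_text_callback(char)

# registering it would be one more entry in BACKENDS_LIST, e.g. "echo": EchoBackend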

View File

@@ -0,0 +1,60 @@
######
# Project : GPT4ALL-UI
# File : gpt_j.py
# Author : ParisNeo with the help of the community
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is the GPT-J backend for GPT4All-ui.
######
from pathlib import Path
from typing import Callable
from gpt4allj import Model
from pyGpt4All.backends.backend import GPTBackend
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
class GPT_J(GPTBackend):
def __init__(self, config:dict) -> None:
"""Builds a GPT-J backend
Args:
config (dict): The configuration file
"""
super().__init__(config)
self.config = config
self.model = Model(
ggml_model=f"./models/gptj/{self.config['model']}"
)
def generate(self,
prompt:str,
n_predict: int = 128,
new_text_callback: Callable[[str], None] = None,
verbose: bool = False,
**gpt_params ):
"""Generates text out of a prompt
Args:
prompt (str): The prompt to use for generation
n_predict (int, optional): Number of tokens to predict. Defaults to 128.
new_text_callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
verbose (bool, optional): If true, the code will print detailed information about the generation process. Defaults to False.
"""
self.model.generate(
prompt,
new_text_callback=new_text_callback,
n_predict=n_predict,
temp=self.config['temp'],
top_k=self.config['top_k'],
top_p=self.config['top_p'],
repeat_penalty=self.config['repeat_penalty'],
repeat_last_n = self.config['repeat_last_n'],
n_threads=self.config['n_threads'],
verbose=verbose
)

View File

@@ -0,0 +1,62 @@
######
# Project : GPT4ALL-UI
# File : llamacpp.py
# Author : ParisNeo with the help of the community
# Supported by Nomic-AI
# Licence : Apache 2.0
# Description :
# This is the LLaMA-cpp (pyllamacpp) backend for GPT4All-ui.
######
from pathlib import Path
from typing import Callable
from pyllamacpp.model import Model
from pyGpt4All.backends.backend import GPTBackend
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
class LLAMACPP(GPTBackend):
def __init__(self, config:dict) -> None:
"""Builds a LLAMACPP backend
Args:
config (dict): The configuration file
"""
super().__init__(config)
self.model = Model(
ggml_model=f"./models/llamacpp/{self.config['model']}",
n_ctx=self.config['ctx_size'],
seed=self.config['seed'],
)
def generate(self,
prompt:str,
n_predict: int = 128,
new_text_callback: Callable[[str], None] = None,
verbose: bool = False,
**gpt_params ):
"""Generates text out of a prompt
Args:
prompt (str): The prompt to use for generation
n_predict (int, optional): Number of tokens to predict. Defaults to 128.
new_text_callback (Callable[[str], None], optional): A callback function that is called every time a new text element is generated. Defaults to None.
verbose (bool, optional): If true, the code will print detailed information about the generation process. Defaults to False.
"""
self.model.generate(
prompt,
new_text_callback=new_text_callback,
n_predict=n_predict,
temp=self.config['temp'],
top_k=self.config['top_k'],
top_p=self.config['top_p'],
repeat_penalty=self.config['repeat_penalty'],
repeat_last_n = self.config['repeat_last_n'],
n_threads=self.config['n_threads'],
verbose=verbose
)
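generate returns nothing and streams through new_text_callback, so a caller accumulates text in the callback. A minimal sketch (config is assumed to be the parsed YAML shown earlier):
chunks = []
backend = LLAMACPP(config)  # loads ./models/llamacpp/<model>
backend.generate("Once upon a time",
                 n_predict=64,
                 new_text_callback=chunks.append)
print("".join(chunks))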

View File

@@ -11,6 +11,11 @@
######
import yaml
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
def load_config(file_path):
with open(file_path, 'r') as stream:
config = yaml.safe_load(stream)

View File

@@ -1,5 +1,12 @@
import sqlite3
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
# =================================== Database ==================================================================
class DiscussionsDB:
MSG_TYPE_NORMAL = 0
@@ -13,6 +20,17 @@ class DiscussionsDB:
create database schema
"""
db_version = 2
# Verify the encoding and change it if it is not compliant
with sqlite3.connect(self.db_path) as conn:
# Execute a PRAGMA statement to get the current encoding of the database
cur = conn.execute('PRAGMA encoding')
current_encoding = cur.fetchone()[0]
if current_encoding != 'UTF-8':
# The current encoding is not UTF-8, so we need to change it
print(f"The current encoding is {current_encoding}, changing to UTF-8...")
conn.execute('PRAGMA encoding = "UTF-8"')
conn.commit()
print("Checking discussions database...")
with sqlite3.connect(self.db_path) as conn:
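Note that SQLite fixes the text encoding when the database is first created; PRAGMA encoding = "UTF-8" is silently ignored on an existing, non-empty database, so the block above mainly serves as a check. A standalone sketch of that behavior (not part of this commit):
import sqlite3

with sqlite3.connect(":memory:") as conn:
    conn.execute('PRAGMA encoding = "UTF-16"')  # must run before any table exists
    conn.execute("CREATE TABLE t (x)")
    # prints UTF-16le or UTF-16be depending on the native byte order
    print(conn.execute("PRAGMA encoding").fetchone()[0])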

View File

@@ -5,6 +5,11 @@
# it gives your code access to the model, the callback functions, the model conditioning, etc.
from config import load_config, save_config
__author__ = "parisneo"
__github__ = "https://github.com/nomic-ai/gpt4all-ui"
__copyright__ = "Copyright 2023, "
__license__ = "Apache 2.0"
class Extension():
def __init__(self, metadata_file_path:str, app) -> None:
self.app = app

View File

@@ -1,6 +1,7 @@
flask
nomic
pytest
pyllamacpp==1.0.6
pyyaml
markdown
markdown
pyllamacpp==1.0.6
gpt4allj

7
requirements_dev.txt Normal file
View File

@@ -0,0 +1,7 @@
flask
nomic
pytest
pyyaml
markdown
pyllamacpp==1.0.6
gpt4allj

35
setup.py Normal file
View File

@@ -0,0 +1,35 @@
from pathlib import Path
from typing import Union
import setuptools
with open("README.md", "r") as fh:
long_description = fh.read()
def read_requirements(path: Union[str, Path]):
with open(path, "r") as file:
return file.read().splitlines()
requirements = read_requirements("requirements.txt")
requirements_dev = read_requirements("requirements_dev.txt")
setuptools.setup(
name="GPT4Allui",
version="0.0.5",
author="Saifeddine ALOUI",
author_email="aloui.saifeddine@gmail.com",
description="A web ui for running chat models with different backends. Supports multiple personalities and extensions.",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/nomic-ai/gpt4all-ui",
packages=setuptools.find_packages(),
install_requires=requirements,
extras_require={"dev": requirements_dev},
classifiers=[
"Programming Language :: Python :: 3.10",
"License :: OSI Approved :: Apache 2.0 License",
"Operating System :: OS Independent",
],
)

View File

@@ -93,7 +93,7 @@ function update_main(){
else{
// For the other entries, these are just the text of the chatbot
for (const char of text) {
txt = hiddenElement.innerHTML;
txt = bot_msg.hiddenElement.innerHTML;
if (char != '\f') {
txt += char
bot_msg.hiddenElement.innerHTML = txt;

View File

@@ -3,7 +3,7 @@ fetch('/settings')
.then(response => response.text())
.then(html => {
document.getElementById('settings').innerHTML = html;
backendInput = document.getElementById('backend');
modelInput = document.getElementById('model');
personalityInput = document.getElementById('personalities');
languageInput = document.getElementById('language');
@@ -54,6 +54,7 @@ fetch('/settings')
.then((response) => response.json())
.then((data) => {
console.log(data);
backendInput.value = data["backend"]
modelInput.value = data["model"]
personalityInput.value = data["personality"]
languageInput.value = data["language"]
@@ -89,6 +90,7 @@ fetch('/settings')
// Get form values and put them in an object
const formValues = {
seed: seedInput.value,
backend: backendInput.value,
model: modelInput.value,
personality: personalityInput.value,
language: languageInput.value,
@@ -129,10 +131,16 @@ fetch('/settings')
function populate_models(){
// Get a reference to the <select> element
const selectElement = document.getElementById('model');
const selectBackend = document.getElementById('backend');
const selectModel = document.getElementById('model');
// Fetch the list of .bin files from the models subfolder
fetch('/list_models')
const selectPersonalityLanguage = document.getElementById('personalities_language');
const selectPersonalityCategory = document.getElementById('personalities_category');
const selectPersonality = document.getElementById('personalities');
function populate_backends(){
// Fetch the list of available backends
fetch('/list_backends')
.then(response => response.json())
.then(data => {
if (Array.isArray(data)) {
@@ -141,7 +149,7 @@
const optionElement = document.createElement('option');
optionElement.value = filename;
optionElement.textContent = filename;
selectElement.appendChild(optionElement);
selectBackend.appendChild(optionElement);
});
// fetch('/get_args')
@@ -153,30 +161,134 @@ function populate_models(){
console.error('Expected an array, but received:', data);
}
});
}
function populate_models(){
// Fetch the list of .bin files from the models subfolder
fetch('/list_models')
.then(response => response.json())
.then(data => {
if (Array.isArray(data)) {
// data is an array
data.forEach(filename => {
const optionElement = document.createElement('option');
optionElement.value = filename;
optionElement.textContent = filename;
selectModel.appendChild(optionElement);
});
// fetch('/get_args')
// .then(response=> response.json())
// .then(data=>{
// })
} else {
console.error('Expected an array, but received:', data);
}
});
}
function populate_personalities_languages(){
selectPersonalityLanguage.innerHTML=""
// Fetch the list of personality languages
fetch('/list_personalities_languages')
.then(response => response.json())
.then(data => {
if (Array.isArray(data)) {
// data is an array
data.forEach(filename => {
const optionElement = document.createElement('option');
optionElement.value = filename;
optionElement.textContent = filename;
selectPersonalityLanguage.appendChild(optionElement);
});
// fetch('/get_args')
// .then(response=> response.json())
// .then(data=>{
// })
} else {
console.error('Expected an array, but received:', data);
}
});
}
function populate_personalities_categories(){
selectPersonalityCategory.innerHTML=""
// Fetch the list of personality categories
fetch('/list_personalities_categories')
.then(response => response.json())
.then(data => {
if (Array.isArray(data)) {
// data is an array
data.forEach(filename => {
const optionElement = document.createElement('option');
optionElement.value = filename;
optionElement.textContent = filename;
selectPersonalityCategory.appendChild(optionElement);
});
} else {
console.error('Expected an array, but received:', data);
}
});
}
function populate_personalities(){
selectPersonality.innerHTML=""
// Fetch the list of .yaml personality files for the selected language and category
fetch('/list_personalities')
.then(response => response.json())
.then(data => {
if (Array.isArray(data)) {
// data is an array
const selectElement = document.getElementById('personalities');
data.forEach(filename => {
const optionElement = document.createElement('option');
optionElement.value = filename;
optionElement.textContent = filename;
selectElement.appendChild(optionElement);
selectPersonality.appendChild(optionElement);
});
// fetch('/get_args')
// .then(response=> response.json())
// .then(data=>{
// })
} else {
console.error('Expected an array, but received:', data);
}
});
}
function set_personality_language(lang, callback) {
fetch(`/set_personality_language?language=${lang}`)
.then(response => response.json())
.then(data => {
callback(data);
});
}
// Example usage: call another function after set_personality_language returns
selectPersonalityLanguage.addEventListener('change', function() {
set_personality_language(selectPersonalityLanguage.value, function(data) {
console.log('Response received:', data);
populate_personalities_categories();
});
});
function set_personality_category(category, callback) {
fetch(`/set_personality_category?category=${category}`)
.then(response => response.json())
.then(data => {
callback(data)
});
}
// Example usage: call another function after set_personality_category returns
selectPersonalityCategory.addEventListener('change', function() {
set_personality_category(selectPersonalityCategory.value, function(data) {
console.log('Response received:', data);
populate_personalities();
});
});
populate_backends()
populate_models()
populate_personalities_languages()
populate_personalities_categories()
populate_personalities()
// Fetch the list of available languages
fetch('/list_languages')
@@ -184,19 +296,13 @@ function populate_models(){
.then(data => {
if (Array.isArray(data)) {
// data is an array
const selectElement = document.getElementById('language');
const selectLanguage = document.getElementById('language');
data.forEach(row => {
const optionElement = document.createElement('option');
optionElement.value = row.value;
optionElement.innerHTML = row.label;
selectElement.appendChild(optionElement);
selectLanguage.appendChild(optionElement);
});
// fetch('/get_args')
// .then(response=> response.json())
// .then(data=>{
// })
} else {
console.error('Expected an array, but received:', data);
}

View File

@@ -1,10 +1,25 @@
<div class="h-full overflow-y-auto">
<form id="model-params-form" class="bg-gray-50 dark:bg-gray-700 shadow-md rounded px-8 py-8 pt-6 pb-8 mb-4 text-black dark:text-white">
<div class="mb-4 flex-row">
<label class="font-bold" for="model">Backend</label>
<select class="bg-gray-200 dark:bg-gray-700 w-96 shadow appearance-none border rounded py-2 px-3 leading-tight focus:outline-none focus:shadow-outline" id="backend" name="backend">
</select>
</div>
<div class="mb-4 flex-row">
<label class="font-bold" for="model">Model</label>
<select class="bg-gray-200 dark:bg-gray-700 w-96 shadow appearance-none border rounded py-2 px-3 leading-tight focus:outline-none focus:shadow-outline" id="model" name="model">
</select>
</div>
<div class="mb-4 flex-row">
<label class="font-bold mb-2" for="model">Personalities Languages</label>
<select class="bg-gray-200 dark:bg-gray-700 shadow appearance-none border rounded py-2 px-3 leading-tight focus:outline-none focus:shadow-outline" id="personalities_language" name="personalities_language" value="English">
</select>
</div>
<div class="mb-4 flex-row">
<label class="font-bold mb-2" for="model">Personalities Category</label>
<select class="bg-gray-200 dark:bg-gray-700 shadow appearance-none border rounded py-2 px-3 leading-tight focus:outline-none focus:shadow-outline" id="personalities_category" name="personalities_category" value="general">
</select>
</div>
<div class="mb-4 flex-row">
<label class="font-bold mb-2" for="model">Personalities</label>
<select class="bg-gray-200 dark:bg-gray-700 shadow appearance-none border rounded py-2 px-3 leading-tight focus:outline-none focus:shadow-outline" id="personalities" name="personalities" value="gpt4all_chatbot.yaml">

View File

@@ -238,18 +238,15 @@ if not exist \models (
md \models
)
if not exist ./models/gpt4all-lora-quantized-ggml.bin (
if not exist ./models/llamacpp/gpt4all-lora-quantized-ggml.bin (
echo.
choice /C YNB /M "The default model file (gpt4all-lora-quantized-ggml.bin) does not exist. Do you want to download it? Press B to download it with a browser (faster)."
if errorlevel 3 goto DOWNLOAD_WITH_BROWSER
if errorlevel 2 goto DOWNLOAD_SKIP
if errorlevel 1 goto MODEL_DOWNLOAD
) ELSE (
echo.
choice /C YNB /M "The default model file (gpt4all-lora-quantized-ggml.bin) already exists. Do you want to replace it? Press B to download it with a browser (faster)."
if errorlevel 3 goto DOWNLOAD_WITH_BROWSER
if errorlevel 2 goto DOWNLOAD_SKIP
if errorlevel 1 goto MODEL_DOWNLOAD
echo Model already installed
goto CONTINUE
)
:DOWNLOAD_WITH_BROWSER