From 789e800381daa5799eff0c5904b2bdb48b3fa5f1 Mon Sep 17 00:00:00 2001 From: saloui Date: Fri, 21 Jul 2023 17:01:21 +0200 Subject: [PATCH] upgraded --- lollms/personality.py | 3 +- lollms/utilities.py | 99 +++++++++++++++++++++++++++++++++++++++++-- setup.py | 2 +- 3 files changed, 99 insertions(+), 5 deletions(-) diff --git a/lollms/personality.py b/lollms/personality.py index 41a1177..bbdd138 100644 --- a/lollms/personality.py +++ b/lollms/personality.py @@ -1072,7 +1072,7 @@ class APScript(StateMachine): def generate(self, prompt, max_size, temperature = None, top_k = None, top_p=None, repeat_penalty=None ): self.bot_says = "" ASCIIColors.info("Text generation started: Warming up") - return self.personality.model.generate( + self.personality.model.generate( prompt, max_size, self.process, @@ -1081,6 +1081,7 @@ class APScript(StateMachine): top_p=self.personality.model_top_p if top_p is None else top_p, repeat_penalty=self.personality.model_repeat_penalty if repeat_penalty is None else repeat_penalty, ).strip() + return self.bot_says def run_workflow(self, prompt:str, previous_discussion_text:str="", callback=None): """ diff --git a/lollms/utilities.py b/lollms/utilities.py index 9451a47..6b02642 100644 --- a/lollms/utilities.py +++ b/lollms/utilities.py @@ -101,7 +101,11 @@ class TextVectorizer: if use_pca: # Use PCA for dimensionality reduction pca = PCA(n_components=2) - embeddings_2d = pca.fit_transform(combined_embeddings) + try: + embeddings_2d = pca.fit_transform(combined_embeddings) + except Exception as ex: + + embeddings_2d = [] else: # Use t-SNE for dimensionality reduction # Adjust the perplexity value @@ -219,7 +223,7 @@ class TextVectorizer: data=[] for chunk in chunks: try: - data.append(self.model.detokenize(chunk) ) + data.append(self.model.detokenize(chunk).replace("","").replace("","") ) except Exception as ex: print("oups") self.vectorizer.fit(data) @@ -305,4 +309,93 @@ class TextVectorizer: self.embeddings = {} self.texts={} if 
class GenericDataLoader:
    """Static helpers that load the content of common document formats.

    The third-party parsers (PyPDF2, python-docx, beautifulsoup4,
    python-pptx) are imported lazily, and installed with pip on first use
    when they are missing. Formats handled by the standard library (JSON,
    CSV, plain text) never trigger an installation.
    """

    @staticmethod
    def install_package(package_name):
        """Install *package_name* with pip, using the running interpreter.

        Raises:
            subprocess.CalledProcessError: if pip exits with a non-zero status.
        """
        import subprocess
        import sys
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])

    @staticmethod
    def _import_or_install(module_name, package_name):
        """Import *module_name*, installing *package_name* first if it is missing.

        Returns:
            The imported module object.
        """
        import importlib
        try:
            return importlib.import_module(module_name)
        except ImportError:
            GenericDataLoader.install_package(package_name)
            # A package installed while the interpreter is running may not be
            # visible to the import system until its finder caches are reset.
            importlib.invalidate_caches()
            return importlib.import_module(module_name)

    @staticmethod
    def read_pdf_file(file_path):
        """Return the concatenated text of every page of a PDF file.

        Args:
            file_path: path of the PDF document.

        Returns:
            A single string; pages without extractable text contribute nothing.
        """
        PyPDF2 = GenericDataLoader._import_or_install("PyPDF2", "PyPDF2")
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Extraction happens while the file is still open, and `or ""`
            # guards against a page whose extraction yields None.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    @staticmethod
    def read_docx_file(file_path):
        """Return the text of a .docx file, one paragraph per line.

        Args:
            file_path: path of the Word document.
        """
        docx = GenericDataLoader._import_or_install("docx", "python-docx")
        doc = docx.Document(file_path)
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    @staticmethod
    def read_json_file(file_path):
        """Parse a JSON file and return the decoded Python object.

        The file is read as UTF-8, like read_text_file, instead of the
        platform default encoding.

        Args:
            file_path: path of the JSON document.
        """
        import json
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    @staticmethod
    def read_csv_file(file_path):
        """Return the rows of a CSV file as a list of lists of strings.

        Args:
            file_path: path of the CSV document (read as UTF-8).
        """
        # csv is part of the standard library: it is always importable and
        # must never be installed from PyPI.
        import csv
        # newline='' lets the csv module do its own line-ending handling, so
        # line breaks embedded in quoted fields are preserved.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            return list(csv.reader(file))

    @staticmethod
    def read_html_file(file_path):
        """Return the visible text of an HTML file with the markup removed.

        Args:
            file_path: path of the HTML document.
        """
        bs4 = GenericDataLoader._import_or_install("bs4", "beautifulsoup4")
        # The file is opened in binary mode so the parser can detect the
        # document's encoding instead of relying on the platform default.
        with open(file_path, 'rb') as file:
            soup = bs4.BeautifulSoup(file, 'html.parser')
        return soup.get_text()

    @staticmethod
    def read_pptx_file(file_path):
        """Return the text of a .pptx file, one paragraph per line.

        Every shape with a text frame on every slide is visited. The runs of
        a paragraph are joined as-is and each paragraph is terminated by a
        newline (as read_docx_file does), so the text of neighbouring
        paragraphs and shapes does not get glued together.

        Args:
            file_path: path of the PowerPoint document.
        """
        pptx = GenericDataLoader._import_or_install("pptx", "python-pptx")
        prs = pptx.Presentation(file_path)
        lines = []
        for slide in prs.slides:
            for shape in slide.shapes:
                if not shape.has_text_frame:
                    continue
                for paragraph in shape.text_frame.paragraphs:
                    lines.append("".join(run.text for run in paragraph.runs) + "\n")
        return "".join(lines)

    @staticmethod
    def read_text_file(file_path):
        """Return the whole content of a UTF-8 encoded text file.

        Args:
            file_path: path of the text document.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()