upgraded

2025-01-31 16:35:28 +00:00 · 2023-07-21 17:01:21 +02:00 · 2023-07-21 17:01:21 +02:00 · 789e800381
commit 789e800381
parent 3c68ad5973
3 changed files with 99 additions and 5 deletions
--- a/lollms/personality.py
+++ b/lollms/personality.py
@ -1072,7 +1072,7 @@ class APScript(StateMachine):
    def generate(self, prompt, max_size, temperature = None, top_k = None, top_p=None, repeat_penalty=None ):
        self.bot_says = ""
        ASCIIColors.info("Text generation started: Warming up")
-        return self.personality.model.generate(
+        self.personality.model.generate(
                                prompt, 
                                max_size, 
                                self.process,
@ -1081,6 +1081,7 @@ class APScript(StateMachine):
                                top_p=self.personality.model_top_p if top_p is None else top_p,
                                repeat_penalty=self.personality.model_repeat_penalty if repeat_penalty is None else repeat_penalty,
                                ).strip()    
+        return self.bot_says

    def run_workflow(self, prompt:str, previous_discussion_text:str="", callback=None):
        """
--- a/lollms/utilities.py
+++ b/lollms/utilities.py
@ -101,7 +101,11 @@ class TextVectorizer:
            if use_pca:
                # Use PCA for dimensionality reduction
                pca = PCA(n_components=2)
-                embeddings_2d = pca.fit_transform(combined_embeddings)
+                try:
+                    embeddings_2d = pca.fit_transform(combined_embeddings)
+                except Exception as ex:
+                    
+                    embeddings_2d = []
            else:
                # Use t-SNE for dimensionality reduction
                # Adjust the perplexity value
@ -219,7 +223,7 @@ class TextVectorizer:
            data=[]
            for chunk in chunks:
                try:
-                    data.append(self.model.detokenize(chunk) ) 
+                    data.append(self.model.detokenize(chunk).replace("<s>","").replace("</s>","") ) 
                except Exception as ex:
                    print("oups")
            self.vectorizer.fit(data)
@ -305,4 +309,93 @@ class TextVectorizer:
        self.embeddings = {}
        self.texts={}
        if self.personality_config.save_db:
-            self.save_to_json()
+            self.save_to_json()
+            
+      
+class GenericDataLoader:
+    """Static helpers that read a file from disk and return its content.
+
+    Third-party parsers (PyPDF2, python-docx, beautifulsoup4, python-pptx)
+    are imported lazily inside each reader; when the import fails, the
+    matching package is installed with pip and the import is retried.
+    Text-based formats are decoded as UTF-8, like ``read_text_file``.
+    """
+
+    @staticmethod
+    def install_package(package_name):
+        """Install ``package_name`` with pip, using the running interpreter.
+
+        Raises:
+            subprocess.CalledProcessError: if pip exits with a non-zero status.
+        """
+        import subprocess
+        import sys
+        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
+
+    @staticmethod
+    def read_pdf_file(file_path):
+        """Return the concatenated text of every page of a PDF file."""
+        try:
+            import PyPDF2
+        except ImportError:
+            GenericDataLoader.install_package("PyPDF2")
+            import PyPDF2
+        with open(file_path, 'rb') as file:
+            pdf_reader = PyPDF2.PdfReader(file)
+            # Pages are extracted while the file is still open.
+            # `or ""` keeps the join safe if a page yields no text.
+            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
+        return text
+
+    @staticmethod
+    def read_docx_file(file_path):
+        """Return the paragraphs of a .docx file, each followed by a newline."""
+        try:
+            from docx import Document
+        except ImportError:
+            GenericDataLoader.install_package("python-docx")
+            from docx import Document
+        doc = Document(file_path)
+        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
+
+    @staticmethod
+    def read_json_file(file_path):
+        """Parse a JSON file and return the decoded Python object."""
+        import json
+        # JSON text is UTF-8; do not depend on the platform's locale encoding.
+        with open(file_path, 'r', encoding='utf-8') as file:
+            data = json.load(file)
+        return data
+
+    @staticmethod
+    def read_csv_file(file_path):
+        """Return the rows of a CSV file as a list of lists of strings."""
+        # csv is part of the standard library: no pip fallback is needed.
+        import csv
+        # newline='' lets the csv module handle line endings itself, so that
+        # newlines embedded in quoted fields are preserved.
+        with open(file_path, 'r', encoding='utf-8', newline='') as file:
+            lines = list(csv.reader(file))
+        return lines
+
+    @staticmethod
+    def read_html_file(file_path):
+        """Return the visible text of an HTML file (markup stripped)."""
+        try:
+            from bs4 import BeautifulSoup
+        except ImportError:
+            GenericDataLoader.install_package("beautifulsoup4")
+            from bs4 import BeautifulSoup
+        with open(file_path, 'r', encoding='utf-8') as file:
+            soup = BeautifulSoup(file, 'html.parser')
+            text = soup.get_text()
+        return text
+
+    @staticmethod
+    def read_pptx_file(file_path):
+        """Return the text of every run in every text frame of a .pptx file.
+
+        Runs are concatenated without any separator.
+        """
+        try:
+            from pptx import Presentation
+        except ImportError:
+            GenericDataLoader.install_package("python-pptx")
+            from pptx import Presentation
+        prs = Presentation(file_path)
+        return "".join(
+            run.text
+            for slide in prs.slides
+            for shape in slide.shapes
+            if shape.has_text_frame
+            for paragraph in shape.text_frame.paragraphs
+            for run in paragraph.runs
+        )
+
+    @staticmethod
+    def read_text_file(file_path):
+        """Return the full content of a UTF-8 encoded text file."""
+        with open(file_path, 'r', encoding='utf-8') as file:
+            content = file.read()
+        return content
--- a/setup.py
+++ b/setup.py
@ -26,7 +26,7 @@ def get_all_files(path):

 setuptools.setup(
    name="lollms",
-    version="2.1.50",
+    version="2.1.53",
    author="Saifeddine ALOUI",
    author_email="aloui.saifeddine@gmail.com",
    description="A python library for AI personality definition",