mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-19 20:57:58 +00:00
upgraded
This commit is contained in:
parent
3c68ad5973
commit
789e800381
@ -1072,7 +1072,7 @@ class APScript(StateMachine):
|
||||
def generate(self, prompt, max_size, temperature = None, top_k = None, top_p=None, repeat_penalty=None ):
|
||||
self.bot_says = ""
|
||||
ASCIIColors.info("Text generation started: Warming up")
|
||||
return self.personality.model.generate(
|
||||
self.personality.model.generate(
|
||||
prompt,
|
||||
max_size,
|
||||
self.process,
|
||||
@ -1081,6 +1081,7 @@ class APScript(StateMachine):
|
||||
top_p=self.personality.model_top_p if top_p is None else top_p,
|
||||
repeat_penalty=self.personality.model_repeat_penalty if repeat_penalty is None else repeat_penalty,
|
||||
).strip()
|
||||
return self.bot_says
|
||||
|
||||
def run_workflow(self, prompt:str, previous_discussion_text:str="", callback=None):
|
||||
"""
|
||||
|
@ -101,7 +101,11 @@ class TextVectorizer:
|
||||
if use_pca:
|
||||
# Use PCA for dimensionality reduction
|
||||
pca = PCA(n_components=2)
|
||||
embeddings_2d = pca.fit_transform(combined_embeddings)
|
||||
try:
|
||||
embeddings_2d = pca.fit_transform(combined_embeddings)
|
||||
except Exception as ex:
|
||||
|
||||
embeddings_2d = []
|
||||
else:
|
||||
# Use t-SNE for dimensionality reduction
|
||||
# Adjust the perplexity value
|
||||
@ -219,7 +223,7 @@ class TextVectorizer:
|
||||
data=[]
|
||||
for chunk in chunks:
|
||||
try:
|
||||
data.append(self.model.detokenize(chunk) )
|
||||
data.append(self.model.detokenize(chunk).replace("<s>","").replace("</s>","") )
|
||||
except Exception as ex:
|
||||
print("oups")
|
||||
self.vectorizer.fit(data)
|
||||
@ -305,4 +309,93 @@ class TextVectorizer:
|
||||
self.embeddings = {}
|
||||
self.texts={}
|
||||
if self.personality_config.save_db:
|
||||
self.save_to_json()
|
||||
self.save_to_json()
|
||||
|
||||
|
||||
class GenericDataLoader:
    """Collection of static helpers that load the content of common file formats.

    Third-party parsers (PyPDF2, python-docx, beautifulsoup4, python-pptx) are
    imported lazily inside each reader; when the import fails the package is
    installed with pip through :meth:`install_package` and imported again.
    """

    @staticmethod
    def install_package(package_name):
        """Install ``package_name`` with pip using the running interpreter.

        Args:
            package_name: Name of the distribution to pass to ``pip install``.

        Raises:
            subprocess.CalledProcessError: If the pip command exits with a
                non-zero status.
        """
        import subprocess
        import sys

        # Use ``sys.executable -m pip`` so the package lands in the environment
        # of the interpreter that is currently running.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])

    @staticmethod
    def read_pdf_file(file_path):
        """Return the concatenated text extracted from every page of a PDF.

        Args:
            file_path: Path of the PDF file to read.

        Returns:
            A single string made of the text of each page, in page order.
        """
        try:
            import PyPDF2
        except ImportError:
            GenericDataLoader.install_package("PyPDF2")
            import PyPDF2

        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # ``or ""`` keeps the join safe if a page yields no text object.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    @staticmethod
    def read_docx_file(file_path):
        """Return the text of a .docx document, one paragraph per line.

        Args:
            file_path: Path of the .docx file to read.

        Returns:
            The text of every paragraph, each followed by a newline.
        """
        try:
            from docx import Document
        except ImportError:
            GenericDataLoader.install_package("python-docx")
            from docx import Document

        doc = Document(file_path)
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    @staticmethod
    def read_json_file(file_path):
        """Parse a JSON file and return the decoded object.

        Args:
            file_path: Path of the JSON file to read.

        Returns:
            Whatever ``json.load`` produces for the file content.
        """
        import json

        # Explicit UTF-8 keeps the result independent of the platform locale
        # and consistent with ``read_text_file``.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    @staticmethod
    def read_csv_file(file_path):
        """Read a CSV file and return its rows.

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            A list of rows, each row being a list of string fields.
        """
        # ``csv`` is part of the standard library: no pip fallback is needed.
        import csv

        # ``newline=''`` is required by the csv module so that newlines embedded
        # in quoted fields are handled by the parser itself.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            return list(csv.reader(file))

    @staticmethod
    def read_html_file(file_path):
        """Return the visible text of an HTML file.

        Args:
            file_path: Path of the HTML file to read.

        Returns:
            The result of ``BeautifulSoup.get_text()`` on the parsed document.
        """
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            GenericDataLoader.install_package("beautifulsoup4")
            from bs4 import BeautifulSoup

        with open(file_path, 'r', encoding='utf-8') as file:
            soup = BeautifulSoup(file, 'html.parser')
        return soup.get_text()

    @staticmethod
    def read_pptx_file(file_path):
        """Return the text of every run found in a .pptx presentation.

        Args:
            file_path: Path of the .pptx file to read.

        Returns:
            The text of all runs, concatenated without any separator, walking
            slides, shapes, paragraphs and runs in document order.
        """
        try:
            from pptx import Presentation
        except ImportError:
            GenericDataLoader.install_package("python-pptx")
            from pptx import Presentation

        prs = Presentation(file_path)
        return "".join(
            run.text
            for slide in prs.slides
            for shape in slide.shapes
            if shape.has_text_frame  # only shapes with a text frame carry runs
            for paragraph in shape.text_frame.paragraphs
            for run in paragraph.runs
        )

    @staticmethod
    def read_text_file(file_path):
        """Return the full content of a UTF-8 encoded text file.

        Args:
            file_path: Path of the text file to read.

        Returns:
            The file content as a string.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
|
||||
|
Loading…
Reference in New Issue
Block a user