mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-19 04:37:54 +00:00
fixed bugs
This commit is contained in:
parent
688d70a221
commit
239955db8b
@ -1493,20 +1493,35 @@ class APScript(StateMachine):
|
||||
ASCIIColors.yellow(prompt)
|
||||
ASCIIColors.red(" *-*-*-*-*-*-*-*")
|
||||
|
||||
def fast_gen(self, prompt: str, max_generation_size: int, placeholders: dict = None, sacrifice: list = None, debug: bool = False) -> str:
    """
    Fast way to generate text.

    Builds the final prompt with PromptReshaper, giving it a token budget of
    ``ctx_size - max_generation_size``, then runs generation on the result.

    Parameters:
    - prompt (str): The input prompt template for text generation.
    - max_generation_size (int): The maximum number of tokens to generate.
    - placeholders (dict, optional): Placeholders to be replaced in the prompt.
      Defaults to an empty dictionary.
    - sacrifice (list, optional): Placeholder names handed to the reshaper as
      the ones it may sacrifice when the prompt exceeds the token budget.
      Defaults to ["previous_discussion"].
    - debug (bool, optional): When True, the built prompt is printed through
      ``self.print_prompt``. Defaults to False.

    Returns:
    - str: The generated text, stripped of leading/trailing whitespace and
      then with every "<s>" and "</s>" marker removed.
    """
    # None sentinels instead of mutable defaults: a literal {} or [] in the
    # signature would be one shared object reused across every call.
    if placeholders is None:
        placeholders = {}
    if sacrifice is None:
        sacrifice = ["previous_discussion"]

    pr = PromptReshaper(prompt)
    prompt = pr.build(
        placeholders,
        self.personality.model.tokenize,
        self.personality.model.detokenize,
        # Leave room in the context window for the tokens to be generated.
        self.personality.model.config.ctx_size - max_generation_size,
        sacrifice,
    )
    if debug:
        self.print_prompt("prompt", prompt)

    return self.generate(prompt, max_generation_size).strip().replace("</s>", "").replace("<s>", "")
|
||||
|
||||
|
||||
|
||||
# Helper method to convert an outputs path to a URL
|
||||
|
@ -6,6 +6,7 @@ import json
|
||||
import re
|
||||
import subprocess
|
||||
import gc
|
||||
from typing import List
|
||||
|
||||
class NumpyEncoderDecoder(json.JSONEncoder):
|
||||
def default(self, obj):
|
||||
@ -677,119 +678,6 @@ class TextVectorizer:
|
||||
self.save_to_json()
|
||||
|
||||
|
||||
class GenericDataLoader:
    """Static helpers that load the content of a file based on its suffix.

    ``read_file`` is the entry point; it dispatches to one of the
    format-specific readers below. Optional third-party parsers are imported
    lazily and, when missing, requested from ``PackageManager`` before the
    import is retried.
    """

    # Suffixes whose content is returned verbatim as UTF-8 text.
    # ".html" and ".json" are handled by their dedicated readers instead.
    _TEXT_SUFFIXES = frozenset({
        ".txt", ".rtf", ".md", ".log", ".cpp", ".java", ".js", ".py", ".rb",
        ".sh", ".sql", ".css", ".php", ".xml", ".yaml", ".yml", ".h", ".hh",
        ".hpp", ".inc", ".snippet", ".snippets", ".asm", ".s", ".se", ".sym",
        ".ini", ".inf", ".map", ".bat",
    })

    @staticmethod
    def read_file(file_path: Path):
        """Load ``file_path`` with the reader matching its suffix.

        Args:
            file_path: Path (or path string) of the file to load. The suffix
                is matched case-insensitively.

        Returns:
            Whatever the selected reader returns: text for most formats, the
            decoded JSON value for ``.json`` files.

        Raises:
            ValueError: If the suffix is not one of the supported types.
        """
        file_path = Path(file_path)
        suffix = file_path.suffix.lower()

        dedicated_readers = {
            ".pdf": GenericDataLoader.read_pdf_file,
            ".docx": GenericDataLoader.read_docx_file,
            ".json": GenericDataLoader.read_json_file,
            ".html": GenericDataLoader.read_html_file,
            ".pptx": GenericDataLoader.read_pptx_file,
        }
        reader = dedicated_readers.get(suffix)
        if reader is not None:
            return reader(file_path)
        if suffix in GenericDataLoader._TEXT_SUFFIXES:
            return GenericDataLoader.read_text_file(file_path)
        raise ValueError("Unknown file type")

    @staticmethod
    def get_supported_file_types():
        """Return the list of supported file type identifiers.

        NOTE(review): the list mixes bare names ("pdf") with dotted suffixes
        (".txt"); it is kept unchanged because callers may depend on it.
        """
        return ["pdf", "txt", "docx", "json", "html", "pptx",".txt", ".md", ".log", ".cpp", ".java", ".js", ".py", ".rb", ".sh", ".sql", ".css", ".html", ".php", ".json", ".xml", ".yaml", ".yml", ".h", ".hh", ".hpp", ".inc", ".snippet", ".snippets", ".asm", ".s", ".se", ".sym", ".ini", ".inf", ".map", ".bat", ".rtf"]

    @staticmethod
    def read_pdf_file(file_path):
        """Extract the text of a PDF and add Markdown hard line breaks.

        Only pdfminer's ``extract_text`` is needed, so that is the only
        package imported (and requested from PackageManager when missing).
        """
        try:
            from pdfminer.high_level import extract_text
        except ImportError:
            PackageManager.install_package("pdfminer.six")
            from pdfminer.high_level import extract_text

        # Extract text from the PDF
        text = extract_text(file_path)

        # Markdown needs two trailing spaces before a newline to render a
        # line break.
        return text.replace('\n', '  \n')

    @staticmethod
    def read_docx_file(file_path):
        """Return the paragraphs of a .docx document, one per line."""
        try:
            from docx import Document
        except ImportError:
            PackageManager.install_package("python-docx")
            from docx import Document
        doc = Document(file_path)
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    @staticmethod
    def read_json_file(file_path):
        """Return the decoded content of a JSON file (read as UTF-8)."""
        import json
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    @staticmethod
    def read_csv_file(file_path):
        """Return the rows of a CSV file as a list of lists of strings."""
        # csv is part of the standard library, so no install fallback is needed.
        import csv
        # newline='' lets the csv module handle embedded newlines itself.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            return list(csv.reader(file))

    @staticmethod
    def read_html_file(file_path):
        """Return the visible text of an HTML file, parsed with BeautifulSoup."""
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            PackageManager.install_package("beautifulsoup4")
            from bs4 import BeautifulSoup
        with open(file_path, 'r', encoding='utf-8') as file:
            soup = BeautifulSoup(file, 'html.parser')
        return soup.get_text()

    @staticmethod
    def read_pptx_file(file_path):
        """Return the concatenated text runs of every slide in a .pptx file.

        Runs are joined without any separator.
        """
        try:
            from pptx import Presentation
        except ImportError:
            PackageManager.install_package("python-pptx")
            from pptx import Presentation
        prs = Presentation(file_path)
        return "".join(
            run.text
            for slide in prs.slides
            for shape in slide.shapes
            if shape.has_text_frame
            for paragraph in shape.text_frame.paragraphs
            for run in paragraph.runs
        )

    @staticmethod
    def read_text_file(file_path):
        """Return the whole content of a UTF-8 text file."""
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user