mirror of
https://github.com/ParisNeo/lollms.git
synced 2024-12-19 04:37:54 +00:00
fixed bugs
This commit is contained in:
parent
688d70a221
commit
239955db8b
@ -1493,20 +1493,35 @@ class APScript(StateMachine):
|
|||||||
ASCIIColors.yellow(prompt)
|
ASCIIColors.yellow(prompt)
|
||||||
ASCIIColors.red(" *-*-*-*-*-*-*-*")
|
ASCIIColors.red(" *-*-*-*-*-*-*-*")
|
||||||
|
|
||||||
def fast_gen(self, prompt: str, max_generation_size: int, placeholders: dict = None, sacrifice: list = None, debug: bool = False) -> str:
    """
    Fast way to generate text from a prompt.

    Reshapes the context before generation: the prompt is rebuilt with the
    given placeholders and cropped so it fits in
    ctx_size - max_generation_size tokens, sacrificing the placeholders
    listed in `sacrifice` first.

    Parameters:
        prompt (str): The input prompt for text generation.
        max_generation_size (int): The maximum number of tokens to generate.
        placeholders (dict, optional): Placeholders to substitute into the
            prompt. Effective default is {}.
        sacrifice (list, optional): Placeholder names whose content may be
            cropped when the window exceeds the available context. Effective
            default is ["previous_discussion"].
        debug (bool, optional): When True, print the final prompt before
            generating. Defaults to False.

    Returns:
        str: The generated text after removing the special tokens "<s>" and
        "</s>" and stripping leading/trailing whitespace.
    """
    # None sentinels replace mutable default arguments ({} and [...]), which
    # are created once and shared across every call; the effective defaults
    # seen by callers are unchanged.
    if placeholders is None:
        placeholders = {}
    if sacrifice is None:
        sacrifice = ["previous_discussion"]
    pr = PromptReshaper(prompt)
    prompt = pr.build(placeholders,
                      self.personality.model.tokenize,
                      self.personality.model.detokenize,
                      self.personality.model.config.ctx_size - max_generation_size,
                      sacrifice
                      )
    if debug:
        self.print_prompt("prompt", prompt)
    return self.generate(prompt, max_generation_size).strip().replace("</s>", "").replace("<s>", "")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#Helper method to convert outputs path to url
|
#Helper method to convert outputs path to url
|
||||||
|
@ -6,6 +6,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import gc
|
import gc
|
||||||
|
from typing import List
|
||||||
|
|
||||||
class NumpyEncoderDecoder(json.JSONEncoder):
|
class NumpyEncoderDecoder(json.JSONEncoder):
|
||||||
def default(self, obj):
|
def default(self, obj):
|
||||||
@ -677,119 +678,6 @@ class TextVectorizer:
|
|||||||
self.save_to_json()
|
self.save_to_json()
|
||||||
|
|
||||||
|
|
||||||
class GenericDataLoader:
    """Static helpers that read a file of a known type and return its content.

    Text-like formats are returned as ``str``; ``.json`` returns the parsed
    object and ``.csv`` a list of rows. Optional third-party readers
    (pdfminer.six, python-docx, beautifulsoup4, python-pptx) are installed on
    demand through PackageManager the first time they are needed.
    """

    @staticmethod
    def read_file(file_path: Path):
        """Dispatch on the file suffix and return the file's content.

        Parameters:
            file_path (Path): Path of the file to read.

        Returns:
            The extracted content (str for most formats, parsed object for
            .json, list of rows for .csv).

        Raises:
            ValueError: If the suffix is not a supported type.
        """
        # Lower-case the suffix so e.g. ".PDF" is handled like ".pdf"
        # (previously the match was case-sensitive and raised ValueError).
        suffix = file_path.suffix.lower()
        if suffix == ".pdf":
            return GenericDataLoader.read_pdf_file(file_path)
        elif suffix == ".docx":
            return GenericDataLoader.read_docx_file(file_path)
        elif suffix == ".json":
            return GenericDataLoader.read_json_file(file_path)
        elif suffix == ".html":
            return GenericDataLoader.read_html_file(file_path)
        elif suffix == ".pptx":
            return GenericDataLoader.read_pptx_file(file_path)
        elif suffix == ".csv":
            # read_csv_file existed but was unreachable from read_file.
            return GenericDataLoader.read_csv_file(file_path)
        elif suffix in [".txt", ".rtf", ".md", ".log", ".cpp", ".java", ".js", ".py", ".rb", ".sh", ".sql", ".css", ".html", ".php", ".json", ".xml", ".yaml", ".yml", ".h", ".hh", ".hpp", ".inc", ".snippet", ".snippets", ".asm", ".s", ".se", ".sym", ".ini", ".inf", ".map", ".bat"]:
            # ".html"/".json" here are shadowed by the branches above and
            # kept only to preserve the original list.
            return GenericDataLoader.read_text_file(file_path)
        else:
            raise ValueError("Unknown file type")

    @staticmethod  # was missing; the function takes no self/cls
    def get_supported_file_types():
        """Return the list of supported file types.

        NOTE(review): the list mixes bare names ("pdf") and dotted suffixes
        (".txt"); kept byte-identical because callers may depend on it.
        """
        return ["pdf", "txt", "docx", "json", "html", "pptx",".txt", ".md", ".log", ".cpp", ".java", ".js", ".py", ".rb", ".sh", ".sql", ".css", ".html", ".php", ".json", ".xml", ".yaml", ".yml", ".h", ".hh", ".hpp", ".inc", ".snippet", ".snippets", ".asm", ".s", ".se", ".sym", ".ini", ".inf", ".map", ".bat", ".rtf"]

    @staticmethod
    def read_pdf_file(file_path):
        """Extract the text of a PDF and return it as Markdown-style text.

        Only pdfminer's extract_text is actually used; the previous
        PyPDF2/PIL/pytesseract imports and installs were dead weight.
        """
        try:
            from pdfminer.high_level import extract_text
        except ImportError:
            # pdfminer.six provides the modern pdfminer.high_level module.
            PackageManager.install_package("pdfminer.six")
            from pdfminer.high_level import extract_text

        # Extract text from the PDF
        text = extract_text(file_path)

        # Two trailing spaces before a newline are a Markdown hard line
        # break; the original used a single space, which Markdown ignores.
        markdown_text = text.replace('\n', '  \n')
        return markdown_text

    @staticmethod
    def read_docx_file(file_path):
        """Return the concatenated paragraph text of a .docx document,
        one paragraph per line."""
        try:
            from docx import Document
        except ImportError:
            PackageManager.install_package("python-docx")
            from docx import Document
        doc = Document(file_path)
        text = ""
        for paragraph in doc.paragraphs:
            text += paragraph.text + "\n"
        return text

    @staticmethod
    def read_json_file(file_path):
        """Parse a JSON file and return the resulting Python object."""
        import json
        with open(file_path, 'r') as file:
            data = json.load(file)
        return data

    @staticmethod
    def read_csv_file(file_path):
        """Read a CSV file and return its rows as a list of lists of str."""
        # csv is standard library — the previous
        # PackageManager.install_package("csv") fallback could never be
        # needed and would have installed an unrelated PyPI package.
        import csv
        with open(file_path, 'r') as file:
            return list(csv.reader(file))

    @staticmethod
    def read_html_file(file_path):
        """Return the visible text of an HTML file (markup stripped)."""
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            PackageManager.install_package("beautifulsoup4")
            from bs4 import BeautifulSoup
        with open(file_path, 'r') as file:
            soup = BeautifulSoup(file, 'html.parser')
            text = soup.get_text()
        return text

    @staticmethod
    def read_pptx_file(file_path):
        """Return the concatenated run text of every shape on every slide
        of a .pptx presentation."""
        try:
            from pptx import Presentation
        except ImportError:
            PackageManager.install_package("python-pptx")
            from pptx import Presentation
        prs = Presentation(file_path)
        text = ""
        for slide in prs.slides:
            for shape in slide.shapes:
                if shape.has_text_frame:
                    for paragraph in shape.text_frame.paragraphs:
                        for run in paragraph.runs:
                            text += run.text
        return text

    @staticmethod
    def read_text_file(file_path):
        """Return the full content of a text file, decoded as UTF-8."""
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        return content
|
Loading…
Reference in New Issue
Block a user