mirror of
https://github.com/ParisNeo/lollms-webui.git
synced 2024-12-24 06:36:37 +00:00
Merge branch 'main' of https://github.com/ParisNeo/lollms-webui
This commit is contained in:
commit
e01907721b
11
app.py
11
app.py
@ -89,7 +89,7 @@ from lollms.app import LollmsApplication
|
|||||||
from lollms.paths import LollmsPaths
|
from lollms.paths import LollmsPaths
|
||||||
from lollms.main_config import LOLLMSConfig
|
from lollms.main_config import LOLLMSConfig
|
||||||
from lollms.utilities import trace_exception
|
from lollms.utilities import trace_exception
|
||||||
from lollms.security import sanitize_path, MultipartBoundaryCheck
|
from lollms.security import sanitize_path
|
||||||
from lollms_webui import LOLLMSWebUI
|
from lollms_webui import LOLLMSWebUI
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from ascii_colors import ASCIIColors
|
from ascii_colors import ASCIIColors
|
||||||
@ -124,9 +124,12 @@ def get_ip_addresses():
|
|||||||
app = FastAPI(title="LoLLMS", description="This is the LoLLMS-Webui API documentation")
|
app = FastAPI(title="LoLLMS", description="This is the LoLLMS-Webui API documentation")
|
||||||
|
|
||||||
|
|
||||||
# Add the MultipartBoundaryCheck middleware
|
try:
|
||||||
app.add_middleware(MultipartBoundaryCheck)
|
from lollms.security import MultipartBoundaryCheck
|
||||||
|
# Add the MultipartBoundaryCheck middleware
|
||||||
|
app.add_middleware(MultipartBoundaryCheck)
|
||||||
|
except:
|
||||||
|
print("Couldn't activate MultipartBoundaryCheck")
|
||||||
|
|
||||||
#app.mount("/socket.io", StaticFiles(directory="path/to/socketio.js"))
|
#app.mount("/socket.io", StaticFiles(directory="path/to/socketio.js"))
|
||||||
|
|
||||||
|
@ -818,6 +818,108 @@ async listModels(host_address = this.host_address) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Splits text into paragraph-aligned chunks bounded by a token budget.
 *
 * Text is cut on blank lines ('\n\n'); paragraphs are accumulated until adding
 * the next one would push the running token count above the chunk size.
 * A single paragraph larger than the chunk size is kept whole (it is never
 * split further).
 */
class TextChunker {
  /**
   * @param {number} chunkSize - Maximum number of tokens accumulated before a chunk is closed.
   * @param {number} overlap - Number of trailing PARAGRAPHS (not tokens) of the
   *   previous chunk that are repeated at the start of the next one.
   * @param {?Object} tokenizer - Object exposing `tokenize(text)` whose result has a
   *   `length`; when falsy, a `new TikTokenTokenizer()` is created.
   * @param {*} model - Stored on the instance as-is; not used by this class.
   */
  constructor(chunkSize = 512, overlap = 0, tokenizer = null, model = null) {
    this.chunkSize = chunkSize;
    this.overlap = overlap;
    this.tokenizer = tokenizer || new TikTokenTokenizer();
    this.model = model;
  }

  /**
   * Chunk `text` with the instance settings and wrap each piece in a `Chunk`.
   *
   * @param {?string} text - Text to split; null/undefined yields an empty list.
   * @param {*} doc - Passed through unchanged as the first `Chunk` argument.
   * @param {boolean} cleanChunk - Trim paragraphs and drop blank lines from each chunk.
   * @param {number} minNbTokensInChunk - Pieces with this many tokens or fewer are discarded.
   * @returns {Chunk[]} Chunks numbered from 0 in emission order.
   */
  getTextChunks(text, doc, cleanChunk = true, minNbTokensInChunk = 10) {
    const pieces = TextChunker._splitIntoChunks(
      text,
      this.tokenizer,
      this.chunkSize,
      this.overlap,
      cleanChunk,
      minNbTokensInChunk
    );
    // The array index doubles as the chunk id: ids are assigned sequentially from 0.
    return pieces.map(
      (piece, chunkId) => new Chunk(doc, '', piece.text, piece.nbTokens, chunkId)
    );
  }

  /**
   * Remove empty and whitespace-only lines from a block of text.
   *
   * @param {string} paragraph - Text that may contain blank lines.
   * @returns {string} The remaining lines joined with single '\n'.
   */
  static removeUnnecessaryReturns(paragraph) {
    const lines = paragraph.split('\n');
    return lines.filter(line => line.trim()).join('\n');
  }

  /**
   * Stateless variant of `getTextChunks` that returns plain strings.
   *
   * @param {?string} text - Text to split; null/undefined yields an empty list.
   * @param {Object} tokenizer - Object exposing `tokenize(text)` whose result has a `length`.
   * @param {number} chunkSize - Maximum number of tokens accumulated before a chunk is closed.
   * @param {number} overlap - Number of trailing paragraphs repeated in the next chunk.
   * @param {boolean} cleanChunk - Trim paragraphs and drop blank lines from each chunk.
   * @param {number} minNbTokensInChunk - Pieces with this many tokens or fewer are discarded.
   * @returns {string[]} Chunk texts in document order.
   */
  static chunkText(text, tokenizer, chunkSize = 512, overlap = 0, cleanChunk = true, minNbTokensInChunk = 10) {
    return TextChunker._splitIntoChunks(
      text,
      tokenizer,
      chunkSize,
      overlap,
      cleanChunk,
      minNbTokensInChunk
    ).map(piece => piece.text);
  }

  /**
   * Shared chunking loop behind `getTextChunks` and `chunkText`.
   *
   * @param {?string} text
   * @param {Object} tokenizer
   * @param {number} chunkSize
   * @param {number} overlap
   * @param {boolean} cleanChunk
   * @param {number} minNbTokensInChunk
   * @returns {{text: string, nbTokens: number}[]} Emitted pieces with their token totals.
   */
  static _splitIntoChunks(text, tokenizer, chunkSize, overlap, cleanChunk, minNbTokensInChunk) {
    // Treat missing text like empty text instead of throwing on `.split`.
    if (text === null || text === undefined) {
      return [];
    }

    const pieces = [];
    // Each pending entry remembers its token count so overlap paragraphs
    // never have to be tokenized a second time.
    let pending = [];
    let pendingTokens = 0;

    // Emit the pending paragraphs as one piece, unless there is nothing pending
    // or the total does not exceed the minimum token threshold.
    const flush = () => {
      if (pending.length === 0 || pendingTokens <= minNbTokensInChunk) {
        return;
      }
      let joined = pending.map(entry => entry.text).join('\n\n');
      if (cleanChunk) {
        joined = TextChunker.removeUnnecessaryReturns(joined);
      }
      pieces.push({ text: joined, nbTokens: pendingTokens });
    };

    for (const paragraph of text.split('\n\n')) {
      const cleaned = cleanChunk ? paragraph.trim() : paragraph;
      const entry = { text: cleaned, nbTokens: tokenizer.tokenize(cleaned).length };

      if (pendingTokens + entry.nbTokens > chunkSize) {
        // Budget exceeded: close the current piece, then start the next one
        // with the overlap paragraphs (if any) followed by this paragraph.
        flush();
        pending = overlap > 0 ? [...pending.slice(-overlap), entry] : [entry];
        pendingTokens = pending.reduce((sum, e) => sum + e.nbTokens, 0);
      } else {
        pending.push(entry);
        pendingTokens += entry.nbTokens;
      }
    }

    // Whatever is left after the last paragraph forms the final piece.
    flush();
    return pieces;
  }
}
|
||||||
|
|
||||||
class TasksLibrary {
|
class TasksLibrary {
|
||||||
constructor(lollms) {
|
constructor(lollms) {
|
||||||
this.lollms = lollms;
|
this.lollms = lollms;
|
||||||
@ -1055,7 +1157,11 @@ async sequentialChunksSummary({
|
|||||||
return summaries.join("\n");
|
return summaries.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Placeholder methods for stepStart, stepEnd, fastGen
|
// Placeholder methods for step, stepStart, stepEnd, fastGen
|
||||||
|
step(message) {
|
||||||
|
console.log(message);
|
||||||
|
}
|
||||||
|
|
||||||
stepStart(message) {
|
stepStart(message) {
|
||||||
console.log(message);
|
console.log(message);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user