From e9c542a124a4babc762ae0b3cc97af61c783fd1d Mon Sep 17 00:00:00 2001
From: Saifeddine ALOUI
Date: Thu, 5 Dec 2024 16:39:26 +0100
Subject: [PATCH] Update lollms_client_js.js

---
 endpoints/libraries/lollms_client_js.js | 175 ++++++++++++++++++++---
 1 file changed, 159 insertions(+), 16 deletions(-)

diff --git a/endpoints/libraries/lollms_client_js.js b/endpoints/libraries/lollms_client_js.js
index 4de3d77f..e58feffb 100644
--- a/endpoints/libraries/lollms_client_js.js
+++ b/endpoints/libraries/lollms_client_js.js
@@ -9,7 +9,8 @@ const ELF_GENERATION_FORMAT = {
     LOLLMS: 0,
     OPENAI: 1,
     OLLAMA: 2,
-    LITELLM: 2
+    LITELLM: 3,
+    VLLM: 4
 };
 
 // JavaScript equivalent of the ELF_COMPLETION_FORMAT enum
@@ -186,6 +187,9 @@ cancel_generation() {
                 return this.ollama_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.INSTRUCT, service_key, streamingCallback);
             case ELF_GENERATION_FORMAT.LITELLM:
                 return this.litellm_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.INSTRUCT, service_key, streamingCallback);
+            case ELF_GENERATION_FORMAT.VLLM:
+                // Unlike the positional backends above, vllm_generate takes a single options object
+                return this.vllm_generate({ prompt, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, completion_format: ELF_COMPLETION_FORMAT.Instruct, service_key, streaming_callback: streamingCallback });
             default:
                 throw new Error('Invalid generation mode');
         }
@@ -407,6 +411,7 @@ async openai_generate(prompt, host_address = this.host_address, model_name = thi
     }
 }
 
+
 async openai_generate_with_images(prompt, images, options = {}) {
     const {
         host_address = this.host_address,
@@ -528,6 +533,143 @@ async openai_generate_with_images(prompt, images, options = {}) {
     }
 }
+
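+// Text generation through a vLLM server's OpenAI-compatible endpoints
+// (POST /v1/completions for raw completions, /v1/chat/completions for chat).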
+async vllm_generate({
+    prompt,
+    host_address = null,
+    model_name = null,
+    personality = null,
+    n_predict = null,
+    stream = false,
+    temperature = null,
+    top_k = null,
+    top_p = null,
+    repeat_penalty = null,
+    repeat_last_n = null,
+    seed = null,
+    n_threads = null,
+    completion_format = ELF_COMPLETION_FORMAT.Instruct, // Instruct or Chat
+    service_key = "",
+    streaming_callback = null
+}) {
+    // Fall back to the instance defaults when optional arguments are null
+    host_address = host_address || this.host_address;
+    model_name = model_name || this.model_name;
+    n_predict = n_predict || this.n_predict || this.minNPredict;
+    personality = personality !== null ? personality : this.personality;
+    temperature = temperature !== null ? temperature : this.temperature;
+    top_k = top_k !== null ? top_k : this.top_k;
+    top_p = top_p !== null ? top_p : this.top_p;
+    repeat_penalty = repeat_penalty !== null ? repeat_penalty : this.repeat_penalty;
+    repeat_last_n = repeat_last_n !== null ? repeat_last_n : this.repeat_last_n;
+    seed = seed || this.seed;
+    n_threads = n_threads || this.n_threads;
+
+    const headers = {
+        "Content-Type": "application/json",
+        ...(service_key && { Authorization: `Bearer ${service_key}` })
+    };
+
+    let data;
+    let completionFormatPath;
+
+    if (completion_format === ELF_COMPLETION_FORMAT.Instruct) {
+        data = {
+            model: model_name,
+            prompt: prompt,
+            stream: stream,
+            temperature: parseFloat(temperature),
+            max_tokens: n_predict
+        };
+        completionFormatPath = "/v1/completions";
+    } else if (completion_format === ELF_COMPLETION_FORMAT.Chat) {
+        data = {
+            model: model_name,
+            messages: [
+                {
+                    role: "user",
+                    content: prompt
+                }
+            ],
+            stream: stream,
+            temperature: parseFloat(temperature),
+            max_tokens: n_predict
+        };
+        completionFormatPath = "/v1/chat/completions";
+    } else {
+        throw new Error(`Unsupported completion format: ${completion_format}`);
+    }
+
+    if (host_address.endsWith("/")) {
+        host_address = host_address.slice(0, -1);
+    }
+
+    const url = `${host_address}${completionFormatPath}`;
+
+    try {
+        const response = await axios.post(url, data, {
+            headers: headers,
+            responseType: stream ? "stream" : "json",
+            httpsAgent: this.verifySslCertificate
+                ? undefined
+                : new (require("https").Agent)({ rejectUnauthorized: false })
+        });
+
+        if (stream) {
+            // Wrap the whole stream in one promise so that stopping early
+            // (callback asked to stop, or a parse error) still settles it.
+            return new Promise((resolve, reject) => {
+                let text = "";
+                response.data.on("data", (chunk) => {
+                    const decoded = chunk.toString("utf-8");
+                    // A single network chunk may carry several "data: ..." SSE events
+                    for (const line of decoded.split("\n")) {
+                        if (!line.startsWith("data: ")) continue;
+                        const payload = line.slice(5).trim();
+                        if (payload === "[DONE]") continue; // end-of-stream sentinel, not JSON
+                        try {
+                            const jsonData = JSON.parse(payload);
+                            let chunkContent = "";
+                            if (completion_format === ELF_COMPLETION_FORMAT.Chat) {
+                                chunkContent = jsonData.choices[0]?.delta?.content || "";
+                            } else {
+                                chunkContent = jsonData.choices[0]?.text || "";
+                            }
+                            text += chunkContent;
+                            if (streaming_callback && !streaming_callback(chunkContent, "MSG_TYPE_CHUNK")) {
+                                response.data.destroy();
+                                resolve(text);
+                                return;
+                            }
+                        } catch (error) {
+                            response.data.destroy();
+                            reject(error);
+                            return;
+                        }
+                    }
+                });
+                response.data.on("end", () => resolve(text));
+                response.data.on("error", (err) => reject(err));
+            });
+        } else {
+            return response.data;
+        }
+    } catch (error) {
+        if (error.response) {
+            const errorMessage =
+                error.response.data?.error?.message ||
+                error.response.data?.message ||
+                "Unknown error occurred";
+            console.error(errorMessage);
+            throw new Error(errorMessage);
+        } else {
+            console.error(error.message);
+            throw error;
+        }
+    }
+}
 
 async encode_image(image_path, max_image_width = -1) {
     // In a browser environment, we'll use the File API and canvas
     // For Node.js, you'd need to use libraries like 'sharp' or 'jimp'
@@ -998,22 +1140,22 @@ async summarizeText(
     let tk = await this.tokenize(text);
     let prevLen = tk.length;
     let documentChunks = null;
-    console.log(`Text size: ${prevLen}`)
+    console.log(`Text size: ${prevLen}/${maxSummarySize}`)
     while (tk.length > maxSummarySize && (documentChunks === null || documentChunks.length > 1)) {
         this.stepStart(`Compressing ${docName}...`);
         let chunkSize = Math.floor(this.lollms.ctxSize * 0.6);
         documentChunks = await TextChunker.chunkText(text, this.lollms, chunkSize, 0, true);
         console.log(`documentChunks: ${documentChunks}`)
-        text = await this.summarizeChunks({
-            chunks: documentChunks,
+        text = await this.summarizeChunks(
+            documentChunks,
             summaryInstruction,
             docName,
             answerStart,
             maxGenerationSize,
             callback,
             chunkSummaryPostProcessing,
             summaryMode
-        });
+        );
         tk = await this.tokenize(text);
         let dtkLn = prevLen - tk.length;
         prevLen = tk.length;
@@ -1055,16 +1197,16 @@ async smartDataExtraction(
     while (tk.length > maxSummarySize) {
         let chunkSize = Math.floor(this.lollms.ctxSize * 0.6);
         let documentChunks = await TextChunker.chunkText(text, this.lollms, chunkSize, 0, true);
-        text = await this.summarizeChunks({
-            chunks: documentChunks,
-            summaryInstruction: dataExtractionInstruction,
+        text = await this.summarizeChunks(
+            documentChunks,
+            dataExtractionInstruction,
             docName,
             answerStart,
             maxGenerationSize,
             callback,
             chunkSummaryPostProcessing,
             summaryMode
-        });
+        );
         tk = await this.tokenize(text);
         let dtkLn = prevLen - tk.length;
         prevLen = tk.length;
@@ -1073,29 +1215,30 @@ async smartDataExtraction(
     }
 
     this.stepStart("Rewriting ...");
-    text = await this.summarizeChunks({
-        chunks: [text],
-        summaryInstruction: finalTaskInstruction,
+    text = await this.summarizeChunks(
+        [text],
+        finalTaskInstruction,
         docName,
         answerStart,
         maxGenerationSize,
         callback,
-        chunkSummaryPostProcessing
-    });
+        chunkSummaryPostProcessing,
+        summaryMode
+    );
     this.stepEnd("Rewriting ...");
     return text;
 }
 
-async summarizeChunks({
+async summarizeChunks(
     chunks,
     summaryInstruction = "summarize the current chunk.",
     docName = "chunk",
     answerStart = "",
     maxGenerationSize = 3000,
     callback = null,
     chunkSummaryPostProcessing = null,
     summaryMode = "SEQUENTIAL"
-}) {
+) {
     if (summaryMode === "SEQUENTIAL") {
         let summary = "";
         for (let i = 0; i < chunks.length; i++) {
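
Usage sketch for the new backend (not part of the patch). It assumes a vLLM
server reachable at http://localhost:8000 and configures the client through its
public fields; the host, model name, and client setup shown here are
illustrative assumptions, not taken from this diff.

    const client = new LollmsClient();
    client.host_address = "http://localhost:8000"; // assumed vLLM server address
    client.model_name = "my-model";                // assumed served model id

    // Non-streaming chat completion: resolves with the raw OpenAI-style JSON body
    const result = await client.vllm_generate({
        prompt: "Summarize vLLM in one sentence.",
        completion_format: ELF_COMPLETION_FORMAT.Chat,
        n_predict: 128
    });
    console.log(result.choices[0].message.content);

    // Streaming completion: resolves with the accumulated text; returning
    // false from the callback stops generation early
    const text = await client.vllm_generate({
        prompt: "Write a haiku about GPUs.",
        stream: true,
        streaming_callback: (chunk, type) => { process.stdout.write(chunk); return true; }
    });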