// This library requires axios.
// In the HTML, don't forget to import the axios.min.js CDN:
// <script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>

// JavaScript equivalent of the ELF_GENERATION_FORMAT enum
const ELF_GENERATION_FORMAT = {
    LOLLMS: 0,
    OPENAI: 1,
    OLLAMA: 2,
    LITELLM: 3 // distinct value so the LITELLM case in generate() is reachable (was 2, colliding with OLLAMA)
};

// JavaScript equivalent of the ELF_COMPLETION_FORMAT enum
const ELF_COMPLETION_FORMAT = {
    Instruct: 0,
    Chat: 1
};

// Ensuring the objects are immutable
Object.freeze(ELF_GENERATION_FORMAT);
Object.freeze(ELF_COMPLETION_FORMAT);

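// The generation backend is selected by the last constructor argument of LollmsClient
// (defined below). For example (sketch; every value other than the address and the mode
// is just the constructor default):
//   new LollmsClient("http://localhost:9600", null, 4096, -1, 4096, 0.1, 50, 0.95, 0.8,
//                    40, null, 8, "", ELF_GENERATION_FORMAT.OPENAI)
// routes generate() calls to openai_generate().
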
class LollmsClient {
    constructor(
        host_address = null,
        model_name = null,
        ctx_size = 4096,
        personality = -1,
        n_predict = 4096,
        temperature = 0.1,
        top_k = 50,
        top_p = 0.95,
        repeat_penalty = 0.8,
        repeat_last_n = 40,
        seed = null,
        n_threads = 8,
        service_key = "",
        default_generation_mode = ELF_GENERATION_FORMAT.LOLLMS
    ) {
        // Handle the import or initialization of a tiktoken equivalent in JavaScript
        // this.tokenizer = new TikTokenJS('gpt-3.5-turbo-1106'); // This is hypothetical

        this.host_address = host_address;
        this.model_name = model_name;
        this.ctx_size = ctx_size;
        this.n_predict = n_predict;
        this.personality = personality;
        this.temperature = temperature;
        this.top_k = top_k;
        this.top_p = top_p;
        this.repeat_penalty = repeat_penalty;
        this.repeat_last_n = repeat_last_n;
        this.seed = seed;
        this.n_threads = n_threads;
        this.service_key = service_key;
        this.default_generation_mode = default_generation_mode;
        this.template = {
            start_header_id_template: "!@>",
            end_header_id_template: ": ",
            separator_template: "\n",
            start_user_header_id_template: "!@>",
            end_user_header_id_template: ": ",
            end_user_message_id_template: "",
            start_ai_header_id_template: "!@>",
            end_ai_header_id_template: ": ",
            end_ai_message_id_template: "",
            system_message_template: "system"
        };
        // Fetch the active prompt template from the server; it replaces the defaults above
        fetch('/template')
            .then((response) => {
                if (!response.ok) {
                    throw new Error('Network response was not ok ' + response.statusText);
                }
                return response.json();
            })
            .then((data) => {
                console.log("data: ", data);
                this.template = data;
            })
            .catch((error) => {
                console.error('Error fetching template:', error);
            });
    }

    system_message() {
        return this.template.start_header_id_template + this.template.system_message_template + this.template.end_header_id_template;
    }
    ai_message(ai_name = "assistant") {
        return this.template.start_ai_header_id_template + ai_name + this.template.end_ai_header_id_template;
    }
    user_message(user_name = "user") {
        return this.template.start_user_header_id_template + user_name + this.template.end_user_header_id_template;
    }
    updateServerAddress(newAddress) {
        // Update the host address used by the generation and listing endpoints
        this.host_address = newAddress;
    }
    async tokenize(prompt) {
        /**
         * Tokenizes the given prompt using the model's tokenizer.
         *
         * @param {string} prompt - The input prompt to be tokenized.
         * @returns {Array} A list of tokens representing the tokenized prompt.
         */
        const output = await axios.post("/lollms_tokenize", { "prompt": prompt });
        console.log(output.data.named_tokens);
        return output.data.named_tokens;
    }

    async detokenize(tokensList) {
        /**
         * Detokenizes the given list of tokens using the model's tokenizer.
         *
         * @param {Array} tokensList - A list of tokens to be detokenized.
         * @returns {string} The detokenized text as a string.
         */
        const output = await axios.post("/lollms_detokenize", { "tokens": tokensList });
        console.log(output.data.text);
        return output.data.text;
    }
    generate(prompt, {
        n_predict = null,
        stream = false,
        temperature = 0.1,
        top_k = 50,
        top_p = 0.95,
        repeat_penalty = 0.8,
        repeat_last_n = 40,
        seed = null,
        n_threads = 8,
        service_key = "",
        streamingCallback = null
    } = {}) {
        // Dispatch to the backend selected at construction time.
        // Note: ollama_generate and litellm_generate are assumed to be provided alongside this class.
        switch (this.default_generation_mode) {
            case ELF_GENERATION_FORMAT.LOLLMS:
                return this.lollms_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streamingCallback);
            case ELF_GENERATION_FORMAT.OPENAI:
                return this.openai_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streamingCallback);
            case ELF_GENERATION_FORMAT.OLLAMA:
                return this.ollama_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streamingCallback);
            case ELF_GENERATION_FORMAT.LITELLM:
                return this.litellm_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streamingCallback);
            default:
                throw new Error('Invalid generation mode');
        }
    }
    async generateText(prompt, options = {}) {
        // Destructure with default values from `this` if not provided in `options`
        const {
            host_address = this.host_address,
            model_name = this.model_name,
            personality = this.personality,
            n_predict = this.n_predict,
            stream = false,
            temperature = this.temperature,
            top_k = this.top_k,
            top_p = this.top_p,
            repeat_penalty = this.repeat_penalty,
            repeat_last_n = this.repeat_last_n,
            seed = this.seed,
            n_threads = this.n_threads,
            service_key = this.service_key,
            streamingCallback = null
        } = options;

        try {
            const result = await this.lollms_generate(
                prompt,
                host_address,
                model_name,
                personality,
                n_predict,
                stream,
                temperature,
                top_k,
                top_p,
                repeat_penalty,
                repeat_last_n,
                seed,
                n_threads,
                service_key,
                streamingCallback
            );
            return result;
        } catch (error) {
            // Handle any errors that occur during generation
            console.error('An error occurred during text generation:', error);
            throw error; // Re-throw the error if you want to allow the caller to handle it as well
        }
    }
    async lollms_generate(prompt, host_address = this.host_address, model_name = this.model_name, personality = this.personality, n_predict = this.n_predict, stream = false, temperature = this.temperature, top_k = this.top_k, top_p = this.top_p, repeat_penalty = this.repeat_penalty, repeat_last_n = this.repeat_last_n, seed = this.seed, n_threads = this.n_threads, service_key = this.service_key, streamingCallback = null) {
        // Build the endpoint URL, falling back to a relative path when no host is set
        let url;
        if (host_address != null) {
            url = `${host_address}/lollms_generate`;
        } else {
            url = `/lollms_generate`;
        }
        const headers = service_key !== "" ? {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': `Bearer ${service_key}`,
        } : {
            'Content-Type': 'application/json',
        };

        const data = JSON.stringify({
            prompt: prompt,
            model_name: model_name,
            personality: personality,
            n_predict: n_predict ? n_predict : this.n_predict, // `self` is not defined in JS; use `this`
            stream: stream,
            temperature: temperature,
            top_k: top_k,
            top_p: top_p,
            repeat_penalty: repeat_penalty,
            repeat_last_n: repeat_last_n,
            seed: seed,
            n_threads: n_threads
        });

        try {
            const response = await fetch(url, {
                method: 'POST',
                headers: headers,
                body: data
            });

            // Check if the response is okay
            if (!response.ok) {
                throw new Error('Network response was not ok ' + response.statusText);
            }

            // Read the response as plain text
            const responseBody = await response.text();
            console.log(responseBody);
            return responseBody;
        } catch (error) {
            console.error(error);
            return null;
        }
    }

    async openai_generate(prompt, host_address = this.host_address, model_name = this.model_name, personality = this.personality, n_predict = this.n_predict, stream = false, temperature = this.temperature, top_k = this.top_k, top_p = this.top_p, repeat_penalty = this.repeat_penalty, repeat_last_n = this.repeat_last_n, seed = this.seed, n_threads = this.n_threads, completion_format = "vllm instruct", service_key = this.service_key, streamingCallback = null) {
        const url = `${host_address}/generate_completion`;
        const headers = service_key !== "" ? {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': `Bearer ${service_key}`,
        } : {
            'Content-Type': 'application/json',
        };

        const data = JSON.stringify({
            prompt: prompt,
            model_name: model_name,
            personality: personality,
            n_predict: n_predict,
            stream: stream,
            temperature: temperature,
            top_k: top_k,
            top_p: top_p,
            repeat_penalty: repeat_penalty,
            repeat_last_n: repeat_last_n,
            seed: seed,
            n_threads: n_threads,
            completion_format: completion_format
        });

        try {
            const response = await fetch(url, {
                method: 'POST',
                headers: headers,
                body: data
            });
            if (stream && streamingCallback) {
                // Note: streaming with the Fetch API in the browser might not work as expected
                // because Fetch does not support HTTP/2 server push.
                // A different approach would be needed for real-time streaming.
                streamingCallback(await response.json(), 'MSG_TYPE_CHUNK');
            } else {
                return await response.json();
            }
        } catch (error) {
            console.error("Error generating completion:", error);
            return null;
        }
    }

    async listMountedPersonalities(host_address = this.host_address) {
        const url = `${host_address}/list_mounted_personalities`;

        try {
            const response = await fetch(url);
            return await response.json();
        } catch (error) {
            console.error(error);
            return null;
        }
    }

    async listModels(host_address = this.host_address) {
        const url = `${host_address}/list_models`;

        try {
            const response = await fetch(url);
            return await response.json();
        } catch (error) {
            console.error(error);
            return null;
        }
    }
}
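
// Example usage (sketch only, not part of the library): assumes a LoLLMs server is reachable
// at the illustrative address http://localhost:9600 and that axios is loaded in the page.
// The function name and prompt are illustrative.
async function exampleLollmsClientUsage() {
    // A client pointed at a LoLLMs server; all generation parameters keep their defaults
    const lc = new LollmsClient("http://localhost:9600");

    // Build a prompt using the template helpers (defaults until /template resolves)
    const prompt = lc.system_message() + "You are a helpful assistant." +
        lc.template.separator_template + lc.user_message() + "What is LoLLMs?" +
        lc.template.separator_template + lc.ai_message();

    // generateText() falls back to the client's defaults for anything not passed in options
    const answer = await lc.generateText(prompt, { n_predict: 256, temperature: 0.7 });
    console.log(answer);
}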

class TasksLibrary {
    constructor(lollms) {
        this.lollms = lollms;
    }

    async translateTextChunk(textChunk, outputLanguage = "french", host_address = null, model_name = null, temperature = 0.1, maxGenerationSize = 3000) {
        const translationPrompt = [
            `!@>system:`,
            `Translate the following text to ${outputLanguage}.`,
            `Be faithful to the original text and do not add or remove any information.`,
            `Respond only with the translated text.`,
            `Do not add comments or explanations.`,
            `!@>text to translate:`,
            `${textChunk}`,
            `!@>translation:`,
        ].join("\n");

        // generateText takes an options object; null host/model fall back to the client's defaults
        const translated = await this.lollms.generateText(translationPrompt, {
            host_address: host_address ?? undefined,
            model_name: model_name ?? undefined,
            personality: -1,
            n_predict: maxGenerationSize,
            stream: false,
            temperature: temperature
        });

        return translated;
    }

    async summarizeText(textChunk, summaryLength = "short", host_address = null, model_name = null, temperature = 0.1, maxGenerationSize = null) {
        const summaryPrompt = [
            `system:`,
            `Summarize the following text in a ${summaryLength} manner.`,
            `Keep the summary concise and to the point.`,
            `Include all key points and do not add new information.`,
            `Respond only with the summary.`,
            `text to summarize:`,
            `${textChunk}`,
            `summary:`,
        ].join("\n");

        // Same options-object call as translateTextChunk; null values fall back to the client's defaults
        const summary = await this.lollms.generateText(summaryPrompt, {
            host_address: host_address ?? undefined,
            model_name: model_name ?? undefined,
            personality: -1,
            n_predict: maxGenerationSize ?? undefined,
            stream: false,
            temperature: temperature
        });

        return summary;
    }
}
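
// Example usage of TasksLibrary (sketch only, not part of the library): the address and
// sample texts below are illustrative.
async function exampleTasksLibraryUsage() {
    const lc = new LollmsClient("http://localhost:9600");
    const tasks = new TasksLibrary(lc);

    // Translate a chunk of text to English
    const translated = await tasks.translateTextChunk("Bonjour tout le monde", "english");
    console.log(translated);

    // Produce a short summary of a longer text
    const summary = await tasks.summarizeText("LoLLMs is a web UI and client library for interacting with large language models.", "short");
    console.log(summary);
}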