// lollms-webui/endpoints/lollms_client_js.js

// This requires axios
// In the html don't forget to import axios.min.js cdn
// <script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
// JavaScript equivalent of the ELF_GENERATION_FORMAT enum
const ELF_GENERATION_FORMAT = {
    LOLLMS: 0,
    OPENAI: 1,
    OLLAMA: 2,
    LITELLM: 3 // distinct value so the LITELLM case in generate() is reachable
};
// JavaScript equivalent of the ELF_COMPLETION_FORMAT enum
const ELF_COMPLETION_FORMAT = {
    Instruct: 0,
    Chat: 1
};
// Ensuring the objects are immutable
Object.freeze(ELF_GENERATION_FORMAT);
Object.freeze(ELF_COMPLETION_FORMAT);
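// default_generation_mode (the last constructor argument) selects which backend
// generate() dispatches to. A minimal sketch, assuming a reachable LoLLMs server:
//   const client = new LollmsClient(); // LOLLMS mode, relative endpoints
// To target an OpenAI-compatible server instead, pass ELF_GENERATION_FORMAT.OPENAI
// as the final constructor argument.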
class LollmsClient {
    constructor(
        host_address = null,
        model_name = null,
        ctx_size = 4096,
        personality = -1,
        n_predict = 4096,
        temperature = 0.1,
        top_k = 50,
        top_p = 0.95,
        repeat_penalty = 0.8,
        repeat_last_n = 40,
        seed = null,
        n_threads = 8,
        service_key = "",
        default_generation_mode = ELF_GENERATION_FORMAT.LOLLMS
    ) {
        // Handle the import or initialization of tiktoken equivalent in JavaScript
        // this.tokenizer = new TikTokenJS('gpt-3.5-turbo-1106'); // This is hypothetical
        this.host_address = host_address;
        this.model_name = model_name;
        this.ctx_size = ctx_size;
        this.n_predict = n_predict;
        this.personality = personality;
        this.temperature = temperature;
        this.top_k = top_k;
        this.top_p = top_p;
        this.repeat_penalty = repeat_penalty;
        this.repeat_last_n = repeat_last_n;
        this.seed = seed;
        this.n_threads = n_threads;
        this.service_key = service_key;
        this.default_generation_mode = default_generation_mode;
        // Default prompt template; replaced asynchronously by the server's /template settings below.
        this.template = {
            start_header_id_template: "!@>",
            end_header_id_template: ": ",
            separator_template: "\n",
            start_user_header_id_template: "!@>",
            end_user_header_id_template: ": ",
            end_user_message_id_template: "",
            start_ai_header_id_template: "!@>",
            end_ai_header_id_template: ": ",
            end_ai_message_id_template: "",
            system_message_template: "system"
        };
        fetch('/template')
            .then((response) => {
                if (!response.ok) {
                    throw new Error('Network response was not ok ' + response.statusText);
                }
                return response.json();
            })
            .then((data) => {
                console.log("data: ", data);
                this.template = data;
            })
            .catch((error) => {
                console.error('Error fetching template:', error);
            });
    }
    // Builds the header that marks a system message in the prompt template.
    system_message() {
        return this.template.start_header_id_template + this.template.system_message_template + this.template.end_header_id_template;
    }
    // Builds the header that marks an AI message in the prompt template.
    ai_message(ai_name = "assistant") {
        return this.template.start_ai_header_id_template + ai_name + this.template.end_ai_header_id_template;
    }
    // Builds the header that marks a user message in the prompt template.
    user_message(user_name = "user") {
        return this.template.start_user_header_id_template + user_name + this.template.end_user_header_id_template;
    }
    updateServerAddress(newAddress) {
        // Update the address used by subsequent requests (the class stores it as host_address).
        this.host_address = newAddress;
    }
    async tokenize(prompt) {
        /**
         * Tokenizes the given prompt using the model's tokenizer.
         *
         * @param {string} prompt - The input prompt to be tokenized.
         * @returns {Array} A list of tokens representing the tokenized prompt.
         */
        const output = await axios.post("/lollms_tokenize", { "prompt": prompt });
        console.log(output.data.named_tokens);
        return output.data.named_tokens;
    }
    async detokenize(tokensList) {
        /**
         * Detokenizes the given list of tokens using the model's tokenizer.
         *
         * @param {Array} tokensList - A list of tokens to be detokenized.
         * @returns {string} The detokenized text as a string.
         */
        const output = await axios.post("/lollms_detokenize", { "tokens": tokensList });
        console.log(output.data.text);
        return output.data.text;
    }
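    // Usage sketch (assumes axios is loaded and the page is served by a LoLLMs
    // server exposing /lollms_tokenize and /lollms_detokenize):
    //   const namedTokens = await client.tokenize("Hello world"); // server's named token list
    //   const text = await client.detokenize(tokenIds);           // tokenIds: a list of token ids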
    generate(prompt, {
        n_predict = null,
        stream = false,
        temperature = 0.1,
        top_k = 50,
        top_p = 0.95,
        repeat_penalty = 0.8,
        repeat_last_n = 40,
        seed = null,
        n_threads = 8,
        service_key = "",
        streamingCallback = null
    } = {}) {
        // Dispatch to the backend selected by default_generation_mode.
        switch (this.default_generation_mode) {
            case ELF_GENERATION_FORMAT.LOLLMS:
                return this.lollms_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, service_key, streamingCallback);
            case ELF_GENERATION_FORMAT.OPENAI:
                return this.openai_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streamingCallback);
            case ELF_GENERATION_FORMAT.OLLAMA:
                return this.ollama_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streamingCallback);
            case ELF_GENERATION_FORMAT.LITELLM:
                return this.litellm_generate(prompt, this.host_address, this.model_name, -1, n_predict, stream, temperature, top_k, top_p, repeat_penalty, repeat_last_n, seed, n_threads, ELF_COMPLETION_FORMAT.Instruct, service_key, streamingCallback);
            default:
                throw new Error('Invalid generation mode');
        }
    }
    async generateText(prompt, options = {}) {
        // Destructure with default values from `this` if not provided in `options`
        const {
            host_address = this.host_address,
            model_name = this.model_name,
            personality = this.personality,
            n_predict = this.n_predict,
            stream = false,
            temperature = this.temperature,
            top_k = this.top_k,
            top_p = this.top_p,
            repeat_penalty = this.repeat_penalty,
            repeat_last_n = this.repeat_last_n,
            seed = this.seed,
            n_threads = this.n_threads,
            service_key = this.service_key,
            streamingCallback = null
        } = options;
        try {
            const result = await this.lollms_generate(
                prompt,
                host_address,
                model_name,
                personality,
                n_predict,
                stream,
                temperature,
                top_k,
                top_p,
                repeat_penalty,
                repeat_last_n,
                seed,
                n_threads,
                service_key,
                streamingCallback
            );
            return result;
        } catch (error) {
            // Handle any errors that occur during generation
            console.error('An error occurred during text generation:', error);
            throw error; // Re-throw the error if you want to allow the caller to handle it as well
        }
    }
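    // Usage sketch (illustrative values; only the prompt is required, other
    // options fall back to the client's defaults):
    //   const text = await client.generateText("Say hello.", { temperature: 0.3, n_predict: 128 });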
    async lollms_generate(prompt, host_address = this.host_address, model_name = this.model_name, personality = this.personality, n_predict = this.n_predict, stream = false, temperature = this.temperature, top_k = this.top_k, top_p = this.top_p, repeat_penalty = this.repeat_penalty, repeat_last_n = this.repeat_last_n, seed = this.seed, n_threads = this.n_threads, service_key = this.service_key, streamingCallback = null) {
        // Use the configured host if one is set, otherwise fall back to a relative endpoint.
        let url;
        if (host_address != null) {
            url = `${host_address}/lollms_generate`;
        } else {
            url = `/lollms_generate`;
        }
        const headers = service_key !== "" ? {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': `Bearer ${service_key}`,
        } : {
            'Content-Type': 'application/json',
        };
        const data = JSON.stringify({
            prompt: prompt,
            model_name: model_name,
            personality: personality,
            n_predict: n_predict ? n_predict : this.n_predict,
            stream: stream,
            temperature: temperature,
            top_k: top_k,
            top_p: top_p,
            repeat_penalty: repeat_penalty,
            repeat_last_n: repeat_last_n,
            seed: seed,
            n_threads: n_threads
        });
        try {
            const response = await fetch(url, {
                method: 'POST',
                headers: headers,
                body: data
            });
            // Check if the response is okay
            if (!response.ok) {
                throw new Error('Network response was not ok ' + response.statusText);
            }
            // Read the response as plain text
            const responseBody = await response.text();
            console.log(responseBody);
            return responseBody;
        } catch (error) {
            console.error(error);
            return null;
        }
    }
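    // Usage sketch (host and key are illustrative): a direct positional call
    // against a remote LoLLMs server, authenticated with a Bearer service key.
    //   const raw = await client.lollms_generate("Hello", "http://localhost:9600",
    //                                            null, -1, 64, false, 0.1, 50, 0.95,
    //                                            0.8, 40, null, 8, "my_key");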
    async openai_generate(prompt, host_address = this.host_address, model_name = this.model_name, personality = this.personality, n_predict = this.n_predict, stream = false, temperature = this.temperature, top_k = this.top_k, top_p = this.top_p, repeat_penalty = this.repeat_penalty, repeat_last_n = this.repeat_last_n, seed = this.seed, n_threads = this.n_threads, completion_format = "vllm instruct", service_key = this.service_key, streamingCallback = null) {
        const url = `${host_address}/generate_completion`;
        const headers = service_key !== "" ? {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': `Bearer ${service_key}`,
        } : {
            'Content-Type': 'application/json',
        };
        const data = JSON.stringify({
            prompt: prompt,
            model_name: model_name,
            personality: personality,
            n_predict: n_predict,
            stream: stream,
            temperature: temperature,
            top_k: top_k,
            top_p: top_p,
            repeat_penalty: repeat_penalty,
            repeat_last_n: repeat_last_n,
            seed: seed,
            n_threads: n_threads,
            completion_format: completion_format
        });
        try {
            const response = await fetch(url, {
                method: 'POST',
                headers: headers,
                body: data
            });
            if (stream && streamingCallback) {
                // Note: awaiting the whole response body does not give real-time streaming;
                // true streaming would need to read chunks from response.body instead.
                streamingCallback(await response.json(), 'MSG_TYPE_CHUNK');
            } else {
                return await response.json();
            }
        } catch (error) {
            console.error("Error generating completion:", error);
            return null;
        }
    }
    async listMountedPersonalities(host_address = this.host_address) {
        // Fall back to a relative endpoint when no host is configured.
        const url = host_address != null ? `${host_address}/list_mounted_personalities` : `/list_mounted_personalities`;
        try {
            const response = await fetch(url);
            return await response.json();
        } catch (error) {
            console.error(error);
            return null;
        }
    }
    async listModels(host_address = this.host_address) {
        // Fall back to a relative endpoint when no host is configured.
        const url = host_address != null ? `${host_address}/list_models` : `/list_models`;
        try {
            const response = await fetch(url);
            return await response.json();
        } catch (error) {
            console.error(error);
            return null;
        }
    }
}
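// Usage sketch for LollmsClient (illustrative values; assumes the page is served
// by, or can reach, a LoLLMs server and that axios is loaded for tokenization):
//   const client = new LollmsClient(); // relative endpoints, LOLLMS generation mode
//   const models = await client.listModels();
//   const reply = await client.generateText("What is a context window?");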
class TasksLibrary {
    constructor(lollms) {
        this.lollms = lollms;
    }
    async translateTextChunk(textChunk, outputLanguage = "french", host_address = null, model_name = null, temperature = 0.1, maxGenerationSize = 3000) {
        const translationPrompt = [
            `!@>system:`,
            `Translate the following text to ${outputLanguage}.`,
            `Be faithful to the original text and do not add or remove any information.`,
            `Respond only with the translated text.`,
            `Do not add comments or explanations.`,
            `!@>text to translate:`,
            `${textChunk}`,
            `!@>translation:`,
        ].join("\n");
        // generateText takes an options object; unset values fall back to the client's defaults.
        const translated = await this.lollms.generateText(translationPrompt, {
            host_address: host_address ?? undefined,
            model_name: model_name ?? undefined,
            personality: -1,
            n_predict: maxGenerationSize,
            temperature: temperature
        });
        return translated;
    }
    async summarizeText(textChunk, summaryLength = "short", host_address = null, model_name = null, temperature = 0.1, maxGenerationSize = null) {
        const summaryPrompt = [
            `system:`,
            `Summarize the following text in a ${summaryLength} manner.`,
            `Keep the summary concise and to the point.`,
            `Include all key points and do not add new information.`,
            `Respond only with the summary.`,
            `text to summarize:`,
            `${textChunk}`,
            `summary:`,
        ].join("\n");
        // generateText takes an options object; unset values fall back to the client's defaults.
        const summary = await this.lollms.generateText(summaryPrompt, {
            host_address: host_address ?? undefined,
            model_name: model_name ?? undefined,
            personality: -1,
            n_predict: maxGenerationSize ?? undefined,
            temperature: temperature
        });
        return summary;
    }
}
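// Usage sketch for TasksLibrary (illustrative values):
//   const tasks = new TasksLibrary(new LollmsClient());
//   const french = await tasks.translateTextChunk("Good morning", "french");
//   const brief = await tasks.summarizeText(longArticle, "short");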