mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-04 09:42:53 +00:00
feat(ui): add page to talk with voice, transcription, and tts (#2520)
* feat(ui): add page to talk with voice, transcription, and tts Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Enhance graphics and status reporting Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Better UX by blocking invalid actions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
aae7ad9d73
commit
e96d2d7667
@ -247,6 +247,26 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
// Render index
|
// Render index
|
||||||
return c.Render("views/chat", summary)
|
return c.Render("views/chat", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
|
||||||
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
|
if len(backendConfigs) == 0 {
|
||||||
|
// If no model is available redirect to the index which suggests how to install models
|
||||||
|
return c.Redirect("/")
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := fiber.Map{
|
||||||
|
"Title": "LocalAI - Talk",
|
||||||
|
"ModelsConfig": backendConfigs,
|
||||||
|
"Model": backendConfigs[0].Name,
|
||||||
|
"Version": internal.PrintableVersion(),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Render index
|
||||||
|
return c.Render("views/talk", summary)
|
||||||
|
})
|
||||||
|
|
||||||
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
191
core/http/static/talk.js
Normal file
191
core/http/static/talk.js
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
|
||||||
|
// Handles to the page controls this script drives.
const resetButton = document.getElementById('resetButton');
const audioPlayback = document.getElementById('audioPlayback');
const recordButton = document.getElementById('recordButton');

// Chat messages exchanged so far; sent with every chat completion request.
let conversationHistory = [];
// Data chunks collected from the active recorder.
let audioChunks = [];
// True while a recording is in progress.
let isRecording = false;
// Recorder for the current capture; assigned when recording starts.
let mediaRecorder;
// Handle of the pending timer that clears the conversation.
let resetTimer;
|
||||||
|
|
||||||
|
/**
 * Read the API key currently typed into the `apiKey` input.
 *
 * @returns {string} The input's current value.
 */
function getApiKey() {
  const apiKeyInput = document.getElementById('apiKey');
  return apiKeyInput.value;
}
|
||||||
|
|
||||||
|
/**
 * Read the value selected in the `modelSelect` dropdown.
 *
 * @returns {string} The selected option's value.
 */
function getModel() {
  const modelDropdown = document.getElementById('modelSelect');
  return modelDropdown.value;
}
|
||||||
|
|
||||||
|
/**
 * Read the value selected in the `whisperModelSelect` dropdown.
 *
 * @returns {string} The selected option's value.
 */
function getWhisperModel() {
  const whisperDropdown = document.getElementById('whisperModelSelect');
  return whisperDropdown.value;
}
|
||||||
|
|
||||||
|
/**
 * Read the value selected in the `ttsModelSelect` dropdown.
 *
 * @returns {string} The selected option's value.
 */
function getTTSModel() {
  const ttsDropdown = document.getElementById('ttsModelSelect');
  return ttsDropdown.value;
}
|
||||||
|
|
||||||
|
/**
 * Forget the conversation so far and cancel the pending auto-reset timer.
 *
 * Used both as a click handler and as a timer callback. When invoked with an
 * event, its default action is prevented so that a click on the reset link
 * (an anchor pointing at "#") does not change the URL or scroll the page.
 *
 * @param {Event} [event] - Event passed by `addEventListener`; absent when
 *   called from a timer or directly.
 */
function resetConversation(event) {
  if (event && typeof event.preventDefault === 'function') {
    event.preventDefault();
  }
  conversationHistory = [];
  console.log("Conversation has been reset.");
  clearTimeout(resetTimer);
}
|
||||||
|
|
||||||
|
/**
 * (Re)arm the inactivity timer: any pending timer is cancelled and a new one
 * is scheduled to call `resetConversation` after five minutes.
 */
function setResetTimer() {
  const fiveMinutesMs = 300000;
  clearTimeout(resetTimer);
  const timerHandle = setTimeout(resetConversation, fiveMinutesMs);
  resetTimer = timerHandle;
}
|
||||||
|
|
||||||
|
// Wire the page controls: the reset link clears the conversation, the record
// button starts or stops a recording.
resetButton.addEventListener('click', resetConversation);
recordButton.addEventListener('click', toggleRecording);
|
||||||
|
|
||||||
|
/**
 * Click handler for the record button: stops the recording when one is in
 * progress, otherwise starts a new one.
 */
function toggleRecording() {
  if (isRecording) {
    stopRecording();
    return;
  }
  startRecording();
}
|
||||||
|
|
||||||
|
/**
 * Ask for microphone access and start capturing audio.
 *
 * The page is switched into its "recording" state only after the browser has
 * actually handed over an audio stream. If the MediaDevices API is missing,
 * or the user denies access, an alert is shown and the page is left
 * untouched so the user can simply try again.
 *
 * @returns {Promise<void>} Resolves once recording has started or the
 *   attempt has been abandoned.
 */
async function startRecording() {
  if (!navigator.mediaDevices) {
    alert('MediaDevices API not supported!');
    return;
  }

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // Permission denied, no input device, insecure context, ...
    console.error("Unable to access the microphone:", err);
    alert('Unable to access the microphone: ' + err.message);
    return;
  }

  document.getElementById("recording").style.display = "block";
  document.getElementById("resetButton").style.display = "none";

  mediaRecorder = new MediaRecorder(stream);
  audioChunks = [];
  mediaRecorder.ondataavailable = (event) => {
    audioChunks.push(event.data);
  };
  mediaRecorder.start();

  recordButton.textContent = 'Stop Recording';
  // Gray out the record button while a recording is in progress.
  recordButton.classList.add("bg-gray-500");

  isRecording = true;
}
|
||||||
|
|
||||||
|
/**
 * Stop the active recording and run the captured audio through the
 * transcription -> chat -> text-to-speech pipeline, playing the result.
 *
 * The `onstop` handler is registered before `stop()` is called so the event
 * can never be missed. Whatever happens in the pipeline, the page is always
 * returned to its idle state (loader hidden, buttons visible, recording flag
 * cleared); on failure the status line reports the error instead of leaving
 * the page stuck on the loader.
 */
function stopRecording() {
  const recorder = mediaRecorder;

  recorder.onstop = async () => {
    // Release the microphone so the browser's capture indicator goes away.
    if (recorder.stream && typeof recorder.stream.getTracks === 'function') {
      recorder.stream.getTracks().forEach((track) => track.stop());
    }

    const statusText = document.getElementById("statustext");
    document.getElementById("recording").style.display = "none";
    document.getElementById("recordButton").style.display = "none";
    document.getElementById("loader").style.display = "block";

    let finalStatus = "Press the record button to start recording.";
    try {
      const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
      statusText.textContent = "Processing audio...";
      const transcript = await sendAudioToWhisper(audioBlob);
      console.log("Transcript:", transcript);
      statusText.textContent = "Seems you said: " + transcript + ". Generating response...";

      const responseText = await sendTextToChatGPT(transcript);
      console.log("Response:", responseText);
      statusText.textContent = "Response generated: '" + responseText + "'. Generating audio response...";

      const ttsAudio = await getTextToSpeechAudio(responseText);
      playAudioResponse(ttsAudio);
    } catch (err) {
      console.error("Talk pipeline failed:", err);
      finalStatus = "Something went wrong: " + err.message + ". Press the record button to try again.";
    } finally {
      recordButton.textContent = 'Record';
      // Undo the gray "recording in progress" styling.
      recordButton.classList.remove("bg-gray-500");
      isRecording = false;
      document.getElementById("loader").style.display = "none";
      document.getElementById("recordButton").style.display = "block";
      document.getElementById("resetButton").style.display = "block";
      statusText.textContent = finalStatus;
    }
  };

  recorder.stop();
}
|
||||||
|
|
||||||
|
/**
 * Submit handler for the API key form: suppresses the default form
 * submission, stores the typed key in localStorage under "key", and removes
 * focus from the input.
 *
 * @param {Event} event - The form's submit event.
 */
function submitKey(event) {
  event.preventDefault();
  const apiKeyInput = document.getElementById("apiKey");
  localStorage.setItem("key", apiKeyInput.value);
  apiKeyInput.blur();
}
|
||||||
|
|
||||||
|
// Persist the API key whenever the key form is submitted.
document.getElementById("key").addEventListener("submit", submitKey);

// Pre-fill the API key input with the key saved by a previous visit, if any.
// Declared with `const`: the bare assignment used before created an implicit
// global, which throws a ReferenceError in strict mode or module code.
const storeKey = localStorage.getItem("key");
if (storeKey) {
  document.getElementById("apiKey").value = storeKey;
} else {
  document.getElementById("apiKey").value = null;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Upload recorded audio to the transcription endpoint and return the text.
 *
 * The request is a multipart form carrying the audio as `file` and the
 * selected whisper model as `model`, authorized with the API key saved in
 * localStorage under "key".
 *
 * @param {Blob} audioBlob - The recorded audio.
 * @returns {Promise<string>} The `text` field of the JSON response.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function sendAudioToWhisper(audioBlob) {
  const formData = new FormData();
  formData.append('file', audioBlob);
  formData.append('model', getWhisperModel());
  // Local binding: the bare `API_KEY = ...` used before leaked a global.
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`
    },
    body: formData
  });

  if (!response.ok) {
    throw new Error(`Transcription request failed with status ${response.status}`);
  }

  const result = await response.json();
  console.log("Whisper result:", result);
  return result.text;
}
|
||||||
|
|
||||||
|
/**
 * Send the user's text, together with the conversation so far, to the chat
 * completions endpoint and return the assistant's reply.
 *
 * The user message and the reply are appended to `conversationHistory` only
 * after a successful round trip, so a failed request does not leave an
 * unanswered user message behind. On success the inactivity timer is
 * re-armed.
 *
 * @param {string} text - The user's message.
 * @returns {Promise<string>} The content of the first returned choice.
 * @throws {Error} When the server answers with a non-2xx status or the
 *   response carries no choice.
 */
async function sendTextToChatGPT(text) {
  const userMessage = { role: "user", content: text };
  // Local binding: the bare `API_KEY = ...` used before leaked a global.
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: getModel(),
      messages: conversationHistory.concat(userMessage)
    })
  });

  if (!response.ok) {
    throw new Error(`Chat request failed with status ${response.status}`);
  }

  const result = await response.json();
  const firstChoice = Array.isArray(result.choices) ? result.choices[0] : undefined;
  if (!firstChoice || !firstChoice.message) {
    throw new Error("Chat response did not contain any choices");
  }

  const responseText = firstChoice.message.content;
  conversationHistory.push(userMessage, { role: "assistant", content: responseText });

  setResetTimer();

  return responseText;
}
|
||||||
|
|
||||||
|
/**
 * Ask the speech endpoint to synthesize `text` with the selected TTS model.
 *
 * @param {string} text - The text to speak.
 * @returns {Promise<Blob>} The response body as a blob.
 * @throws {Error} When the server answers with a non-2xx status; without
 *   this check the error body would be handed to the audio player as if it
 *   were audio.
 */
async function getTextToSpeechAudio(text) {
  // Local binding: the bare `API_KEY = ...` used before leaked a global.
  const apiKey = localStorage.getItem("key");

  const response = await fetch('/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      // "backend": "string",
      input: text,
      model: getTTSModel(),
      // "voice": "string"
    })
  });

  if (!response.ok) {
    throw new Error(`Text-to-speech request failed with status ${response.status}`);
  }

  return response.blob();
}
|
||||||
|
|
||||||
|
/**
 * Load the synthesized audio into the page's audio element, reveal the
 * element, and start playback.
 *
 * The object URL created for the previous response is revoked once it has
 * been replaced, so the blobs of earlier replies can be garbage collected.
 * A rejected `play()` promise (for example when autoplay is blocked) is
 * logged instead of surfacing as an unhandled rejection; the controls stay
 * visible so the user can start playback manually.
 *
 * @param {Blob} audioBlob - The audio to play.
 */
function playAudioResponse(audioBlob) {
  const previousUrl = audioPlayback.src;
  const audioUrl = URL.createObjectURL(audioBlob);
  audioPlayback.src = audioUrl;
  audioPlayback.hidden = false;

  if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }

  const playback = audioPlayback.play();
  if (playback && typeof playback.catch === 'function') {
    playback.catch((err) => {
      console.error("Audio playback failed:", err);
    });
  }
}
|
||||||
|
|
@ -20,6 +20,7 @@
|
|||||||
<a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
|
<a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
|
||||||
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
|
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
|
||||||
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
||||||
|
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
|
||||||
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
|
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -32,6 +33,7 @@
|
|||||||
<a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
|
<a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
|
||||||
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
|
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
|
||||||
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
||||||
|
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
|
||||||
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
|
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
108
core/http/views/talk.html
Normal file
108
core/http/views/talk.html
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
{{template "views/partials/head" .}}
|
||||||
|
<script defer src="/static/talk.js"></script>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
|
||||||
|
<div class="flex flex-col min-h-screen">
|
||||||
|
|
||||||
|
{{template "views/partials/navbar"}}
|
||||||
|
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
|
||||||
|
<!-- Chat Header -->
|
||||||
|
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">
|
||||||
|
|
||||||
|
<div class="flex items-center justify-center">
|
||||||
|
|
||||||
|
<div x-show="component === 'menu'" id="menu">
|
||||||
|
|
||||||
|
<button @click="component = 'key'" title="Update API key"
|
||||||
|
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
|
||||||
|
>Set API Key🔑</button>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<form x-show="component === 'key'" id="key">
|
||||||
|
<input
|
||||||
|
type="password"
|
||||||
|
id="apiKey"
|
||||||
|
name="apiKey"
|
||||||
|
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||||
|
placeholder="API Key"
|
||||||
|
x-model.lazy="key"
|
||||||
|
/>
|
||||||
|
<button @click="component = 'menu'" type="submit" title="Save API key">
|
||||||
|
<i class="fa-solid fa-arrow-right"></i>
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="flex items-center justify-center">
|
||||||
|
<div class="w-full p-4 max-w-md border-t border-gray-700 ">
|
||||||
|
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4">
|
||||||
|
<div id="recording" class="" style="display: none;">
|
||||||
|
<i class="fa-solid fa-microphone animate-pulse text-red-700"></i>
|
||||||
|
<span class="text-white-700 text-sm font-bold mb-2">Recording... press "Stop recording" to stop</span>
|
||||||
|
</div>
|
||||||
|
<div id="loader" class="my-2 loader" style="display: none;"></div>
|
||||||
|
<div id="statustext" class="my-2 p-2 block text-white-700 text-sm font-bold mb-2" ></div>
|
||||||
|
<div class="mb-4" >
|
||||||
|
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
|
||||||
|
<select id="modelSelect"
|
||||||
|
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||||
|
>
|
||||||
|
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||||
|
|
||||||
|
{{ range .ModelsConfig }}
|
||||||
|
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||||
|
{{ end }}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mb-4" >
|
||||||
|
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label>
|
||||||
|
<select id="whisperModelSelect"
|
||||||
|
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||||
|
|
||||||
|
>
|
||||||
|
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||||
|
|
||||||
|
{{ range .ModelsConfig }}
|
||||||
|
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||||
|
{{ end }}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="mb-4" >
|
||||||
|
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label>
|
||||||
|
<select id="ttsModelSelect"
|
||||||
|
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||||
|
>
|
||||||
|
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||||
|
{{ range .ModelsConfig }}
|
||||||
|
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||||
|
{{ end }}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<button id="recordButton"
|
||||||
|
class="bg-red-500 hover:bg-red-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
|
||||||
|
><i class="fa-solid fa-microphone pr-2"></i>Talk</button>
|
||||||
|
<a id="resetButton"
|
||||||
|
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
|
||||||
|
href="#"
|
||||||
|
>Reset conversation</a>
|
||||||
|
<audio id="audioPlayback" controls hidden></audio>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
x
Reference in New Issue
Block a user