mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
feat(ui): add page to talk with voice, transcription, and tts (#2520)
* feat(ui): add page to talk with voice, transcription, and tts Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Enhance graphics and status reporting Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Better UX by blocking unvalid actions Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
aae7ad9d73
commit
e96d2d7667
@ -247,6 +247,26 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
// Render index
|
||||
return c.Render("views/chat", summary)
|
||||
})
|
||||
|
||||
// GET /talk/ serves the voice conversation page (views/talk).
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
	configs := cl.GetAllBackendConfigs()

	// If no model is available redirect to the index which suggests how to install models
	if len(configs) == 0 {
		return c.Redirect("/")
	}

	// The first configured model is passed as "Model"; the full list is
	// passed as "ModelsConfig" for the template to iterate over.
	return c.Render("views/talk", fiber.Map{
		"Title":        "LocalAI - Talk",
		"ModelsConfig": configs,
		"Model":        configs[0].Name,
		"Version":      internal.PrintableVersion(),
	})
})
|
||||
|
||||
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
||||
|
||||
backendConfigs := cl.GetAllBackendConfigs()
|
||||
|
191
core/http/static/talk.js
Normal file
191
core/http/static/talk.js
Normal file
@ -0,0 +1,191 @@
|
||||
|
||||
// Page elements this script drives, looked up once at load time.
const [recordButton, audioPlayback, resetButton] = ['recordButton', 'audioPlayback', 'resetButton']
    .map((id) => document.getElementById(id));

// Recording state: the active MediaRecorder, the chunks it has emitted so far,
// and whether a recording is currently in progress.
let mediaRecorder;
let audioChunks = [];
let isRecording = false;

// Chat messages exchanged so far, plus the handle of the timer that clears them.
let conversationHistory = [];
let resetTimer;
|
||||
|
||||
/**
 * Reads the current value of the `apiKey` element.
 *
 * @returns {string} The element's `value`.
 */
function getApiKey() {
    const { value } = document.getElementById('apiKey');
    return value;
}
|
||||
|
||||
/**
 * Reads the current value of the `modelSelect` element.
 *
 * @returns {string} The element's `value`.
 */
function getModel() {
    const { value } = document.getElementById('modelSelect');
    return value;
}
|
||||
|
||||
/**
 * Reads the current value of the `whisperModelSelect` element.
 *
 * @returns {string} The element's `value`.
 */
function getWhisperModel() {
    const { value } = document.getElementById('whisperModelSelect');
    return value;
}
|
||||
|
||||
/**
 * Reads the current value of the `ttsModelSelect` element.
 *
 * @returns {string} The element's `value`.
 */
function getTTSModel() {
    const { value } = document.getElementById('ttsModelSelect');
    return value;
}
|
||||
|
||||
/**
 * Clears the conversation history and cancels the pending auto-reset timer.
 *
 * Used both as the click handler of the reset control and as the timeout
 * callback armed by `setResetTimer`, so the argument is optional.
 *
 * @param {Event} [event] - Click event when triggered from the UI. Its default
 *   action is cancelled so that a link-style control (`href="#"`) does not
 *   navigate or scroll the page.
 * @returns {void}
 */
function resetConversation(event) {
    if (event && typeof event.preventDefault === 'function') {
        event.preventDefault();
    }
    conversationHistory = [];
    console.log("Conversation has been reset.");
    clearTimeout(resetTimer);
}
|
||||
|
||||
/**
 * (Re)arms the inactivity timer: any previously scheduled reset is cancelled
 * and `resetConversation` is scheduled to run five minutes from now.
 *
 * @returns {void}
 */
function setResetTimer() {
    const fiveMinutesMs = 300000;
    clearTimeout(resetTimer);
    resetTimer = setTimeout(resetConversation, fiveMinutesMs);
}
|
||||
|
||||
// Wire the page controls to their click handlers: the record button toggles
// recording, the reset control clears the conversation.
const clickBindings = [
    [recordButton, toggleRecording],
    [resetButton, resetConversation],
];
for (const [element, handler] of clickBindings) {
    element.addEventListener('click', handler);
}
|
||||
|
||||
/**
 * Click handler for the record button: stops the recording when one is in
 * progress, otherwise starts a new one.
 *
 * @returns {void}
 */
function toggleRecording() {
    if (isRecording) {
        stopRecording();
        return;
    }
    startRecording();
}
|
||||
|
||||
/**
 * Requests microphone access and starts a new recording.
 *
 * The UI is switched to the "recording" state only once the microphone stream
 * has actually been obtained, so an unsupported browser or a denied permission
 * prompt leaves the page in its idle state instead of showing a recording
 * indicator that can never be dismissed.
 *
 * @returns {Promise<void>} Resolves once recording has started, or after the
 *   user has been alerted that it could not be started.
 */
async function startRecording() {
    if (!navigator.mediaDevices) {
        alert('MediaDevices API not supported!');
        return;
    }

    let stream;
    try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
        // Typically a denied permission prompt or a missing input device.
        console.error("Unable to access the microphone:", err);
        alert('Unable to access the microphone: ' + err.message);
        return;
    }

    document.getElementById("recording").style.display = "block";
    document.getElementById("resetButton").style.display = "none";

    mediaRecorder = new MediaRecorder(stream);
    audioChunks = [];
    mediaRecorder.ondataavailable = (event) => {
        audioChunks.push(event.data);
    };
    mediaRecorder.start();

    recordButton.textContent = 'Stop Recording';
    // Grey the button out while a recording is in progress.
    recordButton.classList.add("bg-gray-500");

    isRecording = true;
}
|
||||
|
||||
/**
 * Stops the active recording and runs the talk pipeline on the captured audio:
 * transcription, chat completion, speech synthesis, then playback.
 *
 * The `stop` handler is registered before `stop()` is called, the microphone
 * tracks are released as soon as the recorder has stopped, and the controls
 * are always restored (even when one of the requests fails) so the page can
 * never be left stuck on the loader.
 *
 * @returns {void}
 */
function stopRecording() {
    // Keep a local reference so the handler works on this recorder even if a
    // new one is assigned to `mediaRecorder` later.
    const recorder = mediaRecorder;
    if (!recorder || recorder.state === 'inactive') {
        return;
    }

    recorder.onstop = async () => {
        // Release the microphone so the browser's capture indicator turns off.
        if (recorder.stream) {
            recorder.stream.getTracks().forEach((track) => track.stop());
        }

        const loader = document.getElementById("loader");
        const statusText = document.getElementById("statustext");

        document.getElementById("recording").style.display = "none";
        document.getElementById("recordButton").style.display = "none";
        loader.style.display = "block";

        let finalStatus = "Press the record button to start recording.";
        try {
            const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
            statusText.textContent = "Processing audio...";
            const transcript = await sendAudioToWhisper(audioBlob);
            console.log("Transcript:", transcript);
            statusText.textContent = "Seems you said: " + transcript + ". Generating response...";

            const responseText = await sendTextToChatGPT(transcript);
            console.log("Response:", responseText);
            statusText.textContent = "Response generated: '" + responseText + "'. Generating audio response...";

            const ttsAudio = await getTextToSpeechAudio(responseText);
            playAudioResponse(ttsAudio);
        } catch (err) {
            console.error("Talk pipeline failed:", err);
            finalStatus = "Something went wrong: " + err.message + ". Press the record button to try again.";
        } finally {
            // Restore the idle UI whether or not the pipeline succeeded.
            recordButton.textContent = 'Record';
            recordButton.classList.remove("bg-gray-500");
            isRecording = false;
            loader.style.display = "none";
            document.getElementById("recordButton").style.display = "block";
            document.getElementById("resetButton").style.display = "block";
            statusText.textContent = finalStatus;
        }
    };

    recorder.stop();
}
|
||||
|
||||
/**
 * Submit handler for the API key form: cancels the default submission, stores
 * the typed key in localStorage under "key" and removes focus from the input.
 *
 * @param {Event} event - The form's submit event.
 * @returns {void}
 */
function submitKey(event) {
    event.preventDefault();
    const keyInput = document.getElementById("apiKey");
    localStorage.setItem("key", keyInput.value);
    keyInput.blur();
}
|
||||
|
||||
// Persist the API key whenever the key form is submitted.
document.getElementById("key").addEventListener("submit", submitKey);

// Pre-fill the API key input from localStorage. The lookup is block-scoped so
// no implicit global is created; an absent key clears the input.
{
    const storedKey = localStorage.getItem("key");
    document.getElementById("apiKey").value = storedKey || "";
}
|
||||
|
||||
|
||||
/**
 * Uploads recorded audio to `/v1/audio/transcriptions` and returns the
 * transcript.
 *
 * @param {Blob} audioBlob - The recorded audio.
 * @returns {Promise<string>} The `text` field of the JSON response.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function sendAudioToWhisper(audioBlob) {
    const formData = new FormData();
    formData.append('file', audioBlob);
    formData.append('model', getWhisperModel());

    // Only send credentials when a key has actually been stored, instead of
    // the literal string "Bearer null".
    const apiKey = localStorage.getItem("key");
    const headers = {};
    if (apiKey) {
        headers['Authorization'] = `Bearer ${apiKey}`;
    }

    const response = await fetch('/v1/audio/transcriptions', {
        method: 'POST',
        headers: headers,
        body: formData
    });
    if (!response.ok) {
        throw new Error(`Transcription request failed with status ${response.status}`);
    }

    const result = await response.json();
    console.log("Whisper result:", result);
    return result.text;
}
|
||||
|
||||
/**
 * Sends the conversation so far plus the new user message to
 * `/v1/chat/completions` and returns the assistant's reply.
 *
 * The conversation history is only updated once a reply has been received, so
 * a failed request does not leave a dangling user turn behind. A successful
 * exchange also re-arms the inactivity reset timer.
 *
 * @param {string} text - The user's message.
 * @returns {Promise<string>} The content of the first returned choice.
 * @throws {Error} When the server answers with a non-2xx status or the
 *   response carries no message.
 */
async function sendTextToChatGPT(text) {
    const userMessage = { role: "user", content: text };

    // Only send credentials when a key has actually been stored.
    const apiKey = localStorage.getItem("key");
    const headers = { 'Content-Type': 'application/json' };
    if (apiKey) {
        headers['Authorization'] = `Bearer ${apiKey}`;
    }

    const response = await fetch('/v1/chat/completions', {
        method: 'POST',
        headers: headers,
        body: JSON.stringify({
            model: getModel(),
            messages: conversationHistory.concat(userMessage)
        })
    });
    if (!response.ok) {
        throw new Error(`Chat completion request failed with status ${response.status}`);
    }

    const result = await response.json();
    const choice = result.choices && result.choices[0];
    if (!choice || !choice.message) {
        throw new Error('Chat completion response did not contain a message');
    }

    const responseText = choice.message.content;
    conversationHistory.push(userMessage, { role: "assistant", content: responseText });

    setResetTimer();

    return responseText;
}
|
||||
|
||||
/**
 * Requests synthesized speech for `text` from `/v1/audio/speech`.
 *
 * @param {string} text - The text to synthesize.
 * @returns {Promise<Blob>} The response body as a blob.
 * @throws {Error} When the server answers with a non-2xx status, so an error
 *   payload is never handed to the audio player as if it were audio.
 */
async function getTextToSpeechAudio(text) {
    // Only send credentials when a key has actually been stored.
    const apiKey = localStorage.getItem("key");
    const headers = { 'Content-Type': 'application/json' };
    if (apiKey) {
        headers['Authorization'] = `Bearer ${apiKey}`;
    }

    const response = await fetch('/v1/audio/speech', {
        method: 'POST',
        headers: headers,
        body: JSON.stringify({
            input: text,
            model: getTTSModel()
        })
    });
    if (!response.ok) {
        throw new Error(`Speech request failed with status ${response.status}`);
    }

    return response.blob();
}
|
||||
|
||||
/**
 * Loads an audio blob into the page's audio element, reveals the element and
 * starts playback.
 *
 * The object URL created for the previous response is revoked first so the
 * blobs of earlier turns can be released, and a rejected `play()` promise
 * (for example when the browser blocks playback) is logged rather than left
 * unhandled.
 *
 * @param {Blob} audioBlob - The audio to play.
 * @returns {void}
 */
function playAudioResponse(audioBlob) {
    const previousUrl = audioPlayback.src;
    if (previousUrl && previousUrl.startsWith('blob:')) {
        URL.revokeObjectURL(previousUrl);
    }

    audioPlayback.src = URL.createObjectURL(audioBlob);
    audioPlayback.hidden = false;

    const playback = audioPlayback.play();
    if (playback && typeof playback.catch === 'function') {
        playback.catch((err) => {
            console.error("Audio playback failed:", err);
        });
    }
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
<a href="/chat/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
|
||||
<a href="/text2image/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-image pr-2"></i> Generate images</a>
|
||||
<a href="/tts/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
||||
<a href="/talk/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
|
||||
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
|
||||
</div>
|
||||
</div>
|
||||
@ -32,6 +33,7 @@
|
||||
<a href="/chat/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-comments pr-2"></i> Chat</a>
|
||||
<a href="/text2image/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-image pr-2"></i> Generate images</a>
|
||||
<a href="/tts/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-music pr-2"></i> TTS </a>
|
||||
<a href="/talk/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fa-solid fa-phone pr-2"></i> Talk </a>
|
||||
<a href="/swagger/" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1"><i class="fas fa-code pr-2"></i> API</a>
|
||||
</div>
|
||||
</div>
|
||||
|
108
core/http/views/talk.html
Normal file
108
core/http/views/talk.html
Normal file
@ -0,0 +1,108 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
<script defer src="/static/talk.js"></script>
|
||||
<style>
|
||||
body {
|
||||
overflow: hidden;
|
||||
}
|
||||
</style>
|
||||
<body class="bg-gray-900 text-gray-200" x-data="{ key: $store.chat.key }">
|
||||
<div class="flex flex-col min-h-screen">
|
||||
|
||||
{{template "views/partials/navbar"}}
|
||||
<div class="chat-container mt-2 mr-2 ml-2 mb-2 bg-gray-800 shadow-lg rounded-lg " >
|
||||
<!-- Chat Header -->
|
||||
<div class="border-b border-gray-700 p-4" x-data="{ component: 'menu' }">
|
||||
|
||||
<div class="flex items-center justify-center">
|
||||
|
||||
<div x-show="component === 'menu'" id="menu">
|
||||
|
||||
<button @click="component = 'key'" title="Update API key"
|
||||
class="m-2 float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
|
||||
>Set API Key🔑</button>
|
||||
|
||||
</div>
|
||||
|
||||
<form x-show="component === 'key'" id="key">
|
||||
<input
|
||||
type="password"
|
||||
id="apiKey"
|
||||
name="apiKey"
|
||||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||
placeholder="API Key"
|
||||
x-model.lazy="key"
|
||||
/>
|
||||
<button @click="component = 'menu'" type="submit" title="Save API key">
|
||||
<i class="fa-solid fa-arrow-right"></i>
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex items-center justify-center">
|
||||
<div class="w-full p-4 max-w-md border-t border-gray-700 ">
|
||||
<div class="bg-gray-700 shadow-md rounded px-8 pt-6 pb-8 mb-4">
|
||||
<div id="recording" class="" style="display: none;">
|
||||
<i class="fa-solid fa-microphone animate-pulse text-red-700"></i>
|
||||
<span class="text-white-700 text-sm font-bold mb-2">Recording... press "Stop recording" to stop</span>
|
||||
</div>
|
||||
<div id="loader" class="my-2 loader" style="display: none;"></div>
|
||||
<div id="statustext" class="my-2 p-2 block text-white-700 text-sm font-bold mb-2" ></div>
|
||||
<div class="mb-4" >
|
||||
<label for="modelSelect" class="block text-white-700 text-sm font-bold mb-2">LLM Model:</label>
|
||||
<select id="modelSelect"
|
||||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||
>
|
||||
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||
|
||||
{{ range .ModelsConfig }}
|
||||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||
{{ end }}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="mb-4" >
|
||||
<label for="whisperModelSelect" class="block text-white-700 text-sm font-bold mb-2">Whisper Model:</label>
|
||||
<select id="whisperModelSelect"
|
||||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||
|
||||
>
|
||||
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||
|
||||
{{ range .ModelsConfig }}
|
||||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||
{{ end }}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="mb-4" >
|
||||
<label for="ttsModelSelect" class="block text-white-700 text-sm font-bold mb-2">TTS Model:</label>
|
||||
<select id="ttsModelSelect"
|
||||
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none"
|
||||
>
|
||||
<option value="" disabled class="text-gray-400" >Select a model</option>
|
||||
{{ range .ModelsConfig }}
|
||||
<option value="{{.Name}}" class="bg-gray-700 text-white">{{.Name}}</option>
|
||||
{{ end }}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
|
||||
<button id="recordButton"
|
||||
class="bg-red-500 hover:bg-red-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline"
|
||||
><i class="fa-solid fa-microphone pr-2"></i>Talk</button>
|
||||
<a id="resetButton"
|
||||
class="inline-block align-baseline font-bold text-sm text-blue-500 hover:text-blue-800"
|
||||
href="#"
|
||||
>Reset conversation</a>
|
||||
<audio id="audioPlayback" controls hidden></audio>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue
Block a user