This commit is contained in:
Saifeddine ALOUI 2024-05-05 17:28:45 +02:00
parent ab4ce7a857
commit 3c35edb0d5
13 changed files with 287 additions and 230 deletions

View File

@ -83,7 +83,7 @@ copy_to_clipboard_add_all_details: false
# Voice service
xtts_enable: false
xtts_base_url: http://localhost:8020
xtts_use_deepspeed: true
xtts_use_deepspeed: false
xtts_use_streaming_mode: true
auto_read: false
xtts_current_voice: null
@ -186,4 +186,5 @@ activate_audio_infos: true
# whisper configuration
whisper_activate: false
whisper_model: base

View File

@ -13,10 +13,11 @@ from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
from lollms.types import MSG_TYPE
from lollms.main_config import BaseConfig
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception, show_yes_no_dialog
from lollms.utilities import detect_antiprompt, remove_text_from_string, trace_exception, show_yes_no_dialog, add_period
from lollms.security import sanitize_path, forbid_remote_access, check_access
from ascii_colors import ASCIIColors
from lollms.databases.discussions_database import DiscussionsDB
from lollms.client_session import Client
from pathlib import Path
from safe_store.text_vectorizer import TextVectorizer, VectorizationMethod, VisualizationMethod
import tqdm
@ -54,7 +55,8 @@ from utilities.execution_engines.svg_execution_engine import execute_svg
router = APIRouter()
lollmsElfServer:LOLLMSWebUI = LOLLMSWebUI.get_instance()
class Identification(BaseModel):
client_id:str
class CodeRequest(BaseModel):
client_id: str = Field(...)
@ -403,8 +405,10 @@ async def open_discussion_folder(request: FolderRequest):
lollmsElfServer.error(ex)
return {"status": False, "error": "An error occurred while processing the request"}
@router.get("/start_recording")
def start_recording():
@router.post("/start_recording")
def start_recording(data:Identification):
client = check_access(lollmsElfServer, data.client_id)
if lollmsElfServer.config.headless_server_mode:
return {"status":False,"error":"Start recording is blocked when in headless mode for obvious security reasons!"}
@ -417,14 +421,16 @@ def start_recording():
lollmsElfServer.rec_output_folder = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_rec"
lollmsElfServer.rec_output_folder.mkdir(exist_ok=True, parents=True)
lollmsElfServer.summoned = False
lollmsElfServer.audio_cap = AudioRecorder(lollmsElfServer.sio,lollmsElfServer.rec_output_folder/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
lollmsElfServer.audio_cap = AudioRecorder(client.discussion.discussion_folder/"audio"/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
lollmsElfServer.audio_cap.start_recording()
except:
lollmsElfServer.InfoMessage("Couldn't load media library.\nYou will not be able to perform any of the media linked operations. please verify the logs and install any required installations")
@router.get("/stop_recording")
def stop_recording():
@router.post("/stop_recording")
def stop_recording(data:Identification):
client = check_access(lollmsElfServer, data.client_id)
if lollmsElfServer.config.headless_server_mode:
return {"status":False,"error":"Stop recording is blocked when in headless mode for obvious security reasons!"}
@ -433,5 +439,21 @@ def stop_recording():
lollmsElfServer.info("Stopping audio capture")
text = lollmsElfServer.audio_cap.stop_recording()
return text
ai_text = lollmsElfServer.receive_and_generate(text, client, n_predict=lollmsElfServer.config, callback= lollmsElfServer.tasks_library.sink)
if lollmsElfServer.tts and lollmsElfServer.tts.ready:
personality_audio:Path = lollmsElfServer.personality.personality_package_path/"audio"
voice=lollmsElfServer.config.xtts_current_voice
if personality_audio.exists() and len([v for v in personality_audio.iterdir()])>0:
voices_folder = personality_audio
elif voice!="main_voice":
voices_folder = lollmsElfServer.lollms_paths.custom_voices_path
else:
voices_folder = Path(__file__).parent.parent.parent/"services/xtts/voices"
language = lollmsElfServer.config.xtts_current_language# convert_language_name()
lollmsElfServer.tts.set_speaker_folder(voices_folder)
preprocessed_text= add_period(ai_text)
voice_file = [v for v in voices_folder.iterdir() if v.stem==voice and v.suffix==".wav"]
lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, language=language)
return preprocessed_text

View File

@ -56,18 +56,25 @@ def add_events(sio:socketio):
@sio.on('start_audio_stream')
def start_audio_stream(sid):
if lollmsElfServer.config.headless_server_mode:
return {"status":False,"error":"Start recording is blocked when in headless mode for obvious security reasons!"}
if lollmsElfServer.config.host!="localhost" and lollmsElfServer.config.host!="127.0.0.1":
return {"status":False,"error":"Start recording is blocked when the server is exposed outside for very obvious reasons!"}
lollmsElfServer.info("Starting audio capture")
try:
from lollms.media import AudioRecorder
lollmsElfServer.rec_output_folder = lollmsElfServer.lollms_paths.personal_outputs_path/"audio_rec"
lollmsElfServer.rec_output_folder.mkdir(exist_ok=True, parents=True)
lollmsElfServer.summoned = False
lollmsElfServer.audio_cap = AudioRecorder(sio,lollmsElfServer.rec_output_folder/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer)
lollmsElfServer.audio_cap = AudioRecorder(client.discussion.discussion_folder/"audio"/"rt.wav", callback=lollmsElfServer.audio_callback,lollmsCom=lollmsElfServer, transcribe=True)
lollmsElfServer.audio_cap.start_recording()
except:
lollmsElfServer.InfoMessage("Couldn't load media library.\nYou will not be able to perform any of the media linked operations. please verify the logs and install any required installations")
@sio.on('stop_audio_stream')
def stop_audio_stream(sid):
lollmsElfServer.info("Stopping audio capture")

@ -1 +1 @@
Subproject commit c370b6ecceaa4437de1379ad1e08287b71bb51ca
Subproject commit fd20ec2859333fb93fd576fecbd712015e1c25e1

View File

@ -21,7 +21,7 @@ from lollms.com import NotificationType, NotificationDisplayType, LoLLMsCom
from lollms.app import LollmsApplication
from lollms.utilities import File64BitsManager, PromptReshaper, PackageManager, find_first_available_file_index, run_async, is_asyncio_loop_running, yes_or_no_input, process_ai_output
from lollms.generation import RECEPTION_MANAGER, ROLE_CHANGE_DECISION, ROLE_CHANGE_OURTPUT
from lollms.client_session import Client
import git
import asyncio
import os
@ -1306,3 +1306,22 @@ class LOLLMSWebUI(LOLLMSElfServer):
print()
self.busy=False
return ""
def receive_and_generate(self, text, client:Client, callback, n_predict=None):
    """
    Store `text` as a user message in the client's discussion and generate a reply.

    Args:
        text: The user prompt to add to the discussion.
        client (Client): The client session whose discussion receives the message
            and whose `client_id` is forwarded to `prepare_query` and `generate`.
        callback: Forwarded unchanged to `self.generate`.
        n_predict: Optional cap on the generation budget. Only applied when it is
            an int; any other value (including the default None) leaves the budget
            at the context space remaining after the prepared prompt.
            NOTE(review): the /stop_recording endpoint passes
            `n_predict=lollmsElfServer.config` (the whole config object), which looks
            unintended; it is ignored here instead of raising — confirm and fix the caller.

    Returns:
        Whatever `self.generate` returns for the prepared discussion.
    """
    prompt = text
    try:
        nb_tokens = len(self.model.tokenize(prompt))
    except Exception:
        # Token counting is best effort: the message is still stored without a count.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        nb_tokens = None

    # Sender prefix: the configured user name when enabled, else the personality's prefix.
    if self.config.use_user_name_in_discussions:
        ump = self.config.discussion_prompt_separator + self.config.user_name.strip()
    else:
        ump = self.personality.user_message_prefix

    # The returned message object is not needed here, so it is not bound to a local.
    client.discussion.add_message(
        message_type        = MSG_TYPE.MSG_TYPE_FULL.value,
        sender_type         = SENDER_TYPES.SENDER_TYPES_USER.value,
        sender              = ump.replace(self.config.discussion_prompt_separator,"").replace(":",""),
        content             = prompt,
        metadata            = None,
        parent_message_id   = self.message_id,
        nb_tokens           = nb_tokens
    )

    discussion_messages, current_message, tokens, context_details, internet_search_infos = self.prepare_query(client.client_id, -1, False, n_tokens=self.config.min_n_predict, force_using_internet=False)

    # Generation budget: what is left of the context window once the prompt is counted.
    max_new_tokens = self.config.ctx_size-len(tokens)-1
    if isinstance(n_predict, int) and not isinstance(n_predict, bool):
        max_new_tokens = min(n_predict, max_new_tokens)

    return self.generate(discussion_messages, current_message, context_details, max_new_tokens, client.client_id, callback)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

4
web/dist/index.html vendored
View File

@ -6,8 +6,8 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LoLLMS WebUI - Welcome</title>
<script type="module" crossorigin src="/assets/index-f1cd900e.js"></script>
<link rel="stylesheet" href="/assets/index-f6ab05b8.css">
<script type="module" crossorigin src="/assets/index-73d394ff.js"></script>
<link rel="stylesheet" href="/assets/index-58b402c9.css">
</head>
<body>
<div id="app"></div>

View File

@ -153,7 +153,7 @@
<i data-feather="volume-2"></i>
</div>
</div>
<div class="flex flex-row items-center">
<div v-if="this.$store.state.config.xtts_enable && !this.$store.state.config.xtts_use_streaming_mode" class="flex flex-row items-center">
<div v-if="!isSynthesizingVoice" class="text-lg hover:text-red-600 duration-75 active:scale-90 p-2 cursor-pointer"
title="generate_audio"
@click.stop="read()"
@ -214,7 +214,7 @@
</div>
<DynamicUIRenderer v-if="message.ui !== null && message.ui !== undefined && message.ui !== ''" class="w-full h-full" :code="message.ui"></DynamicUIRenderer>
<audio controls autoplay v-if="audio_url!=null" :key="audio_url">
<audio controls v-if="audio_url!=null" :key="audio_url">
<source :src="audio_url" type="audio/wav" ref="audio_player" >
Your browser does not support the audio element.
</audio>

View File

@ -44,7 +44,7 @@
ComfyUI
</RouterLink>
<RouterLink
v-if="$store.state.config.xtts_enable"
v-if="$store.state.config.xtts_enable && $store.state.config.xtts_use_streaming_mode"
:to="{ name: 'interactive' }"
class="inline-block border-l border-t border-r rounded-t py-2 px-4 text-blue-700 font-semibold"
:class="{

View File

@ -23,14 +23,20 @@
<script>
import storeLogo from '@/assets/logo.png'
export default {
name: 'WelcomeComponent',
computed:{
storeLogo(){
if (this.$store.state.config){
return storeLogo
}
return this.$store.state.config.app_custom_logo!=''?'/user_infos/'+this.$store.state.config.app_custom_logo:storeLogo
},
},
data(){
storeLogo:storeLogo
},
setup() {
return {}
}
}

View File

@ -135,7 +135,7 @@
<span>Cursor position {{ cursorPosition }}</span>
</div>
<audio controls autoplay v-if="audio_url!=null" :key="audio_url">
<audio controls v-if="audio_url!=null" :key="audio_url">
<source :src="audio_url" type="audio/wav" ref="audio_player">
Your browser does not support the audio element.
</audio>
@ -913,7 +913,7 @@ export default {
startRecording(){
this.pending = true;
if(!this.is_recording){
axios.get('/start_recording').then(response => {
axios.post('/start_recording', {client_id:this.$store.state.client_id}).then(response => {
this.is_recording = true;
this.pending = false;
console.log(response.data)

View File

@ -1917,6 +1917,8 @@
<div class="flex flex-row">
<button class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" @click="reinstallAudioService">install xtts service</button>
<button class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" @click="startAudioService">start xtts service</button>
<a class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" :href="this.$store.state.config.xtts_base_url+'/docs'" target="_blank">show xtts service entries</a>
<a class="hover:text-primary bg-green-200 rounded-lg p-4 m-4 w-full text-center items-center" href="https://github.com/ParisNeo/xtts-api-server/blob/main/LICENSE" target="_blank">licence</a>
</div>
</td>
</tr>
@ -1939,11 +1941,11 @@
</tr>
<tr>
<td style="min-width: 200px;">
<label for="current_language" class="text-sm font-bold" style="margin-right: 1rem;">Current language:</label>
<label for="xtts_current_language" class="text-sm font-bold" style="margin-right: 1rem;">Current language:</label>
</td>
<td>
<div class="flex flex-row">
<select v-model="current_language" @change="settingsChanged=true" :disabled="!xtts_enable">
<select v-model="xtts_current_language" @change="settingsChanged=true" :disabled="!xtts_enable">
<option v-for="(value, key) in voice_languages" :key="key" :value="value">
{{ key }}
</option>
@ -5495,9 +5497,9 @@ export default {
this.$store.state.config.xtts_enable = value
},
},
current_language:{
xtts_current_language:{
get() {
return this.$store.state.config.current_language;
return this.$store.state.config.xtts_current_language;
},
set(value) {
// You should not set the value directly here; use the updateSetting method instead