From 70320699b15e64c7f835e300db50d8c2c2c2c101 Mon Sep 17 00:00:00 2001
From: Saifeddine ALOUI <aloui.seifeddine@gmail.com>
Date: Sun, 5 May 2024 00:13:36 +0200
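Subject: [PATCH] Upgrade configurations to v90 and add XTTS streaming playback

- Bump the configuration version from 88 to 90.
- Rename the `current_voice` setting to `xtts_current_voice` in all
  configuration files and in the XTTS endpoints that read or write it.
- Remove the `summerize_discussion` and `use_files` settings.
- Set `data_vectorization_save_db` to true in lollms/configs/config.yaml.
- Give `/upload_logo` its own handler instead of sharing `upload_avatar`.
- Add `LollmsXTTS.tts_to_audio` and call it from `text2Audio` when
  `xtts_use_streaming_mode` is enabled; otherwise keep using `tts_to_file`.
- Add the missing leading spaces to the XTTS server command-line options so
  that `--deepspeed` and `--streaming-mode` are no longer glued together.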

---
 configs/config.yaml                           |  6 +--
 .../personal/configs/lollms_elf_config.yaml   |  2 +-
 .../personal/configs/lollms_elf_config.yaml   |  2 +-
 .../configs/lollms_elf_local_config.yaml      |  2 +-
 lollms/configs/config.yaml                    |  8 ++--
 lollms/server/configs/config.yaml             |  6 +--
 lollms/server/endpoints/lollms_user.py        | 39 ++++++++++++++++++-
 lollms/server/endpoints/lollms_xtts.py        | 15 ++++---
 lollms/services/xtts/lollms_xtts.py           | 28 ++++++++++++-
 .../configs/lollms_discord_local_config.yaml  |  2 +-
 10 files changed, 84 insertions(+), 26 deletions(-)

diff --git a/configs/config.yaml b/configs/config.yaml
index 2d1e231..e721782 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 88
+version: 90
 binding_name: null
 model_name: null
 model_variant: null
@@ -86,7 +86,7 @@ xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en
 
 # Image generation service
@@ -145,11 +145,9 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)
 
 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
diff --git a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
index bf57bc4..4e2af3f 100644
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en
 
 # Image generation service
diff --git a/elf_test_cfg/personal/configs/lollms_elf_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
index bf57bc4..4e2af3f 100644
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en
 
 # Image generation service
diff --git a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
index bf57bc4..4e2af3f 100644
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en
 
 # Image generation service
diff --git a/lollms/configs/config.yaml b/lollms/configs/config.yaml
index d009936..e721782 100644
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 88
+version: 90
 binding_name: null
 model_name: null
 model_variant: null
@@ -86,7 +86,7 @@ xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en
 
 # Image generation service
@@ -145,17 +145,15 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)
 
 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path
 
-data_vectorization_save_db: false # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
diff --git a/lollms/server/configs/config.yaml b/lollms/server/configs/config.yaml
index 2d1e231..e721782 100644
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 88
+version: 90
 binding_name: null
 model_name: null
 model_variant: null
@@ -86,7 +86,7 @@ xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en
 
 # Image generation service
@@ -145,11 +145,9 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)
 
 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
diff --git a/lollms/server/endpoints/lollms_user.py b/lollms/server/endpoints/lollms_user.py
index 1d8f818..ad0c81a 100644
--- a/lollms/server/endpoints/lollms_user.py
+++ b/lollms/server/endpoints/lollms_user.py
@@ -55,11 +55,48 @@ def switch_personal_path(data:PersonalPathParameters):
 
         
 @router.post("/upload_avatar")
-@router.post("/upload_logo")
 async def upload_avatar(avatar: UploadFile = File(...)):
     """
     Uploads a user avatar file to a dedicated directory, preventing path traversal attacks.
 
+    Parameters:
+        - avatar: UploadFile object representing the user avatar file.
+
+    Returns:
+        - Dictionary with the status of the upload and the generated file name.
+
+    Raises:
+        - HTTPException with a 400 status code and an error message if the file is invalid or has an invalid type.
+    """
+    # Only allow certain file types
+    if avatar.filename.endswith((".jpg", ".png")):
+        # Create a random file name
+        random_filename = str(uuid.uuid4())
+        
+        # Use the file extension of the uploaded file
+        extension = os.path.splitext(avatar.filename)[1]
+        
+        # Create the new file path in a dedicated directory
+        file_location = os.path.join(lollmsElfServer.lollms_paths.personal_user_infos_path, f"{random_filename}{extension}")
+
+        try:
+            # Open the image to check if it's a valid image
+            img = Image.open(avatar.file)
+            
+            # Save the file
+            img.save(file_location)
+        except Exception as e:
+            raise HTTPException(status_code=400, detail="Invalid image file.")
+    else:
+        raise HTTPException(status_code=400, detail="Invalid file type.")
+        
+    return {"status": True,"fileName": f"{random_filename}{extension}"}
+
+@router.post("/upload_logo")
+async def upload_logo(avatar: UploadFile = File(...)):
+    """
+    Uploads a user avatar file to a dedicated directory, preventing path traversal attacks.
+
     Parameters:
         - avatar: UploadFile object representing the user avatar file.
 
diff --git a/lollms/server/endpoints/lollms_xtts.py b/lollms/server/endpoints/lollms_xtts.py
index 61d88fa..80fb86f 100644
--- a/lollms/server/endpoints/lollms_xtts.py
+++ b/lollms/server/endpoints/lollms_xtts.py
@@ -58,7 +58,7 @@ async def set_voice(request: Request):
 
     try:
         data = (await request.json())
-        lollmsElfServer.config.current_voice=data["voice"]
+        lollmsElfServer.config.xtts_current_voice=data["voice"]
         if lollmsElfServer.config.auto_save:
             lollmsElfServer.config.save_config()
         return {"status":True}
@@ -106,7 +106,7 @@ async def text2Audio(request: LollmsText2AudioRequest):
         except Exception as ex:
             return {"url": None, "error":f"{ex}"}
             
-        voice=lollmsElfServer.config.current_voice if request.voice is None else request.voice
+        voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
         index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
         output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
         if voice is None:
@@ -133,9 +133,12 @@ async def text2Audio(request: LollmsText2AudioRequest):
             voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice]
             if len(voice_file)==0:
                 return {"status":False,"error":"Voice not found"}
-
-            lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
-            lollmsElfServer.info(f"Voice file ready at {url}")
+            if not lollmsElfServer.config.xtts_use_streaming_mode:
+                lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
+                lollmsElfServer.info(f"Voice file ready at {url}")
+            else:
+                lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
+                 
             return {"url": url}
         except Exception as ex:
             trace_exception(ex)
@@ -205,7 +208,7 @@ async def upload_voice_file(file: UploadFile = File(...)):
     safe_file_path = lollmsElfServer.lollms_paths.custom_voices_path/safe_filename
     with safe_file_path.open("wb") as f:
         f.write(contents)
-    lollmsElfServer.config.current_voice=safe_filename
+    lollmsElfServer.config.xtts_current_voice=safe_filename
     if lollmsElfServer.config.auto_save:
         lollmsElfServer.config.save_config()
 
diff --git a/lollms/services/xtts/lollms_xtts.py b/lollms/services/xtts/lollms_xtts.py
index 3875c51..6c816b1 100644
--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@@ -167,9 +167,9 @@ class LollmsXTTS:
         ASCIIColors.yellow("Loading XTTS ")
         options= ""
         if self.use_deep_speed:
-            options += "--deepspeed"
+            options += " --deepspeed"
         if self.use_streaming_mode:
-            options += "--streaming-mode --streaming-mode-improve --stream-play-sync"
+            options += " --streaming-mode --streaming-mode-improve --stream-play-sync"
         process = run_python_script_in_env("xtts", f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {self.xtts_base_url.split(':')[-1].replace('/','')}", wait= False)
         return process
     
@@ -237,6 +237,30 @@ class LollmsXTTS:
         # Send the POST request
         response =  requests.post(url, headers=headers, data=json.dumps(payload))
 
+        # Check the response status code
+        if response.status_code == 200:
+            print("Request successful")
+            # You can access the response data using response.json()
+        else:
+            print("Request failed with status code:", response.status_code)
+
+    def tts_to_audio(self, text, speaker_wav, file_name_or_path, language="en"):
+        url = f"{self.xtts_base_url}/tts_to_audio"
+
+        # Define the request body
+        payload = {
+            "text": text,
+            "speaker_wav": speaker_wav,
+            "language": language
+        }
+        headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json'
+        }
+
+        # Send the POST request
+        response =  requests.post(url, headers=headers, data=json.dumps(payload))
+
         # Check the response status code
         if response.status_code == 200:
             print("Request successful")
diff --git a/personal_data/configs/lollms_discord_local_config.yaml b/personal_data/configs/lollms_discord_local_config.yaml
index bf57bc4..4e2af3f 100644
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en
 
 # Image generation service