upgraded configurations

2025-04-13 05:42:57 +00:00 · 2024-05-05 00:13:36 +02:00 · 2024-05-05 00:13:36 +02:00 · 70320699b1
commit 70320699b1
parent 6ae2600aa4
10 changed files with 84 additions and 26 deletions
--- a/configs/config.yaml
+++ b/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 88
+version: 90
 binding_name: null
 model_name: null
 model_variant: null
@ -86,7 +86,7 @@ xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en

 # Image generation service
@ -145,11 +145,9 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
--- a/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_docker_cfg/personal/configs/lollms_elf_config.yaml
@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en

 # Image generation service
--- a/elf_test_cfg/personal/configs/lollms_elf_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_config.yaml
@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en

 # Image generation service
--- a/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
+++ b/elf_test_cfg/personal/configs/lollms_elf_local_config.yaml
@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en

 # Image generation service
--- a/lollms/configs/config.yaml
+++ b/lollms/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 88
+version: 90
 binding_name: null
 model_name: null
 model_variant: null
@ -86,7 +86,7 @@ xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en

 # Image generation service
@ -145,17 +145,15 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
 data_vectorization_sentense_transformer_model: "all-MiniLM-L6-v2" # you can use another model by setting its name here or its path

-data_vectorization_save_db: false # For each new session, new files
+data_vectorization_save_db: true # For each new session, new files
 data_vectorization_chunk_size: 512 # chunk size
 data_vectorization_overlap_size: 128 # overlap between chunks size
 data_vectorization_nb_chunks: 2 # number of chunks to use
--- a/lollms/server/configs/config.yaml
+++ b/lollms/server/configs/config.yaml
@ -1,5 +1,5 @@
 # =================== Lord Of Large Language Multimodal Systems Configuration file =========================== 
-version: 88
+version: 90
 binding_name: null
 model_name: null
 model_variant: null
@ -86,7 +86,7 @@ xtts_base_url: http://localhost:8020
 xtts_use_deepspeed: false
 xtts_use_streaming_mode: true
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en

 # Image generation service
@ -145,11 +145,9 @@ audio_silenceTimer: 5000
 # Data vectorization
 activate_skills_lib: false # Activate vectorizing previous conversations
 skills_lib_database_name: "default" # Default skills database
-summerize_discussion: false # activate discussion summary (better but adds computation time)

 max_summary_size: 512 # in tokens
 data_vectorization_visualize_on_vectorization: false
-use_files: true # Activate using files
 data_vectorization_activate: true # To activate/deactivate data vectorization
 data_vectorization_method: "tfidf_vectorizer" #"model_embedding" or "tfidf_vectorizer"
 data_visualization_method: "PCA" #"PCA" or "TSNE"
--- a/lollms/server/endpoints/lollms_user.py
+++ b/lollms/server/endpoints/lollms_user.py
@ -55,11 +55,48 @@ def switch_personal_path(data:PersonalPathParameters):

        
@router.post("/upload_avatar")
-@router.post("/upload_logo")
 async def upload_avatar(avatar: UploadFile = File(...)):
    """
    Uploads a user avatar file to a dedicated directory, preventing path traversal attacks.

+    Parameters:
+        - avatar: UploadFile object representing the user avatar file.
+
+    Returns:
+        - Dictionary with the status of the upload and the generated file name.
+
+    Raises:
+        - HTTPException with a 400 status code and an error message if the file is invalid or has an invalid type.
+    """
+    # Only allow certain file types
+    if avatar.filename.endswith((".jpg", ".png")):
+        # Create a random file name
+        random_filename = str(uuid.uuid4())
+        
+        # Use the file extension of the uploaded file
+        extension = os.path.splitext(avatar.filename)[1]
+        
+        # Create the new file path in a dedicated directory
+        file_location = os.path.join(lollmsElfServer.lollms_paths.personal_user_infos_path, f"{random_filename}{extension}")
+
+        try:
+            # Open the image to check if it's a valid image
+            img = Image.open(avatar.file)
+            
+            # Save the file
+            img.save(file_location)
+        except Exception as e:
+            raise HTTPException(status_code=400, detail="Invalid image file.")
+    else:
+        raise HTTPException(status_code=400, detail="Invalid file type.")
+        
+    return {"status": True,"fileName": f"{random_filename}{extension}"}
+
+@router.post("/upload_logo")
+async def upload_logo(avatar: UploadFile = File(...)):
+    """
+    Uploads a user avatar file to a dedicated directory, preventing path traversal attacks.
+
    Parameters:
        - avatar: UploadFile object representing the user avatar file.

--- a/lollms/server/endpoints/lollms_xtts.py
+++ b/lollms/server/endpoints/lollms_xtts.py
@ -58,7 +58,7 @@ async def set_voice(request: Request):

    try:
        data = (await request.json())
-        lollmsElfServer.config.current_voice=data["voice"]
+        lollmsElfServer.config.xtts_current_voice=data["voice"]
        if lollmsElfServer.config.auto_save:
            lollmsElfServer.config.save_config()
        return {"status":True}
@ -106,7 +106,7 @@ async def text2Audio(request: LollmsText2AudioRequest):
        except Exception as ex:
            return {"url": None, "error":f"{ex}"}
            
-        voice=lollmsElfServer.config.current_voice if request.voice is None else request.voice
+        voice=lollmsElfServer.config.xtts_current_voice if request.voice is None else request.voice
        index = find_first_available_file_index(lollmsElfServer.tts.output_folder, "voice_sample_",".wav")
        output_fn=f"voice_sample_{index}.wav" if request.fn is None else request.fn
        if voice is None:
@ -133,9 +133,12 @@ async def text2Audio(request: LollmsText2AudioRequest):
            voice_file =  [v for v in voices_folder.iterdir() if v.stem==voice]
            if len(voice_file)==0:
                return {"status":False,"error":"Voice not found"}
-
-            lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
-            lollmsElfServer.info(f"Voice file ready at {url}")
+            if not lollmsElfServer.config.xtts_use_streaming_mode:
+                lollmsElfServer.tts.tts_to_file(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
+                lollmsElfServer.info(f"Voice file ready at {url}")
+            else:
+                lollmsElfServer.tts.tts_to_audio(preprocessed_text, voice_file[0].name, f"{output_fn}", language=language)
+                 
            return {"url": url}
        except Exception as ex:
            trace_exception(ex)
@ -205,7 +208,7 @@ async def upload_voice_file(file: UploadFile = File(...)):
    safe_file_path = lollmsElfServer.lollms_paths.custom_voices_path/safe_filename
    with safe_file_path.open("wb") as f:
        f.write(contents)
-    lollmsElfServer.config.current_voice=safe_filename
+    lollmsElfServer.config.xtts_current_voice=safe_filename
    if lollmsElfServer.config.auto_save:
        lollmsElfServer.config.save_config()

--- a/lollms/services/xtts/lollms_xtts.py
+++ b/lollms/services/xtts/lollms_xtts.py
@ -167,9 +167,9 @@ class LollmsXTTS:
        ASCIIColors.yellow("Loading XTTS ")
        options= ""
        if self.use_deep_speed:
-            options += "--deepspeed"
+            options += " --deepspeed"
        if self.use_streaming_mode:
-            options += "--streaming-mode --streaming-mode-improve --stream-play-sync"
+            options += " --streaming-mode --streaming-mode-improve --stream-play-sync"
        process = run_python_script_in_env("xtts", f"-m xtts_api_server {options} -o {self.output_folder} -sf {self.voice_samples_path} -p {self.xtts_base_url.split(':')[-1].replace('/','')}", wait= False)
        return process
    
@ -237,6 +237,30 @@ class LollmsXTTS:
        # Send the POST request
        response =  requests.post(url, headers=headers, data=json.dumps(payload))

+        # Check the response status code
+        if response.status_code == 200:
+            print("Request successful")
+            # You can access the response data using response.json()
+        else:
+            print("Request failed with status code:", response.status_code)
+
+    def tts_to_audio(self, text, speaker_wav, file_name_or_path, language="en"):
+        url = f"{self.xtts_base_url}/tts_to_audio"
+
+        # Define the request body
+        payload = {
+            "text": text,
+            "speaker_wav": speaker_wav,
+            "language": language
+        }
+        headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json'
+        }
+
+        # Send the POST request
+        response =  requests.post(url, headers=headers, data=json.dumps(payload))
+
        # Check the response status code
        if response.status_code == 200:
            print("Request successful")
--- a/personal_data/configs/lollms_discord_local_config.yaml
+++ b/personal_data/configs/lollms_discord_local_config.yaml
@ -81,7 +81,7 @@ copy_to_clipboard_add_all_details: false
 enable_voice_service: false
 xtts_base_url: http://localhost:8020
 auto_read: false
-current_voice: null
+xtts_current_voice: null
 xtts_current_language: en

 # Image generation service