From c5dd60732def7a91f7cf97c905c08cd5eef8a099 Mon Sep 17 00:00:00 2001
From: Saifeddine ALOUI
Date: Sat, 4 May 2024 16:48:30 +0200
Subject: [PATCH] enhanced translation of personalities

---
 lollms/app.py         |   3 +-
 lollms/personality.py |  34 +++---
 lollms/tasks.py       | 245 ++++++++++++++++++++++++++++++++++++++++++
 lollms/utilities.py   |  19 ++++
 4 files changed, 282 insertions(+), 19 deletions(-)
 create mode 100644 lollms/tasks.py

diff --git a/lollms/app.py b/lollms/app.py
index baea49f..b9f55f0 100644
--- a/lollms/app.py
+++ b/lollms/app.py
@@ -12,6 +12,7 @@ from lollms.types import MSG_TYPE, SENDER_TYPES
 from lollms.utilities import PromptReshaper
 from lollms.client_session import Client, Session
 from lollms.databases.skills_database import SkillsLibrary
+from lollms.tasks import TasksLibrary
 from safe_store import TextVectorizer, VectorizationMethod, VisualizationMethod
 from typing import Callable
 from pathlib import Path
@@ -63,7 +64,7 @@ class LollmsApplication(LoLLMsCom):
 
         self.session = Session(lollms_paths)
         self.skills_library = SkillsLibrary(self.lollms_paths.personal_skills_path/(self.config.skills_lib_database_name+".db"))
-
+        self.tasks_library = TasksLibrary(self)
         if not free_mode:
             try:
                 if config.auto_update:
diff --git a/lollms/personality.py b/lollms/personality.py
index d251f90..5975ed3 100644
--- a/lollms/personality.py
+++ b/lollms/personality.py
@@ -620,7 +620,21 @@ class AIPersonality:
 
         return gen
 
-    def fast_gen(self, prompt: str, max_generation_size: int=None, placeholders: dict = {}, sacrifice: list = ["previous_discussion"], debug: bool = False, callback=None, show_progress=False) -> str:
+    def fast_gen(
+            self,
+            prompt: str,
+            max_generation_size: int=None,
+            placeholders: dict = {},
+            sacrifice: list = ["previous_discussion"],
+            debug: bool = False,
+            callback=None,
+            show_progress=False,
+            temperature=None,
+            top_k=None,
+            top_p=None,
+            repeat_penalty=None,
+            repeat_last_n=None
+        ) -> str:
         """
         Fast way to generate code
 
@@ -655,29 +669,13 @@ class AIPersonality:
             max_generation_size = min(self.model.config.ctx_size - ntk, max_generation_size)
         # TODO : add show progress
 
-        gen = self.generate(prompt, max_generation_size, callback=callback, show_progress=show_progress).strip().replace("<s>", "").replace("</s>", "")
+        gen = self.generate(prompt, max_generation_size, temperature=temperature, top_k=top_k, top_p=top_p, repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, callback=callback, show_progress=show_progress).strip().replace("<s>", "").replace("</s>", "")
         if debug:
             self.print_prompt("prompt", prompt+gen)
 
         return gen
 
-    def remove_text_from_string(self, string, text_to_find):
-        """
-        Removes everything from the first occurrence of the specified text in the string (case-insensitive).
-
-        Parameters:
-        string (str): The original string.
-        text_to_find (str): The text to find in the string.
-
-        Returns:
-        str: The updated string.
- """ - index = string.lower().find(text_to_find.lower()) - - if index != -1: - string = string[:index] - - return string def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False): if callback is None: diff --git a/lollms/tasks.py b/lollms/tasks.py new file mode 100644 index 0000000..281ae2d --- /dev/null +++ b/lollms/tasks.py @@ -0,0 +1,245 @@ + +import sys +from typing import Callable, List +from functools import partial +from datetime import datetime +from ascii_colors import ASCIIColors +from lollms.types import MSG_TYPE +from lollms.com import LoLLMsCom +from lollms.utilities import PromptReshaper, remove_text_from_string + + +class TasksLibrary: + def __init__(self, lollms:LoLLMsCom) -> None: + self.lollms = lollms + self.anti_prompts = [self.lollms.config.discussion_prompt_separator]+["!@>"] + + def print_prompt(self, title, prompt): + ASCIIColors.red("*-*-*-*-*-*-*-* ", end="") + ASCIIColors.red(title, end="") + ASCIIColors.red(" *-*-*-*-*-*-*-*") + ASCIIColors.yellow(prompt) + ASCIIColors.red(" *-*-*-*-*-*-*-*") + + def sink(self, s=None,i=None,d=None): + pass + def detect_antiprompt(self, text:str) -> bool: + """ + Detects if any of the antiprompts in self.anti_prompts are present in the given text. + Used for the Hallucination suppression system + + Args: + text (str): The text to check for antiprompts. + + Returns: + bool: True if any antiprompt is found in the text (ignoring case), False otherwise. + """ + for prompt in self.anti_prompts: + if prompt.lower() in text.lower(): + return prompt.lower() + return None + + def process(self, text:str, message_type:MSG_TYPE, callback=None, show_progress=False): + if callback is None: + callback = self.callback + if text is None: + return True + if message_type==MSG_TYPE.MSG_TYPE_CHUNK: + bot_says = self.bot_says + text + elif message_type==MSG_TYPE.MSG_TYPE_FULL: + bot_says = text + + if show_progress: + if self.nb_received_tokens==0: + self.start_time = datetime.now() + dt =(datetime.now() - self.start_time).seconds + if dt==0: + dt=1 + spd = self.nb_received_tokens/dt + ASCIIColors.green(f"Received {self.nb_received_tokens} tokens (speed: {spd:.2f}t/s) ",end="\r",flush=True) + sys.stdout = sys.__stdout__ + sys.stdout.flush() + self.nb_received_tokens+=1 + + + antiprompt = self.detect_antiprompt(bot_says) + if antiprompt: + self.bot_says = remove_text_from_string(bot_says,antiprompt) + ASCIIColors.warning(f"\n{antiprompt} detected. 
+            ASCIIColors.warning(f"\n{antiprompt} detected. Stopping generation")
+            return False
+        else:
+            if callback:
+                callback(text, message_type)
+            self.bot_says = bot_says
+            return True
+
+    def generate(self, prompt, max_size, temperature=None, top_k=None, top_p=None, repeat_penalty=None, repeat_last_n=None, callback=None, debug=False, show_progress=False):
+        ASCIIColors.info("Text generation started: Warming up")
+        self.nb_received_tokens = 0
+        self.bot_says = ""
+        if debug:
+            self.print_prompt("gen", prompt)
+
+        self.lollms.model.generate(
+            prompt,
+            max_size,
+            partial(self.process, callback=callback, show_progress=show_progress),
+            temperature=temperature if temperature is not None else self.lollms.config.temperature if self.lollms.config.override_personality_model_parameters else self.lollms.personality.model_temperature,
+            top_k=top_k if top_k is not None else self.lollms.config.top_k if self.lollms.config.override_personality_model_parameters else self.lollms.personality.model_top_k,
+            top_p=top_p if top_p is not None else self.lollms.config.top_p if self.lollms.config.override_personality_model_parameters else self.lollms.personality.model_top_p,
+            repeat_penalty=repeat_penalty if repeat_penalty is not None else self.lollms.config.repeat_penalty if self.lollms.config.override_personality_model_parameters else self.lollms.personality.model_repeat_penalty,
+            repeat_last_n=repeat_last_n if repeat_last_n is not None else self.lollms.config.repeat_last_n if self.lollms.config.override_personality_model_parameters else self.lollms.personality.model_repeat_last_n,
+        )
+        # the streamed chunks are accumulated into self.bot_says by process()
+        return self.bot_says
+
+    def fast_gen(
+            self,
+            prompt: str,
+            max_generation_size: int=None,
+            placeholders: dict = {},
+            sacrifice: list = ["previous_discussion"],
+            debug: bool = False,
+            callback=None,
+            show_progress=False,
+            temperature=None,
+            top_k=None,
+            top_p=None,
+            repeat_penalty=None,
+            repeat_last_n=None
+        ) -> str:
+        """
+        Fast way to generate text
+
+        This method takes in a prompt, maximum generation size, optional placeholders, sacrifice list, and debug flag.
+        It reshapes the context before performing text generation by adjusting and cropping the number of tokens.
+
+        Parameters:
+        - prompt (str): The input prompt for text generation.
+        - max_generation_size (int): The maximum number of tokens to generate.
+        - placeholders (dict, optional): A dictionary of placeholders to be replaced in the prompt. Defaults to an empty dictionary.
+        - sacrifice (list, optional): A list of placeholders to sacrifice if the window is bigger than the context size minus the number of tokens to generate. Defaults to ["previous_discussion"].
+        - debug (bool, optional): Flag to enable/disable debug mode. Defaults to False.
+        - callback (callable, optional): A callback receiving the streamed chunks. Defaults to None.
+        - show_progress (bool, optional): Whether to print token throughput while generating. Defaults to False.
+        - temperature/top_k/top_p/repeat_penalty/repeat_last_n (optional): Per-call sampling overrides; when None, the configuration or personality defaults are used.
+
+        Returns:
+        - str: The generated text after removing special tokens ("<s>" and "</s>") and stripping any leading/trailing whitespace.
+ """ + if max_generation_size is None: + prompt_size = self.lollms.model.tokenize(prompt) + max_generation_size = self.lollms.model.config.ctx_size - len(prompt_size) + + pr = PromptReshaper(prompt) + prompt = pr.build(placeholders, + self.lollms.model.tokenize, + self.lollms.model.detokenize, + self.lollms.model.config.ctx_size - max_generation_size, + sacrifice + ) + ntk = len(self.lollms.model.tokenize(prompt)) + max_generation_size = min(self.lollms.model.config.ctx_size - ntk, max_generation_size) + # TODO : add show progress + + gen = self.generate(prompt, max_generation_size, temperature = temperature, top_k = top_k, top_p=top_p, repeat_penalty=repeat_penalty, repeat_last_n=repeat_last_n, callback=callback, show_progress=show_progress).strip().replace("", "").replace("", "") + if debug: + self.print_prompt("prompt", prompt+gen) + + return gen + + def extract_code_blocks(self, text: str) -> List[dict]: + """ + This function extracts code blocks from a given text. + + Parameters: + text (str): The text from which to extract code blocks. Code blocks are identified by triple backticks (```). + + Returns: + List[dict]: A list of dictionaries where each dictionary represents a code block and contains the following keys: + - 'index' (int): The index of the code block in the text. + - 'file_name' (str): An empty string. This field is not used in the current implementation. + - 'content' (str): The content of the code block. + - 'type' (str): The type of the code block. If the code block starts with a language specifier (like 'python' or 'java'), this field will contain that specifier. Otherwise, it will be set to 'language-specific'. + + Note: + The function assumes that the number of triple backticks in the text is even. + If the number of triple backticks is odd, it will consider the rest of the text as the last code block. + """ + remaining = text + bloc_index = 0 + first_index=0 + indices = [] + while len(remaining)>0: + try: + index = remaining.index("```") + indices.append(index+first_index) + remaining = remaining[index+3:] + first_index += index+3 + bloc_index +=1 + except Exception as ex: + if bloc_index%2==1: + index=len(remaining) + indices.append(index) + remaining = "" + + code_blocks = [] + is_start = True + for index, code_delimiter_position in enumerate(indices): + block_infos = { + 'index':index, + 'file_name': "", + 'content': "", + 'type':"" + } + if is_start: + + sub_text = text[code_delimiter_position+3:] + if len(sub_text)>0: + try: + find_space = sub_text.index(" ") + except: + find_space = int(1e10) + try: + find_return = sub_text.index("\n") + except: + find_return = int(1e10) + next_index = min(find_return, find_space) + start_pos = next_index + if code_delimiter_position+3instruction: Translate the following prompt to {language}.\nDo not translate any css or code, just the text and strings.\n!@>prompt:\n```{original_language}\n{prompt.replace('!@>','')}\n```\n!@>translation:\nHere is the translated prompt:\n```{language}\n" + cond_translation = f"```{language}\n"+self.fast_gen(conditionning_translation_text, temperature=0.1, callback=self.sink) + response = self.extract_code_blocks(cond_translation) + if len(response)>0 and len(response[0]["content"])>0: + conditionning = "!@>system: "+response[0]["content"] + else: + ASCIIColors.print(f"Failed to translate the conditionning message. 
+            ASCIIColors.print(f"Failed to translate the conditioning message. Reverting to English conditioning with a request to use the language {language}")
+            conditionning = prompt + f"\nAlways answer in {language}\n"
+        return conditionning
+
+    def translate_message(self, prompt, original_language, language):
+        message_translation_text = f"!@>instruction: Translate the following message to {language}.\nDo not translate any css or code, just the text and strings.\n!@>prompt:\n```{original_language}\n{prompt.replace('!@>','')}\n```\n!@>translation:\n```{language}\n"
+        cond_translation = f"```{language}\n"+self.fast_gen(message_translation_text, temperature=0.1, callback=self.sink)
+        response = self.extract_code_blocks(cond_translation)
+        if len(response)>0 and len(response[0]["content"])>0:
+            translated = "!@>system: "+response[0]["content"]
+        else:
+            ASCIIColors.print(f"Failed to translate the message. Reverting to English with a request to use the language {language}")
+            message_translation_text = f"!@>instruction: Translate the following message to {language}.\nDo not translate any css or code, just the text and strings.\n!@>message:\n{prompt.replace('!@>','')}\n!@>translation:\n"
+            translated = self.fast_gen(message_translation_text, temperature=0.1, callback=self.sink)
+        return translated
diff --git a/lollms/utilities.py b/lollms/utilities.py
index 18d959e..d8ddb24 100644
--- a/lollms/utilities.py
+++ b/lollms/utilities.py
@@ -1090,3 +1090,22 @@ class File_Path_Generator:
             # If the file exists, increment the index and try again
             index += 1
+
+
+def remove_text_from_string(string: str, text_to_find:str):
+    """
+    Removes everything from the first occurrence of the specified text in the string (case-insensitive).
+
+    Parameters:
+    string (str): The original string.
+    text_to_find (str): The text to find in the string.
+
+    Returns:
+    str: The updated string.
+    """
+    index = string.lower().find(text_to_find.lower())
+
+    if index != -1:
+        string = string[:index]
+
+    return string
\ No newline at end of file
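
Usage note (illustrative, not part of the patch). A minimal sketch of driving the new translation helpers; `app` stands for an already-initialized LollmsApplication with a loaded model, and the literal prompts are placeholders:

    # `app` is an already-configured LollmsApplication; this patch attaches
    # a TasksLibrary to it as `app.tasks_library` in __init__.
    tl = app.tasks_library

    # Translate a conditionning prompt. Internally this runs fast_gen with the
    # new per-call temperature override (0.1) and parses the reply with
    # extract_code_blocks; on failure it falls back to the original prompt plus
    # an explicit "Always answer in <language>" instruction.
    conditionning = tl.translate_conditionning(
        "!@>system: You are a helpful assistant.", "english", "french"
    )

    # Same pattern for a plain personality message.
    welcome = tl.translate_message("Welcome! How can I help you?", "english", "french")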
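
The antiprompt machinery the generation loop relies on can be sanity-checked in isolation (sketch; assumes the same `tl` handle as above):

    from lollms.utilities import remove_text_from_string

    # detect_antiprompt returns the matched marker (lowercased) or None;
    # process() then truncates the streamed text with remove_text_from_string.
    found = tl.detect_antiprompt("some text !@>user: injected turn")
    if found:
        clipped = remove_text_from_string("some text !@>user: injected turn", found)
        # clipped == "some text "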
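
And the fence parser's contract, on a toy input (the expected result is hand-traced from the implementation above):

    sample = "intro\n```python\nprint('hi')\n```\ntrailing"
    blocks = tl.extract_code_blocks(sample)
    # blocks == [{'index': 0, 'file_name': '', 'content': "print('hi')", 'type': 'python'}]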