added new utilities

2025-04-04 17:39:09 +00:00 · 2023-08-17 00:52:34 +02:00 · 2023-08-17 00:52:34 +02:00 · 72a2466d3e
commit 72a2466d3e
parent 5b8cbec0d4
2 changed files with 60 additions and 1 deletions
--- a/lollms/utilities.py
+++ b/lollms/utilities.py
@ -736,3 +736,62 @@ class GenericDataLoader:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        return content
+
+
+
+
+class PromptReshaper:
+    def __init__(self, template):
+        self.template = template
+
+    def build(self, placeholders, tokenize, detokenize, max_nb_tokens, place_holders_to_sacrifice=[]):
+        # Tokenize the template without placeholders
+        template_text = self.template
+        for placeholder in placeholders:
+            template_text = template_text.replace("{{" + placeholder + "}}", "")
+        template_tokens = tokenize(template_text)
+        
+        # Calculate the number of tokens in the template without placeholders
+        template_tokens_count = len(template_tokens)
+        
+        # Calculate the number of tokens for each placeholder
+        placeholder_tokens_count = {}
+        all_count = template_tokens_count
+        for placeholder, text in placeholders.items():
+            text_tokens = tokenize(text)
+            placeholder_tokens_count[placeholder] = len(text_tokens)
+            all_count += placeholder_tokens_count[placeholder]
+
+        def fill_template(template, data):
+            for key, value in data.items():
+                placeholder = "{{" + key + "}}"
+                template = template.replace(placeholder, value)
+            return template
+        
+        if max_nb_tokens-all_count>0 or len(place_holders_to_sacrifice)==0:
+            return fill_template(self.template, placeholders)
+        else:
+            to_remove = -int((max_nb_tokens - all_count)/len(place_holders_to_sacrifice))
+            for placeholder, text in placeholders.items():
+                if placeholder in place_holders_to_sacrifice:
+                    text_tokens = tokenize(text)[to_remove:]
+                    placeholders[placeholder]=detokenize(text_tokens)
+            return fill_template(self.template, placeholders)
+
+if __name__=="__main__":
+    def tokenize(text):
+        return text.split()  # Simple tokenization by splitting on spaces
+
+    def detokenize(tokens):
+        return ' '.join(tokens)
+
+    template = "Hello, {{name}}! How are you feeling {{emotion}} really"
+    placeholders = {
+        "name": "Alice",
+        "emotion": "happy and very happy"
+    }
+    max_nb_tokens = 10
+
+    reshaper = PromptReshaper(template)
+    final_text = reshaper.build(placeholders, tokenize, detokenize, max_nb_tokens,["emotion"])
+    print(final_text)
--- a/setup.py
+++ b/setup.py
@ -26,7 +26,7 @@ def get_all_files(path):

 setuptools.setup(
    name="lollms",
-    version="2.3.3",
+    version="2.3.4",
    author="Saifeddine ALOUI",
    author_email="aloui.saifeddine@gmail.com",
    description="A python library for AI personality definition",