fixed thinking tags extraction

2025-03-01 04:06:07 +00:00 · 2025-02-07 21:57:40 +01:00 · 2025-02-07 21:57:40 +01:00 · 998bdd4015
commit 998bdd4015
parent 7bb21b6c76
1 changed file with 10 additions and 11 deletions
--- a/lollms/personality.py
+++ b/lollms/personality.py
@ -1144,9 +1144,10 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
            "formatted_string": formatted_string
        }
    def extract_thinking_blocks(self, text: str) -> List[str]:
        """
-        Extracts content between <thinking> tags from a given text.
+        Extracts content between <thinking> or <think> tags from a given text.
        Parameters:
        text (str): The text containing thinking blocks
@ -1156,19 +1157,18 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
        """
        import re
-        # Find all matches between thinking tags
+        # Pattern to match both <thinking> and <think> blocks with matching tags
-        pattern = r'<thinking>(.*?)</thinking>'
+        pattern = r'<(thinking|think)>(.*?)</\1>'
        # re.DOTALL allows . to match newlines
        matches = re.finditer(pattern, text, re.DOTALL)
-        # Extract and clean the content
+        # Extract content from the second group (index 2) and clean
-        thinking_blocks = [match.group(1).strip() for match in matches]
+        thinking_blocks = [match.group(2).strip() for match in matches]
        return thinking_blocks
    def remove_thinking_blocks(self, text: str) -> str:
        """
-        Removes thinking blocks from text including the tags.
+        Removes thinking blocks (either <thinking> or <think>) from text including the tags.
        Parameters:
        text (str): The text containing thinking blocks
@ -1178,8 +1178,8 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
        """
        import re
-        # Replace thinking blocks with empty string
+        # Pattern to remove both <thinking> and <think> blocks with matching tags
-        pattern = r'<thinking>.*?</thinking>'
+        pattern = r'<(thinking|think)>.*?</\1>'
        cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL)
        # Remove extra whitespace and normalize newlines
@ -1187,7 +1187,6 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
        return cleaned_text
    def extract_code_blocks(self, text: str, return_remaining_text: bool = False) -> Union[List[dict], Tuple[List[dict], str]]:
        """
        This function extracts code blocks from a given text and optionally returns the text without code blocks.