mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-03-01 04:06:07 +00:00
fixed thinking tags extraction
This commit is contained in:
parent
7bb21b6c76
commit
998bdd4015
@ -1144,9 +1144,10 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
|
|||||||
"formatted_string": formatted_string
|
"formatted_string": formatted_string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def extract_thinking_blocks(self, text: str) -> List[str]:
|
def extract_thinking_blocks(self, text: str) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Extracts content between <thinking> tags from a given text.
|
Extracts content between <thinking> or <think> tags from a given text.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
text (str): The text containing thinking blocks
|
text (str): The text containing thinking blocks
|
||||||
@ -1156,19 +1157,18 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
|
|||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Find all matches between thinking tags
|
# Pattern to match both <thinking> and <think> blocks with matching tags
|
||||||
pattern = r'<thinking>(.*?)</thinking>'
|
pattern = r'<(thinking|think)>(.*?)</\1>'
|
||||||
# re.DOTALL allows . to match newlines
|
|
||||||
matches = re.finditer(pattern, text, re.DOTALL)
|
matches = re.finditer(pattern, text, re.DOTALL)
|
||||||
|
|
||||||
# Extract and clean the content
|
# Extract content from the second group (index 2) and clean
|
||||||
thinking_blocks = [match.group(1).strip() for match in matches]
|
thinking_blocks = [match.group(2).strip() for match in matches]
|
||||||
|
|
||||||
return thinking_blocks
|
return thinking_blocks
|
||||||
|
|
||||||
def remove_thinking_blocks(self, text: str) -> str:
|
def remove_thinking_blocks(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Removes thinking blocks from text including the tags.
|
Removes thinking blocks (either <thinking> or <think>) from text including the tags.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
text (str): The text containing thinking blocks
|
text (str): The text containing thinking blocks
|
||||||
@ -1178,8 +1178,8 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
|
|||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Replace thinking blocks with empty string
|
# Pattern to remove both <thinking> and <think> blocks with matching tags
|
||||||
pattern = r'<thinking>.*?</thinking>'
|
pattern = r'<(thinking|think)>.*?</\1>'
|
||||||
cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL)
|
cleaned_text = re.sub(pattern, '', text, flags=re.DOTALL)
|
||||||
|
|
||||||
# Remove extra whitespace and normalize newlines
|
# Remove extra whitespace and normalize newlines
|
||||||
@ -1187,7 +1187,6 @@ Don't forget encapsulate the code inside a html code tag. This is mandatory.
|
|||||||
|
|
||||||
return cleaned_text
|
return cleaned_text
|
||||||
|
|
||||||
|
|
||||||
def extract_code_blocks(self, text: str, return_remaining_text: bool = False) -> Union[List[dict], Tuple[List[dict], str]]:
|
def extract_code_blocks(self, text: str, return_remaining_text: bool = False) -> Union[List[dict], Tuple[List[dict], str]]:
|
||||||
"""
|
"""
|
||||||
This function extracts code blocks from a given text and optionally returns the text without code blocks.
|
This function extracts code blocks from a given text and optionally returns the text without code blocks.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user