diff --git a/lollms/personality.py b/lollms/personality.py
index 5b7255b..e5d3541 100644
--- a/lollms/personality.py
+++ b/lollms/personality.py
@@ -4374,138 +4374,82 @@ transition-all duration-300 ease-in-out">
return updated_content, True # Section updated successfully
def extract_code_blocks(self, text: str, return_remaining_text: bool = False) -> Union[List[dict], Tuple[List[dict], str]]:
    """
    Extract fenced (triple-backtick) code blocks from a text.

    Parameters:
        text (str): The text to scan. A block opens at a triple backtick and
            closes at the next triple backtick.
        return_remaining_text (bool): If True, also return the text with all
            code blocks removed.

    Returns:
        Union[List[dict], Tuple[List[dict], str]]:
            - If return_remaining_text is False: the list of code block dictionaries.
            - If return_remaining_text is True: a tuple made of that list and
              the prose found outside the blocks (each prose fragment is
              stripped, empty fragments are dropped, and the rest is joined
              with newlines).

        Each code block dictionary contains:
            - 'index' (int): Position of the block in the text, starting at 0.
            - 'file_name' (str): File name announced on the line just before
              the block, or "" when there is none.
            - 'section' (str): Section announced on the line just before the
              block, or "" when there is none.
            - 'content' (str): The stripped content of the block.
            - 'type' (str): The text following the opening fence on the same
              line (usually the language), or "".
            - 'is_complete' (bool): True if the block has a closing fence.

    Notes:
        The file name is read from a `file_name` tag pair wrapping the whole
        preceding line, or from a "## filename:" prefix; the section is read
        from a `section` tag pair.
        NOTE(review): the tag spellings are reconstructed (the 11-character
        offset used for the file name matches an angle-bracketed `file_name`
        opening tag) -- confirm against the prompts that produce these markers.
    """
    fence = "```"
    filename_prefix = "## filename:"

    def tagged_value(line: str, tag: str) -> str:
        # Return the stripped payload of a line entirely wrapped in the given
        # tag pair, or "" when the line is not such a marker.
        opening_tag = f"<{tag}>"
        closing_tag = f"</{tag}>"
        if line.startswith(opening_tag) and line.endswith(closing_tag):
            return line[len(opening_tag):-len(closing_tag)].strip()
        return ""

    code_blocks = []
    prose_parts = []
    cursor = 0  # Everything before this offset has already been consumed.

    while True:
        opening = text.find(fence, cursor)
        if opening == -1:
            break

        # Prose between the previous block (or the start) and this fence.
        prose = text[cursor:opening].strip()
        prose_parts.append(prose)

        # Only the line immediately preceding the fence can carry metadata.
        file_name = ""
        section = ""
        prose_lines = prose.splitlines()
        if prose_lines:
            last_line = prose_lines[-1].strip()
            file_name = tagged_value(last_line, "file_name")
            if not file_name and last_line.startswith(filename_prefix):
                file_name = last_line[len(filename_prefix):].strip()
            section = tagged_value(last_line, "section")

        # The remainder of the opening line is the block type, unless it
        # already holds content: an inline block closed on the same line, or
        # a JSON-like payload starting right after the fence.
        content_start = opening + len(fence)
        code_type = ""
        line_end = text.find("\n", content_start)
        if line_end != -1:
            info = text[content_start:line_end].strip()
            if info and fence not in info and "{" not in info:
                code_type = info
                content_start = line_end + 1

        closing = text.find(fence, content_start)
        is_complete = closing != -1
        content_end = closing if is_complete else len(text)

        code_blocks.append({
            'index': len(code_blocks),
            'file_name': file_name,
            'section': section,
            'content': text[content_start:content_end].strip(),
            'type': code_type,
            'is_complete': is_complete,
        })

        if not is_complete:
            # An unterminated block swallows the rest of the text.
            cursor = len(text)
            break
        cursor = closing + len(fence)

    if return_remaining_text:
        prose_parts.append(text[cursor:].strip())
        return code_blocks, "\n".join(filter(None, prose_parts))
    return code_blocks
+
def build_and_execute_python_code(self,context, instructions, execution_function_signature, extra_imports=""):