mirror of
https://github.com/ParisNeo/lollms.git
synced 2025-01-21 20:08:29 +00:00
Enhanced code extraction
This commit is contained in:
parent
f855262848
commit
c224bcb35d
@ -4374,138 +4374,82 @@ transition-all duration-300 ease-in-out">
|
|||||||
|
|
||||||
return updated_content, True # Section updated successfully
|
return updated_content, True # Section updated successfully
|
||||||
|
|
||||||
def extract_code_blocks(self, text: str, return_remaining_text: bool = False) -> Union[List[dict], Tuple[List[dict], str]]:
|
def extract_code_blocks(text: str, return_remaining_text: bool = False) -> Union[List[dict], Tuple[List[dict], str]]:
|
||||||
"""
|
codes = []
|
||||||
This function extracts code blocks from a given text and optionally returns the text without code blocks.
|
remaining_text = text
|
||||||
|
current_index = 0
|
||||||
|
|
||||||
Parameters:
|
while True:
|
||||||
text (str): The text from which to extract code blocks. Code blocks are identified by triple backticks (```).
|
# Find next code block start
|
||||||
return_remaining_text (bool): If True, also returns the text with code blocks removed.
|
start_pos = remaining_text.find('```')
|
||||||
|
if start_pos == -1:
|
||||||
|
break
|
||||||
|
|
||||||
Returns:
|
# Check for file name before code block
|
||||||
Union[List[dict], Tuple[List[dict], str]]:
|
file_name = ''
|
||||||
- If return_remaining_text is False: Returns only the list of code block dictionaries
|
file_name_match = remaining_text[:start_pos].rfind('<file_name>')
|
||||||
- If return_remaining_text is True: Returns a tuple containing:
|
if file_name_match != -1:
|
||||||
* List of code block dictionaries
|
file_name_end = remaining_text[:start_pos].rfind('</file_name>')
|
||||||
* String containing the text with all code blocks removed
|
if file_name_end != -1 and file_name_match < file_name_end:
|
||||||
|
file_name = remaining_text[file_name_match + 11:file_name_end].strip()
|
||||||
|
|
||||||
Each code block dictionary contains:
|
# Get code type if specified
|
||||||
- 'index' (int): The index of the code block in the text
|
code_type = ''
|
||||||
- 'file_name' (str): The name of the file extracted from the preceding line, if available
|
next_newline = remaining_text.find('\n', start_pos + 3)
|
||||||
- 'content' (str): The content of the code block
|
if next_newline != -1:
|
||||||
- 'type' (str): The type of the code block
|
potential_type = remaining_text[start_pos + 3:next_newline].strip()
|
||||||
- 'is_complete' (bool): True if the block has a closing tag, False otherwise
|
if potential_type:
|
||||||
"""
|
code_type = potential_type
|
||||||
remaining = text
|
start_pos = next_newline + 1
|
||||||
bloc_index = 0
|
else:
|
||||||
first_index = 0
|
start_pos += 3
|
||||||
indices = []
|
else:
|
||||||
text_without_blocks = text
|
start_pos += 3
|
||||||
|
|
||||||
# Find all code block delimiters
|
# Find matching end tag
|
||||||
while len(remaining) > 0:
|
tag_count = 1
|
||||||
try:
|
pos = start_pos
|
||||||
index = remaining.index("```")
|
content_start = start_pos
|
||||||
indices.append(index + first_index)
|
is_complete = False
|
||||||
remaining = remaining[index + 3:]
|
|
||||||
first_index += index + 3
|
|
||||||
bloc_index += 1
|
|
||||||
except Exception as ex:
|
|
||||||
if bloc_index % 2 == 1:
|
|
||||||
index = index+len(remaining)
|
|
||||||
indices.append(index)
|
|
||||||
remaining = ""
|
|
||||||
|
|
||||||
code_blocks = []
|
while pos < len(remaining_text):
|
||||||
is_start = True
|
if remaining_text[pos:pos + 3] == '```':
|
||||||
|
tag_count -= 1
|
||||||
|
if tag_count == 0:
|
||||||
|
# Found matching end tag
|
||||||
|
content = remaining_text[content_start:pos].strip()
|
||||||
|
is_complete = True
|
||||||
|
codes.append({
|
||||||
|
'index': current_index,
|
||||||
|
'file_name': file_name,
|
||||||
|
'content': content,
|
||||||
|
'type': code_type,
|
||||||
|
'is_complete': True
|
||||||
|
})
|
||||||
|
remaining_text = remaining_text[pos + 3:]
|
||||||
|
break
|
||||||
|
elif remaining_text[pos:pos + 3] == '```':
|
||||||
|
tag_count += 1
|
||||||
|
pos += 1
|
||||||
|
|
||||||
# Process code blocks and build text without blocks if requested
|
if not is_complete:
|
||||||
if return_remaining_text:
|
# Handle incomplete code block
|
||||||
text_parts = []
|
content = remaining_text[content_start:].strip()
|
||||||
last_end = 0
|
codes.append({
|
||||||
|
'index': current_index,
|
||||||
for index, code_delimiter_position in enumerate(indices):
|
'file_name': file_name,
|
||||||
if is_start:
|
'content': content,
|
||||||
block_infos = {
|
'type': code_type,
|
||||||
'index': len(code_blocks),
|
|
||||||
'file_name': "",
|
|
||||||
'section': "",
|
|
||||||
'content': "",
|
|
||||||
'type': "",
|
|
||||||
'is_complete': False
|
'is_complete': False
|
||||||
}
|
})
|
||||||
|
remaining_text = ''
|
||||||
|
|
||||||
# Store text before code block if returning remaining text
|
current_index += 1
|
||||||
if return_remaining_text:
|
|
||||||
text_parts.append(text[last_end:code_delimiter_position].strip())
|
|
||||||
|
|
||||||
# Check the preceding line for file name
|
|
||||||
preceding_text = text[:code_delimiter_position].strip().splitlines()
|
|
||||||
if preceding_text:
|
|
||||||
last_line = preceding_text[-1].strip()
|
|
||||||
if last_line.startswith("<file_name>") and last_line.endswith("</file_name>"):
|
|
||||||
file_name = last_line[len("<file_name>"):-len("</file_name>")].strip()
|
|
||||||
block_infos['file_name'] = file_name
|
|
||||||
elif last_line.startswith("## filename:"):
|
|
||||||
file_name = last_line[len("## filename:"):].strip()
|
|
||||||
block_infos['file_name'] = file_name
|
|
||||||
if last_line.startswith("<section>") and last_line.endswith("</section>"):
|
|
||||||
section = last_line[len("<section>"):-len("</section>")].strip()
|
|
||||||
block_infos['section'] = section
|
|
||||||
|
|
||||||
sub_text = text[code_delimiter_position + 3:]
|
|
||||||
if len(sub_text) > 0:
|
|
||||||
try:
|
|
||||||
find_space = sub_text.index(" ")
|
|
||||||
except:
|
|
||||||
find_space = int(1e10)
|
|
||||||
try:
|
|
||||||
find_return = sub_text.index("\n")
|
|
||||||
except:
|
|
||||||
find_return = int(1e10)
|
|
||||||
next_index = min(find_return, find_space)
|
|
||||||
if '{' in sub_text[:next_index]:
|
|
||||||
next_index = 0
|
|
||||||
start_pos = next_index
|
|
||||||
|
|
||||||
if code_delimiter_position + 3 < len(text) and text[code_delimiter_position + 3] in ["\n", " ", "\t"]:
|
|
||||||
block_infos["type"] = 'language-specific'
|
|
||||||
else:
|
|
||||||
block_infos["type"] = sub_text[:next_index]
|
|
||||||
|
|
||||||
if index + 1 < len(indices):
|
|
||||||
next_pos = indices[index + 1] - code_delimiter_position
|
|
||||||
if next_pos - 3>0:
|
|
||||||
if next_pos - 3 < len(sub_text) and sub_text[next_pos - 3] == "`":
|
|
||||||
block_infos["content"] = sub_text[start_pos:next_pos - 3].strip()
|
|
||||||
block_infos["is_complete"] = True
|
|
||||||
else:
|
|
||||||
block_infos["content"] = sub_text[start_pos:next_pos].strip()
|
|
||||||
block_infos["is_complete"] = False
|
|
||||||
|
|
||||||
if return_remaining_text:
|
if return_remaining_text:
|
||||||
last_end = indices[index + 1] + 3
|
return codes, remaining_text
|
||||||
else:
|
return codes
|
||||||
block_infos["content"] = sub_text[start_pos:].strip()
|
|
||||||
block_infos["is_complete"] = False
|
|
||||||
|
|
||||||
if return_remaining_text:
|
|
||||||
last_end = len(text)
|
|
||||||
|
|
||||||
code_blocks.append(block_infos)
|
|
||||||
is_start = False
|
|
||||||
else:
|
|
||||||
is_start = True
|
|
||||||
|
|
||||||
if return_remaining_text:
|
|
||||||
# Add any remaining text after the last code block
|
|
||||||
if last_end < len(text):
|
|
||||||
text_parts.append(text[last_end:].strip())
|
|
||||||
# Join all non-code parts with newlines
|
|
||||||
text_without_blocks = '\n'.join(filter(None, text_parts))
|
|
||||||
return code_blocks, text_without_blocks
|
|
||||||
|
|
||||||
return code_blocks
|
|
||||||
|
|
||||||
|
|
||||||
def build_and_execute_python_code(self,context, instructions, execution_function_signature, extra_imports=""):
|
def build_and_execute_python_code(self,context, instructions, execution_function_signature, extra_imports=""):
|
||||||
|
Loading…
Reference in New Issue
Block a user