refactor(tools): extract position calculation logic in fuzzy_match (#1681)
Extract the repeated line-position calculation pattern into a _calculate_line_positions() helper. The same 4-line pattern was duplicated across _strategy_trimmed_boundary, _strategy_block_anchor, _strategy_context_aware, and _find_normalized_matches. Also standardizes the end_pos clamping (some sites used min(), some used an if-guard). Based on PR #1604 by aydnOktay. Co-authored-by: aydnOktay <aydnOktay@users.noreply.github.com>
This commit is contained in:
parent
30c417fe70
commit
6d1c5d4491
1 changed file with 33 additions and 18 deletions
|
|
@ -254,10 +254,9 @@ def _strategy_trimmed_boundary(content: str, pattern: str) -> List[Tuple[int, in
|
||||||
|
|
||||||
if '\n'.join(check_lines) == modified_pattern:
|
if '\n'.join(check_lines) == modified_pattern:
|
||||||
# Found match - calculate original positions
|
# Found match - calculate original positions
|
||||||
start_pos = sum(len(line) + 1 for line in content_lines[:i])
|
start_pos, end_pos = _calculate_line_positions(
|
||||||
end_pos = sum(len(line) + 1 for line in content_lines[:i + pattern_line_count]) - 1
|
content_lines, i, i + pattern_line_count, len(content)
|
||||||
if end_pos >= len(content):
|
)
|
||||||
end_pos = len(content)
|
|
||||||
matches.append((start_pos, end_pos))
|
matches.append((start_pos, end_pos))
|
||||||
|
|
||||||
return matches
|
return matches
|
||||||
|
|
@ -309,9 +308,10 @@ def _strategy_block_anchor(content: str, pattern: str) -> List[Tuple[int, int]]:
|
||||||
|
|
||||||
if similarity >= threshold:
|
if similarity >= threshold:
|
||||||
# Calculate positions using ORIGINAL lines to ensure correct character offsets in the file
|
# Calculate positions using ORIGINAL lines to ensure correct character offsets in the file
|
||||||
start_pos = sum(len(line) + 1 for line in orig_content_lines[:i])
|
start_pos, end_pos = _calculate_line_positions(
|
||||||
end_pos = sum(len(line) + 1 for line in orig_content_lines[:i + pattern_line_count]) - 1
|
orig_content_lines, i, i + pattern_line_count, len(content)
|
||||||
matches.append((start_pos, min(end_pos, len(content))))
|
)
|
||||||
|
matches.append((start_pos, end_pos))
|
||||||
|
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
|
|
@ -343,10 +343,9 @@ def _strategy_context_aware(content: str, pattern: str) -> List[Tuple[int, int]]
|
||||||
|
|
||||||
# Need at least 50% of lines to have high similarity
|
# Need at least 50% of lines to have high similarity
|
||||||
if high_similarity_count >= len(pattern_lines) * 0.5:
|
if high_similarity_count >= len(pattern_lines) * 0.5:
|
||||||
start_pos = sum(len(line) + 1 for line in content_lines[:i])
|
start_pos, end_pos = _calculate_line_positions(
|
||||||
end_pos = sum(len(line) + 1 for line in content_lines[:i + pattern_line_count]) - 1
|
content_lines, i, i + pattern_line_count, len(content)
|
||||||
if end_pos >= len(content):
|
)
|
||||||
end_pos = len(content)
|
|
||||||
matches.append((start_pos, end_pos))
|
matches.append((start_pos, end_pos))
|
||||||
|
|
||||||
return matches
|
return matches
|
||||||
|
|
@ -356,6 +355,26 @@ def _strategy_context_aware(content: str, pattern: str) -> List[Tuple[int, int]]
|
||||||
# Helper Functions
|
# Helper Functions
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
def _calculate_line_positions(content_lines: List[str], start_line: int,
|
||||||
|
end_line: int, content_length: int) -> Tuple[int, int]:
|
||||||
|
"""Calculate start and end character positions from line indices.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content_lines: List of lines (without newlines)
|
||||||
|
start_line: Starting line index (0-based)
|
||||||
|
end_line: Ending line index (exclusive, 0-based)
|
||||||
|
content_length: Total length of the original content string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (start_pos, end_pos) in the original content
|
||||||
|
"""
|
||||||
|
start_pos = sum(len(line) + 1 for line in content_lines[:start_line])
|
||||||
|
end_pos = sum(len(line) + 1 for line in content_lines[:end_line]) - 1
|
||||||
|
if end_pos >= content_length:
|
||||||
|
end_pos = content_length
|
||||||
|
return start_pos, end_pos
|
||||||
|
|
||||||
|
|
||||||
def _find_normalized_matches(content: str, content_lines: List[str],
|
def _find_normalized_matches(content: str, content_lines: List[str],
|
||||||
content_normalized_lines: List[str],
|
content_normalized_lines: List[str],
|
||||||
pattern: str, pattern_normalized: str) -> List[Tuple[int, int]]:
|
pattern: str, pattern_normalized: str) -> List[Tuple[int, int]]:
|
||||||
|
|
@ -383,13 +402,9 @@ def _find_normalized_matches(content: str, content_lines: List[str],
|
||||||
|
|
||||||
if block == pattern_normalized:
|
if block == pattern_normalized:
|
||||||
# Found a match - calculate original positions
|
# Found a match - calculate original positions
|
||||||
start_pos = sum(len(line) + 1 for line in content_lines[:i])
|
start_pos, end_pos = _calculate_line_positions(
|
||||||
end_pos = sum(len(line) + 1 for line in content_lines[:i + num_pattern_lines]) - 1
|
content_lines, i, i + num_pattern_lines, len(content)
|
||||||
|
)
|
||||||
# Handle case where end is past content
|
|
||||||
if end_pos >= len(content):
|
|
||||||
end_pos = len(content)
|
|
||||||
|
|
||||||
matches.append((start_pos, end_pos))
|
matches.append((start_pos, end_pos))
|
||||||
|
|
||||||
return matches
|
return matches
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue