From 6d1c5d44911a3be7551adc9e97b98c1a0c3ac001 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 17 Mar 2026 03:00:17 -0700 Subject: [PATCH] refactor(tools): extract position calculation logic in fuzzy_match (#1681) Extract the repeated line-position calculation pattern into a _calculate_line_positions() helper. The same 4-line pattern was duplicated across _strategy_trimmed_boundary, _strategy_block_anchor, _strategy_context_aware, and _find_normalized_matches. Also standardizes the end_pos clamping (some sites used min(), some used an if-guard). Based on PR #1604 by aydnOktay. Co-authored-by: aydnOktay --- tools/fuzzy_match.py | 51 ++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py index f53451c6..ddcdf427 100644 --- a/tools/fuzzy_match.py +++ b/tools/fuzzy_match.py @@ -254,10 +254,9 @@ def _strategy_trimmed_boundary(content: str, pattern: str) -> List[Tuple[int, in if '\n'.join(check_lines) == modified_pattern: # Found match - calculate original positions - start_pos = sum(len(line) + 1 for line in content_lines[:i]) - end_pos = sum(len(line) + 1 for line in content_lines[:i + pattern_line_count]) - 1 - if end_pos >= len(content): - end_pos = len(content) + start_pos, end_pos = _calculate_line_positions( + content_lines, i, i + pattern_line_count, len(content) + ) matches.append((start_pos, end_pos)) return matches @@ -309,9 +308,10 @@ def _strategy_block_anchor(content: str, pattern: str) -> List[Tuple[int, int]]: if similarity >= threshold: # Calculate positions using ORIGINAL lines to ensure correct character offsets in the file - start_pos = sum(len(line) + 1 for line in orig_content_lines[:i]) - end_pos = sum(len(line) + 1 for line in orig_content_lines[:i + pattern_line_count]) - 1 - matches.append((start_pos, min(end_pos, len(content)))) + start_pos, end_pos = _calculate_line_positions( + orig_content_lines, i, i + pattern_line_count, len(content) + ) + matches.append((start_pos, end_pos)) return matches @@ -343,10 +343,9 @@ def _strategy_context_aware(content: str, pattern: str) -> List[Tuple[int, int]] # Need at least 50% of lines to have high similarity if high_similarity_count >= len(pattern_lines) * 0.5: - start_pos = sum(len(line) + 1 for line in content_lines[:i]) - end_pos = sum(len(line) + 1 for line in content_lines[:i + pattern_line_count]) - 1 - if end_pos >= len(content): - end_pos = len(content) + start_pos, end_pos = _calculate_line_positions( + content_lines, i, i + pattern_line_count, len(content) + ) matches.append((start_pos, end_pos)) return matches @@ -356,6 +355,26 @@ def _strategy_context_aware(content: str, pattern: str) -> List[Tuple[int, int]] # Helper Functions # ============================================================================= +def _calculate_line_positions(content_lines: List[str], start_line: int, + end_line: int, content_length: int) -> Tuple[int, int]: + """Calculate start and end character positions from line indices. + + Args: + content_lines: List of lines (without newlines) + start_line: Starting line index (0-based) + end_line: Ending line index (exclusive, 0-based) + content_length: Total length of the original content string + + Returns: + Tuple of (start_pos, end_pos) in the original content + """ + start_pos = sum(len(line) + 1 for line in content_lines[:start_line]) + end_pos = sum(len(line) + 1 for line in content_lines[:end_line]) - 1 + if end_pos >= content_length: + end_pos = content_length + return start_pos, end_pos + + def _find_normalized_matches(content: str, content_lines: List[str], content_normalized_lines: List[str], pattern: str, pattern_normalized: str) -> List[Tuple[int, int]]: @@ -383,13 +402,9 @@ def _find_normalized_matches(content: str, content_lines: List[str], if block == pattern_normalized: # Found a match - calculate original positions - start_pos = sum(len(line) + 1 for line in content_lines[:i]) - end_pos = sum(len(line) + 1 for line in content_lines[:i + num_pattern_lines]) - 1 - - # Handle case where end is past content - if end_pos >= len(content): - end_pos = len(content) - + start_pos, end_pos = _calculate_line_positions( + content_lines, i, i + num_pattern_lines, len(content) + ) matches.append((start_pos, end_pos)) return matches