feat: enhance search functionality in ShellFileOperations

- Updated the `_search_with_rg` and `_search_with_grep` methods to always include the filename in the output (`--with-filename` for rg, `-H` for grep) and to improve result handling.
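- Adjusted result fetching to account for context lines: when context is requested, 200 extra rows are fetched so that group separators can be filtered out in Python, giving more accurate total counts and pagination.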
- Enhanced parsing logic to distinguish match lines (`file:lineno:content`), context lines (`file-lineno-content`), and group separators (`--`), improving the accuracy of search results.
- Refactored result slicing to use `[offset:offset + limit]` consistently across output modes, so that users receive the correct number of results.
This commit is contained in:
teknium1 2026-02-19 15:10:17 -08:00
parent d49af633f0
commit 057d3e1810

View file

@@ -808,7 +808,7 @@ class ShellFileOperations(FileOperations):
def _search_with_rg(self, pattern: str, path: str, file_glob: Optional[str], def _search_with_rg(self, pattern: str, path: str, file_glob: Optional[str],
limit: int, offset: int, output_mode: str, context: int) -> SearchResult: limit: int, offset: int, output_mode: str, context: int) -> SearchResult:
"""Search using ripgrep.""" """Search using ripgrep."""
cmd_parts = ["rg", "--line-number", "--no-heading"] cmd_parts = ["rg", "--line-number", "--no-heading", "--with-filename"]
# Add context if requested # Add context if requested
if context > 0: if context > 0:
@@ -828,16 +828,21 @@ class ShellFileOperations(FileOperations):
cmd_parts.append(self._escape_shell_arg(pattern)) cmd_parts.append(self._escape_shell_arg(pattern))
cmd_parts.append(self._escape_shell_arg(path)) cmd_parts.append(self._escape_shell_arg(path))
# Limit results # Fetch extra rows so we can report the true total before slicing.
cmd_parts.extend(["|", "head", "-n", str(limit + offset)]) # For context mode, rg emits separator lines ("--") between groups,
# so we grab generously and filter in Python.
fetch_limit = limit + offset + 200 if context > 0 else limit + offset
cmd_parts.extend(["|", "head", "-n", str(fetch_limit)])
cmd = " ".join(cmd_parts) cmd = " ".join(cmd_parts)
result = self._exec(cmd, timeout=60) result = self._exec(cmd, timeout=60)
# Parse results based on output mode # Parse results based on output mode
if output_mode == "files_only": if output_mode == "files_only":
files = [f for f in result.stdout.strip().split('\n') if f][offset:] all_files = [f for f in result.stdout.strip().split('\n') if f]
return SearchResult(files=files[:limit], total_count=len(files)) total = len(all_files)
page = all_files[offset:offset + limit]
return SearchResult(files=page, total_count=total)
elif output_mode == "count": elif output_mode == "count":
counts = {} counts = {}
@@ -852,34 +857,54 @@ class ShellFileOperations(FileOperations):
return SearchResult(counts=counts, total_count=sum(counts.values())) return SearchResult(counts=counts, total_count=sum(counts.values()))
else: else:
# Parse content matches # Parse content matches and context lines.
# rg match lines: "file:lineno:content" (colon separator)
# rg context lines: "file-lineno-content" (dash separator)
# rg group seps: "--"
matches = [] matches = []
for line in result.stdout.strip().split('\n')[offset:]: for line in result.stdout.strip().split('\n'):
if not line: if not line or line == "--":
continue continue
# Format: file:line:content
# Try match line first (colon-separated: file:line:content)
parts = line.split(':', 2) parts = line.split(':', 2)
if len(parts) >= 3: if len(parts) >= 3:
try: try:
matches.append(SearchMatch( matches.append(SearchMatch(
path=parts[0], path=parts[0],
line_number=int(parts[1]), line_number=int(parts[1]),
content=parts[2][:500] # Truncate long lines content=parts[2][:500]
)) ))
continue
except ValueError: except ValueError:
# Line number not an int, skip
pass pass
# Try context line (dash-separated: file-line-content)
# Only attempt if context was requested to avoid false positives
if context > 0:
parts = line.split('-', 2)
if len(parts) >= 3:
try:
matches.append(SearchMatch(
path=parts[0],
line_number=int(parts[1]),
content=parts[2][:500]
))
except ValueError:
pass
total = len(matches)
page = matches[offset:offset + limit]
return SearchResult( return SearchResult(
matches=matches[:limit], matches=page,
total_count=len(matches), total_count=total,
truncated=len(matches) > limit truncated=total > offset + limit
) )
def _search_with_grep(self, pattern: str, path: str, file_glob: Optional[str], def _search_with_grep(self, pattern: str, path: str, file_glob: Optional[str],
limit: int, offset: int, output_mode: str, context: int) -> SearchResult: limit: int, offset: int, output_mode: str, context: int) -> SearchResult:
"""Fallback search using grep.""" """Fallback search using grep."""
cmd_parts = ["grep", "-rn"] cmd_parts = ["grep", "-rnH"] # -H forces filename even for single-file searches
# Add context if requested # Add context if requested
if context > 0: if context > 0:
@@ -899,16 +924,18 @@ class ShellFileOperations(FileOperations):
cmd_parts.append(self._escape_shell_arg(pattern)) cmd_parts.append(self._escape_shell_arg(pattern))
cmd_parts.append(self._escape_shell_arg(path)) cmd_parts.append(self._escape_shell_arg(path))
# Limit and offset # Fetch generously so we can compute total before slicing
cmd_parts.extend(["|", "tail", "-n", f"+{offset + 1}", "|", "head", "-n", str(limit)]) fetch_limit = limit + offset + (200 if context > 0 else 0)
cmd_parts.extend(["|", "head", "-n", str(fetch_limit)])
cmd = " ".join(cmd_parts) cmd = " ".join(cmd_parts)
result = self._exec(cmd, timeout=60) result = self._exec(cmd, timeout=60)
# Parse results (same format as rg)
if output_mode == "files_only": if output_mode == "files_only":
files = [f for f in result.stdout.strip().split('\n') if f] all_files = [f for f in result.stdout.strip().split('\n') if f]
return SearchResult(files=files, total_count=len(files)) total = len(all_files)
page = all_files[offset:offset + limit]
return SearchResult(files=page, total_count=total)
elif output_mode == "count": elif output_mode == "count":
counts = {} counts = {}
@@ -923,10 +950,14 @@ class ShellFileOperations(FileOperations):
return SearchResult(counts=counts, total_count=sum(counts.values())) return SearchResult(counts=counts, total_count=sum(counts.values()))
else: else:
# grep match lines: "file:lineno:content" (colon)
# grep context lines: "file-lineno-content" (dash)
# grep group seps: "--"
matches = [] matches = []
for line in result.stdout.strip().split('\n'): for line in result.stdout.strip().split('\n'):
if not line: if not line or line == "--":
continue continue
parts = line.split(':', 2) parts = line.split(':', 2)
if len(parts) >= 3: if len(parts) >= 3:
try: try:
@@ -935,10 +966,26 @@ class ShellFileOperations(FileOperations):
line_number=int(parts[1]), line_number=int(parts[1]),
content=parts[2][:500] content=parts[2][:500]
)) ))
continue
except ValueError: except ValueError:
pass pass
if context > 0:
parts = line.split('-', 2)
if len(parts) >= 3:
try:
matches.append(SearchMatch(
path=parts[0],
line_number=int(parts[1]),
content=parts[2][:500]
))
except ValueError:
pass
total = len(matches)
page = matches[offset:offset + limit]
return SearchResult( return SearchResult(
matches=matches, matches=page,
total_count=len(matches) total_count=total,
truncated=total > offset + limit
) )