fix(security): patch path traversal, size bypass, and prompt injection in document processing

- Sanitize filenames in cache_document_from_bytes to prevent path traversal (strip directory components, null bytes, resolve check)
- Reject documents with None file_size instead of silently allowing download
- Cap text file injection at 100 KB to prevent oversized prompt payloads
- Sanitize display_name in run.py context notes to block prompt injection via filenames
- Add 35 unit tests covering document cache utilities and Telegram document handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
tekelala 2026-02-27 11:53:46 -05:00
parent b2172c4b2e
commit fbb1923fad
5 changed files with 516 additions and 6 deletions

View file

@ -209,11 +209,21 @@ def cache_document_from_bytes(data: bytes, filename: str) -> str:
Returns:
Absolute path to the cached document file as a string.
Raises:
ValueError: If the sanitized path escapes the cache directory.
"""
cache_dir = get_document_cache_dir()
safe_name = filename if filename else "document"
# Sanitize: strip directory components, null bytes, and control characters
safe_name = Path(filename).name if filename else "document"
safe_name = safe_name.replace("\x00", "").strip()
if not safe_name or safe_name in (".", ".."):
safe_name = "document"
cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
filepath = cache_dir / cached_name
# Final safety check: ensure path stays inside cache dir
if not filepath.resolve().is_relative_to(cache_dir.resolve()):
raise ValueError(f"Path traversal rejected: {filename!r}")
filepath.write_bytes(data)
return str(filepath)

View file

@ -513,10 +513,11 @@ class TelegramAdapter(BasePlatformAdapter):
return
# Check file size (Telegram Bot API limit: 20 MB)
if doc.file_size and doc.file_size > 20 * 1024 * 1024:
MAX_DOC_BYTES = 20 * 1024 * 1024
if not doc.file_size or doc.file_size > MAX_DOC_BYTES:
event.text = (
"The document is too large (over 20 MB). "
"Please send a smaller file."
"The document is too large or its size could not be verified. "
"Maximum: 20 MB."
)
print(f"[Telegram] Document too large: {doc.file_size} bytes", flush=True)
await self.handle_message(event)
@ -532,8 +533,9 @@ class TelegramAdapter(BasePlatformAdapter):
event.media_types = [mime_type]
print(f"[Telegram] Cached user document: {cached_path}", flush=True)
# For text files, inject content into event.text
if ext in (".md", ".txt"):
# For text files, inject content into event.text (capped at 100 KB)
MAX_TEXT_INJECT_BYTES = 100 * 1024
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = original_filename or f"document{ext}"

View file

@ -757,6 +757,9 @@ class GatewayRunner:
# Format: doc_<12hex>_<original_filename>
parts = basename.split("_", 2)
display_name = parts[2] if len(parts) >= 3 else basename
# Sanitize to prevent prompt injection via filenames
import re as _re
display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
if mtype.startswith("text/"):
context_note = (