fix(security): patch path traversal, size bypass, and prompt injection in document processing

- Sanitize filenames in cache_document_from_bytes to prevent path traversal (strip directory components, null bytes, resolve check)
- Reject documents with None file_size instead of silently allowing download
- Cap text file injection at 100 KB to prevent oversized prompt payloads
- Sanitize display_name in run.py context notes to block prompt injection via filenames
- Add 35 unit tests covering document cache utilities and Telegram document handling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:53:46 -05:00 · 2026-02-27 11:53:46 -05:00 · fbb1923fad
commit fbb1923fad
parent b2172c4b2e
5 changed files with 516 additions and 6 deletions
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -209,11 +209,21 @@ def cache_document_from_bytes(data: bytes, filename: str) -> str:

    Returns:
        Absolute path to the cached document file as a string.
+
+    Raises:
+        ValueError: If the sanitized path escapes the cache directory.
    """
    cache_dir = get_document_cache_dir()
-    safe_name = filename if filename else "document"
+    # Sanitize: strip directory components, null bytes, and surrounding whitespace
+    safe_name = Path(filename).name if filename else "document"
+    safe_name = safe_name.replace("\x00", "").strip()
+    if not safe_name or safe_name in (".", ".."):
+        safe_name = "document"
    cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
    filepath = cache_dir / cached_name
+    # Final safety check: ensure path stays inside cache dir
+    if not filepath.resolve().is_relative_to(cache_dir.resolve()):
+        raise ValueError(f"Path traversal rejected: {filename!r}")
    filepath.write_bytes(data)
    return str(filepath)

--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -513,10 +513,11 @@ class TelegramAdapter(BasePlatformAdapter):
                    return

                # Check file size (Telegram Bot API limit: 20 MB)
-                if doc.file_size and doc.file_size > 20 * 1024 * 1024:
+                MAX_DOC_BYTES = 20 * 1024 * 1024
+                if not doc.file_size or doc.file_size > MAX_DOC_BYTES:
                    event.text = (
-                        "The document is too large (over 20 MB). "
-                        "Please send a smaller file."
+                        "The document is too large or its size could not be verified. "
+                        "Maximum: 20 MB."
                    )
                    print(f"[Telegram] Document too large: {doc.file_size} bytes", flush=True)
                    await self.handle_message(event)
@ -532,8 +533,9 @@ class TelegramAdapter(BasePlatformAdapter):
                event.media_types = [mime_type]
                print(f"[Telegram] Cached user document: {cached_path}", flush=True)

-                # For text files, inject content into event.text
-                if ext in (".md", ".txt"):
+                # For text files, inject content into event.text (capped at 100 KB)
+                MAX_TEXT_INJECT_BYTES = 100 * 1024
+                if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
                    try:
                        text_content = raw_bytes.decode("utf-8")
                        display_name = original_filename or f"document{ext}"
--- a/gateway/run.py
+++ b/gateway/run.py
@ -757,6 +757,9 @@ class GatewayRunner:
                # Format: doc_<12hex>_<original_filename>
                parts = basename.split("_", 2)
                display_name = parts[2] if len(parts) >= 3 else basename
+                # Sanitize to prevent prompt injection via filenames
+                import re as _re
+                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)

                if mtype.startswith("text/"):
                    context_note = (