fix: platform base extract_images and truncate_message bugs + tests

- extract_images: only remove the image tags that were actually extracted from content, preserving non-image markdown links (e.g. PDFs) that were previously silently lost
- truncate_message: walk only chunk_body (not the prepended prefix) so that the reopened code fence no longer toggles in_code off; previously this left continuation chunks with unclosed code blocks
- Add 49 unit tests covering MessageEvent command parsing, extract_images, extract_media, truncate_message code block handling, and _get_human_delay
2026-02-28 21:21:03 +03:00 · 2026-02-28 21:21:03 +03:00 · ff6d62802d
commit ff6d62802d
parent 6366177118
2 changed files with 357 additions and 6 deletions
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -460,10 +460,14 @@ class BasePlatformAdapter(ABC):
            url = match.group(1)
            images.append((url, ""))
        
-        # Remove matched image tags from content if we found images
+        # Remove only the matched image tags from content (not all markdown images)
        if images:
-            cleaned = re.sub(md_pattern, '', cleaned)
-            cleaned = re.sub(html_pattern, '', cleaned)
+            extracted_urls = {url for url, _ in images}
+            def _remove_if_extracted(match):
+                url = match.group(2) if match.lastindex >= 2 else match.group(1)
+                return '' if url in extracted_urls else match.group(0)
+            cleaned = re.sub(md_pattern, _remove_if_extracted, cleaned)
+            cleaned = re.sub(html_pattern, _remove_if_extracted, cleaned)
            # Clean up leftover blank lines
            cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
        
@ -798,11 +802,11 @@ class BasePlatformAdapter(ABC):

            full_chunk = prefix + chunk_body

-            # Walk the chunk line-by-line to determine whether we end
-            # inside an open code block.
+            # Walk only the chunk_body (not the prefix we prepended) to
+            # determine whether we end inside an open code block.
            in_code = carry_lang is not None
            lang = carry_lang or ""
-            for line in full_chunk.split("\n"):
+            for line in chunk_body.split("\n"):
                stripped = line.strip()
                if stripped.startswith("```"):
                    if in_code: