Hermes Agent UX Improvements

2026-02-22 02:16:11 -08:00 · 2026-02-22 02:16:11 -08:00 · ededaaa874
commit ededaaa874
parent b1f55e3ee5
23 changed files with 945 additions and 1545 deletions
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@ -659,34 +659,90 @@ class BasePlatformAdapter(ABC):
    
    def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
        """
-        Split a long message into chunks.
-        
+        Split a long message into chunks, preserving code block boundaries.
+
+        When a split falls inside a triple-backtick code block, the fence is
+        closed at the end of the current chunk and reopened (with the original
+        language tag) at the start of the next chunk.  Multi-chunk responses
+        receive indicators like ``(1/3)``.
+
        Args:
            content: The full message content
            max_length: Maximum length per chunk (platform-specific)
-        
+
        Returns:
            List of message chunks
        """
        if len(content) <= max_length:
            return [content]
-        
-        chunks = []
-        while content:
-            if len(content) <= max_length:
-                chunks.append(content)
+
+        INDICATOR_RESERVE = 10   # room for " (XX/XX)"
+        FENCE_CLOSE = "\n```"
+
+        chunks: List[str] = []
+        remaining = content
+        # When the previous chunk ended mid-code-block, this holds the
+        # language tag (possibly "") so we can reopen the fence.
+        carry_lang: Optional[str] = None
+
+        while remaining:
+            # If we're continuing a code block from the previous chunk,
+            # prepend a new opening fence with the same language tag.
+            prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
+
+            # How much body text we can fit after accounting for the prefix,
+            # a potential closing fence, and the chunk indicator.
+            headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
+            if headroom < 1:
+                headroom = max_length // 2
+
+            # Everything remaining fits in one final chunk
+            if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
+                chunks.append(prefix + remaining)
                break
-            
-            # Try to split at a newline
-            split_idx = content.rfind("\n", 0, max_length)
-            if split_idx == -1:
-                # No newline, split at space
-                split_idx = content.rfind(" ", 0, max_length)
-            if split_idx == -1:
-                # No space either, hard split
-                split_idx = max_length
-            
-            chunks.append(content[:split_idx])
-            content = content[split_idx:].lstrip()
-        
+
+            # Find a natural split point (prefer newlines, then spaces)
+            region = remaining[:headroom]
+            split_at = region.rfind("\n")
+            if split_at < headroom // 2:
+                split_at = region.rfind(" ")
+            if split_at < 1:
+                split_at = headroom
+
+            chunk_body = remaining[:split_at]
+            remaining = remaining[split_at:].lstrip()
+
+            full_chunk = prefix + chunk_body
+
+            # Walk only the body (not the fence we just prepended, which would
+            # flip the state) to see whether we end inside an open code block.
+            in_code = carry_lang is not None
+            lang = carry_lang or ""
+            for line in chunk_body.split("\n"):
+                stripped = line.strip()
+                if stripped.startswith("```"):
+                    if in_code:
+                        in_code = False
+                        lang = ""
+                    else:
+                        in_code = True
+                        tag = stripped[3:].strip()
+                        lang = tag.split()[0] if tag else ""
+
+            if in_code:
+                # Close the orphaned fence so the chunk is valid on its own
+                full_chunk += FENCE_CLOSE
+                carry_lang = lang
+            else:
+                carry_lang = None
+
+            chunks.append(full_chunk)
+
+        # Append chunk indicators when the response spans multiple messages
+        if len(chunks) > 1:
+            total = len(chunks)
+            chunks = [
+                f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
+            ]
+
        return chunks
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -8,6 +8,7 @@ Uses python-telegram-bot library for:
 """

 import asyncio
+import re
 from typing import Dict, List, Optional, Any

 try:
@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool:
    return TELEGRAM_AVAILABLE


+# Every character MarkdownV2 treats as special (plus the backslash itself);
+# outside code spans and fenced blocks each one needs a leading backslash.
+_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
+
+
+def _escape_mdv2(text: str) -> str:
+    """Return *text* with each MarkdownV2 special character backslash-prefixed."""
+    return _MDV2_ESCAPE_RE.sub(lambda hit: '\\' + hit.group(1), text)
+
+
 class TelegramAdapter(BasePlatformAdapter):
    """
    Telegram bot adapter.
@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    msg = await self._bot.send_message(
                        chat_id=int(chat_id),
                        text=chunk,
-                        parse_mode=ParseMode.MARKDOWN,
+                        parse_mode=ParseMode.MARKDOWN_V2,
                        reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
                        message_thread_id=int(thread_id) if thread_id else None,
                    )
@ -297,14 +308,81 @@ class TelegramAdapter(BasePlatformAdapter):
    
    def format_message(self, content: str) -> str:
        """
-        Format message for Telegram.
-        
-        Telegram uses a subset of markdown. We'll use the simpler
-        Markdown mode (not MarkdownV2) for compatibility.
+        Convert standard markdown to Telegram MarkdownV2 format.
+
+        Code blocks and inline code are stashed first so markdown conversion
+        never touches them; only backslashes and backticks inside them are
+        escaped, as MarkdownV2 requires.  Headers, bold, italic and links are
+        translated to MarkdownV2, and remaining special characters are escaped.
        """
-        # Basic escaping for Telegram Markdown
-        # In Markdown mode (not V2), only certain characters need escaping
-        return content
+        if not content:
+            return content
+
+        placeholders: dict = {}
+        counter = [0]
+
+        def _ph(value: str) -> str:
+            """Stash *value* behind a placeholder token that survives escaping."""
+            key = f"\x00PH{counter[0]}\x00"
+            counter[0] += 1
+            placeholders[key] = value
+            return key
+
+        text = content
+
+        # 1) Protect fenced code blocks (``` ... ```).  Inside code entities
+        #    MarkdownV2 still requires '\' and '`' to be backslash-escaped.
+        def _protect_code(m):
+            body = m.group(2).replace('\\', '\\\\').replace('`', '\\`')
+            return _ph(m.group(1) + body + m.group(3))
+
+        text = re.sub(r'(```(?:[^\n]*\n)?)([\s\S]*?)(```)', _protect_code, text)
+        # 2) Protect inline code (`...`)
+        text = re.sub(r'(`)([^`]+)(`)', _protect_code, text)
+
+        # 3) Convert markdown links – escape the display text; inside the URL
+        #    only ')' and '\' need escaping per the MarkdownV2 spec.
+        def _convert_link(m):
+            display = _escape_mdv2(m.group(1))
+            url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
+            return _ph(f'[{display}]({url})')
+
+        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
+
+        # 4) Convert markdown headers (## Title) → bold *Title*
+        def _convert_header(m):
+            inner = m.group(1).strip()
+            # Strip redundant bold markers that may appear inside a header
+            inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
+            return _ph(f'*{_escape_mdv2(inner)}*')
+
+        text = re.sub(
+            r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
+        )
+
+        # 5) Convert bold: **text** → *text* (MarkdownV2 bold)
+        text = re.sub(
+            r'\*\*(.+?)\*\*',
+            lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
+            text,
+        )
+
+        # 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
+        text = re.sub(
+            r'\*([^*]+)\*',
+            lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
+            text,
+        )
+
+        # 7) Escape remaining special characters in plain text
+        text = _escape_mdv2(text)
+
+        # 8) Restore placeholders in reverse insertion order so that
+        #    nested references (a placeholder inside another) resolve correctly.
+        for key in reversed(list(placeholders.keys())):
+            text = text.replace(key, placeholders[key])
+
+        return text
    
    async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming text messages."""