Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks

Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
This commit is contained in:
teknium1 2026-02-15 21:38:59 -08:00
parent 5404a8fcd8
commit 69aa35a51c
23 changed files with 2080 additions and 32 deletions

View file

@ -108,6 +108,65 @@ def cleanup_image_cache(max_age_hours: int = 24) -> int:
return removed
# ---------------------------------------------------------------------------
# Audio cache utilities
#
# Same pattern as image cache -- voice messages from platforms are downloaded
# here so the STT tool (OpenAI Whisper) can transcribe them from local files.
# ---------------------------------------------------------------------------
AUDIO_CACHE_DIR = Path(os.path.expanduser("~/.hermes/audio_cache"))
def get_audio_cache_dir() -> Path:
"""Return the audio cache directory, creating it if it doesn't exist."""
AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
return AUDIO_CACHE_DIR
def cache_audio_from_bytes(data: bytes, ext: str = ".ogg") -> str:
"""
Save raw audio bytes to the cache and return the absolute file path.
Args:
data: Raw audio bytes.
ext: File extension including the dot (e.g. ".ogg", ".mp3").
Returns:
Absolute path to the cached audio file as a string.
"""
cache_dir = get_audio_cache_dir()
filename = f"audio_{uuid.uuid4().hex[:12]}{ext}"
filepath = cache_dir / filename
filepath.write_bytes(data)
return str(filepath)
async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
"""
Download an audio file from a URL and save it to the local cache.
Args:
url: The HTTP/HTTPS URL to download from.
ext: File extension including the dot (e.g. ".ogg", ".mp3").
Returns:
Absolute path to the cached audio file as a string.
"""
import httpx
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
response = await client.get(
url,
headers={
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
"Accept": "audio/*,*/*;q=0.8",
},
)
response.raise_for_status()
return cache_audio_from_bytes(response.content, ext)
class MessageType(Enum):
"""Types of incoming messages."""
TEXT = "text"
@ -422,6 +481,27 @@ class BasePlatformAdapter(ABC):
# Spawn background task to process this message
asyncio.create_task(self._process_message_background(event, session_key))
@staticmethod
def _get_human_delay() -> float:
"""
Return a random delay in seconds for human-like response pacing.
Reads from env vars:
HERMES_HUMAN_DELAY_MODE: "off" (default) | "natural" | "custom"
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
"""
import random
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
if mode == "off":
return 0.0
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
if mode == "natural":
min_ms, max_ms = 800, 2500
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
"""Background task that actually processes the message."""
# Create interrupt event for this session
@ -463,8 +543,13 @@ class BasePlatformAdapter(ABC):
if not fallback_result.success:
print(f"[{self.name}] Fallback send also failed: {fallback_result.error}")
# Human-like pacing delay between text and media
human_delay = self._get_human_delay()
# Send extracted images as native attachments
for image_url, alt_text in images:
if human_delay > 0:
await asyncio.sleep(human_delay)
try:
img_result = await self.send_image(
chat_id=event.source.chat_id,
@ -478,6 +563,8 @@ class BasePlatformAdapter(ABC):
# Send extracted audio/voice files as native attachments
for audio_path, is_voice in media_files:
if human_delay > 0:
await asyncio.sleep(human_delay)
try:
voice_result = await self.send_voice(
chat_id=event.source.chat_id,

View file

@ -33,6 +33,7 @@ from gateway.platforms.base import (
MessageType,
SendResult,
cache_image_from_url,
cache_audio_from_url,
)
@ -49,7 +50,10 @@ class DiscordAdapter(BasePlatformAdapter):
- Receiving messages from servers and DMs
- Sending responses with Discord markdown
- Thread support
- Slash commands (future)
- Native slash commands (/ask, /reset, /status, /stop)
- Button-based exec approvals
- Auto-threading for long conversations
- Reaction-based feedback
"""
# Discord message limits
@ -59,6 +63,7 @@ class DiscordAdapter(BasePlatformAdapter):
super().__init__(config, Platform.DISCORD)
self._client: Optional[commands.Bot] = None
self._ready_event = asyncio.Event()
self._allowed_user_ids: set = set() # For button approval authorization
async def connect(self) -> bool:
"""Connect to Discord and start receiving events."""
@ -83,10 +88,23 @@ class DiscordAdapter(BasePlatformAdapter):
intents=intents,
)
# Parse allowed user IDs for button authorization
allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
if allowed_env:
self._allowed_user_ids = {
uid.strip() for uid in allowed_env.split(",") if uid.strip()
}
# Register event handlers
@self._client.event
async def on_ready():
print(f"[{self.name}] Connected as {self._client.user}")
# Sync slash commands with Discord
try:
synced = await self._client.tree.sync()
print(f"[{self.name}] Synced {len(synced)} slash command(s)")
except Exception as e:
print(f"[{self.name}] Slash command sync failed: {e}")
self._ready_event.set()
@self._client.event
@ -96,6 +114,9 @@ class DiscordAdapter(BasePlatformAdapter):
return
await self._handle_message(message)
# Register slash commands
self._register_slash_commands()
# Start the bot in background
asyncio.create_task(self._client.start(self.config.token))
@ -325,6 +346,116 @@ class DiscordAdapter(BasePlatformAdapter):
# Discord markdown is fairly standard, no special escaping needed
return content
def _register_slash_commands(self) -> None:
"""Register Discord slash commands on the command tree."""
if not self._client:
return
tree = self._client.tree
@tree.command(name="ask", description="Ask Hermes a question")
@discord.app_commands.describe(question="Your question for Hermes")
async def slash_ask(interaction: discord.Interaction, question: str):
await interaction.response.defer()
event = self._build_slash_event(interaction, question)
await self.handle_message(event)
# The response is sent via the normal send() flow
# Send a followup to close the interaction if needed
try:
await interaction.followup.send("Processing complete~", ephemeral=True)
except Exception:
pass
@tree.command(name="reset", description="Reset your Hermes session")
async def slash_reset(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/reset")
await self.handle_message(event)
try:
await interaction.followup.send("Session reset~", ephemeral=True)
except Exception:
pass
@tree.command(name="status", description="Show Hermes session status")
async def slash_status(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/status")
await self.handle_message(event)
try:
await interaction.followup.send("Status sent~", ephemeral=True)
except Exception:
pass
@tree.command(name="stop", description="Stop the running Hermes agent")
async def slash_stop(interaction: discord.Interaction):
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, "/stop")
await self.handle_message(event)
try:
await interaction.followup.send("Stop requested~", ephemeral=True)
except Exception:
pass
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
"""Build a MessageEvent from a Discord slash command interaction."""
is_dm = isinstance(interaction.channel, discord.DMChannel)
chat_type = "dm" if is_dm else "group"
chat_name = ""
if not is_dm and hasattr(interaction.channel, "name"):
chat_name = interaction.channel.name
if hasattr(interaction.channel, "guild") and interaction.channel.guild:
chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
source = self.build_source(
chat_id=str(interaction.channel_id),
chat_name=chat_name,
chat_type=chat_type,
user_id=str(interaction.user.id),
user_name=interaction.user.display_name,
)
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
return MessageEvent(
text=text,
message_type=msg_type,
source=source,
raw_message=interaction,
)
async def send_exec_approval(
self, chat_id: str, command: str, approval_id: str
) -> SendResult:
"""
Send a button-based exec approval prompt for a dangerous command.
Returns SendResult. The approval is resolved when a user clicks a button.
"""
if not self._client or not DISCORD_AVAILABLE:
return SendResult(success=False, error="Not connected")
try:
channel = self._client.get_channel(int(chat_id))
if not channel:
channel = await self._client.fetch_channel(int(chat_id))
embed = discord.Embed(
title="Command Approval Required",
description=f"```\n{command[:500]}\n```",
color=discord.Color.orange(),
)
embed.set_footer(text=f"Approval ID: {approval_id}")
view = ExecApprovalView(
approval_id=approval_id,
allowed_user_ids=self._allowed_user_ids,
)
msg = await channel.send(embed=embed, view=view)
return SendResult(success=True, message_id=str(msg.id))
except Exception as e:
return SendResult(success=False, error=str(e))
async def _handle_message(self, message: DiscordMessage) -> None:
"""Handle incoming Discord messages."""
# In server channels (not DMs), require the bot to be @mentioned
@ -424,8 +555,21 @@ class DiscordAdapter(BasePlatformAdapter):
# Fall back to the CDN URL if caching fails
media_urls.append(att.url)
media_types.append(content_type)
elif content_type.startswith("audio/"):
try:
ext = "." + content_type.split("/")[-1].split(";")[0]
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
ext = ".ogg"
cached_path = await cache_audio_from_url(att.url, ext=ext)
media_urls.append(cached_path)
media_types.append(content_type)
print(f"[Discord] Cached user audio: {cached_path}", flush=True)
except Exception as e:
print(f"[Discord] Failed to cache audio attachment: {e}", flush=True)
media_urls.append(att.url)
media_types.append(content_type)
else:
# Non-image attachments: keep the original URL
# Other attachments: keep the original URL
media_urls.append(att.url)
media_types.append(content_type)
@ -442,3 +586,94 @@ class DiscordAdapter(BasePlatformAdapter):
)
await self.handle_message(event)
# ---------------------------------------------------------------------------
# Discord UI Components (outside the adapter class)
# ---------------------------------------------------------------------------
if DISCORD_AVAILABLE:
class ExecApprovalView(discord.ui.View):
"""
Interactive button view for exec approval of dangerous commands.
Shows three buttons: Allow Once (green), Always Allow (blue), Deny (red).
Only users in the allowed list can click. The view times out after 5 minutes.
"""
def __init__(self, approval_id: str, allowed_user_ids: set):
super().__init__(timeout=300) # 5-minute timeout
self.approval_id = approval_id
self.allowed_user_ids = allowed_user_ids
self.resolved = False
def _check_auth(self, interaction: discord.Interaction) -> bool:
"""Verify the user clicking is authorized."""
if not self.allowed_user_ids:
return True # No allowlist = anyone can approve
return str(interaction.user.id) in self.allowed_user_ids
async def _resolve(
self, interaction: discord.Interaction, action: str, color: discord.Color
):
"""Resolve the approval and update the message."""
if self.resolved:
await interaction.response.send_message(
"This approval has already been resolved~", ephemeral=True
)
return
if not self._check_auth(interaction):
await interaction.response.send_message(
"You're not authorized to approve commands~", ephemeral=True
)
return
self.resolved = True
# Update the embed with the decision
embed = interaction.message.embeds[0] if interaction.message.embeds else None
if embed:
embed.color = color
embed.set_footer(text=f"{action} by {interaction.user.display_name}")
# Disable all buttons
for child in self.children:
child.disabled = True
await interaction.response.edit_message(embed=embed, view=self)
# Store the approval decision for the gateway to pick up
try:
from tools.terminal_tool import _session_approved_patterns
if action == "allow_once":
pass # One-time approval handled by gateway
elif action == "allow_always":
_session_approved_patterns.add(self.approval_id)
except ImportError:
pass
@discord.ui.button(label="Allow Once", style=discord.ButtonStyle.green)
async def allow_once(
self, interaction: discord.Interaction, button: discord.ui.Button
):
await self._resolve(interaction, "allow_once", discord.Color.green())
@discord.ui.button(label="Always Allow", style=discord.ButtonStyle.blurple)
async def allow_always(
self, interaction: discord.Interaction, button: discord.ui.Button
):
await self._resolve(interaction, "allow_always", discord.Color.blue())
@discord.ui.button(label="Deny", style=discord.ButtonStyle.red)
async def deny(
self, interaction: discord.Interaction, button: discord.ui.Button
):
await self._resolve(interaction, "deny", discord.Color.red())
async def on_timeout(self):
"""Handle view timeout -- disable buttons and mark as expired."""
self.resolved = True
for child in self.children:
child.disabled = True

374
gateway/platforms/slack.py Normal file
View file

@ -0,0 +1,374 @@
"""
Slack platform adapter.
Uses slack-bolt (Python) with Socket Mode for:
- Receiving messages from channels and DMs
- Sending responses back
- Handling slash commands
- Thread support
"""
import asyncio
import os
from typing import Dict, List, Optional, Any
try:
from slack_bolt.async_app import AsyncApp
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
from slack_sdk.web.async_client import AsyncWebClient
SLACK_AVAILABLE = True
except ImportError:
SLACK_AVAILABLE = False
AsyncApp = Any
AsyncSocketModeHandler = Any
AsyncWebClient = Any
import sys
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
SendResult,
cache_image_from_url,
cache_audio_from_url,
)
def check_slack_requirements() -> bool:
"""Check if Slack dependencies are available."""
return SLACK_AVAILABLE
class SlackAdapter(BasePlatformAdapter):
"""
Slack bot adapter using Socket Mode.
Requires two tokens:
- SLACK_BOT_TOKEN (xoxb-...) for API calls
- SLACK_APP_TOKEN (xapp-...) for Socket Mode connection
Features:
- DMs and channel messages (mention-gated in channels)
- Thread support
- File/image/audio attachments
- Slash commands (/hermes)
- Typing indicators (not natively supported by Slack bots)
"""
MAX_MESSAGE_LENGTH = 4000 # Slack's limit is higher but mrkdwn can inflate
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.SLACK)
self._app: Optional[AsyncApp] = None
self._handler: Optional[AsyncSocketModeHandler] = None
self._bot_user_id: Optional[str] = None
async def connect(self) -> bool:
"""Connect to Slack via Socket Mode."""
if not SLACK_AVAILABLE:
print("[Slack] slack-bolt not installed. Run: pip install slack-bolt")
return False
bot_token = self.config.token
app_token = os.getenv("SLACK_APP_TOKEN")
if not bot_token:
print("[Slack] SLACK_BOT_TOKEN not set")
return False
if not app_token:
print("[Slack] SLACK_APP_TOKEN not set")
return False
try:
self._app = AsyncApp(token=bot_token)
# Get our own bot user ID for mention detection
auth_response = await self._app.client.auth_test()
self._bot_user_id = auth_response.get("user_id")
bot_name = auth_response.get("user", "unknown")
# Register message event handler
@self._app.event("message")
async def handle_message_event(event, say):
await self._handle_slack_message(event)
# Register slash command handler
@self._app.command("/hermes")
async def handle_hermes_command(ack, command):
await ack()
await self._handle_slash_command(command)
# Start Socket Mode handler in background
self._handler = AsyncSocketModeHandler(self._app, app_token)
asyncio.create_task(self._handler.start_async())
self._running = True
print(f"[Slack] Connected as @{bot_name} (Socket Mode)")
return True
except Exception as e:
print(f"[Slack] Connection failed: {e}")
return False
async def disconnect(self) -> None:
"""Disconnect from Slack."""
if self._handler:
await self._handler.close_async()
self._running = False
print("[Slack] Disconnected")
async def send(
self,
chat_id: str,
content: str,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send a message to a Slack channel or DM."""
if not self._app:
return SendResult(success=False, error="Not connected")
try:
kwargs = {
"channel": chat_id,
"text": content,
}
# Reply in thread if thread_ts is available
if reply_to:
kwargs["thread_ts"] = reply_to
elif metadata and metadata.get("thread_ts"):
kwargs["thread_ts"] = metadata["thread_ts"]
result = await self._app.client.chat_postMessage(**kwargs)
return SendResult(
success=True,
message_id=result.get("ts"),
raw_response=result,
)
except Exception as e:
print(f"[Slack] Send error: {e}")
return SendResult(success=False, error=str(e))
async def send_typing(self, chat_id: str) -> None:
"""Slack doesn't have a direct typing indicator API for bots."""
pass
async def send_image(
self,
chat_id: str,
image_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""Send an image to Slack by uploading the URL as a file."""
if not self._app:
return SendResult(success=False, error="Not connected")
try:
import httpx
# Download the image first
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
response = await client.get(image_url)
response.raise_for_status()
result = await self._app.client.files_upload_v2(
channel=chat_id,
content=response.content,
filename="image.png",
initial_comment=caption or "",
thread_ts=reply_to,
)
return SendResult(success=True, raw_response=result)
except Exception as e:
# Fall back to sending the URL as text
text = f"{caption}\n{image_url}" if caption else image_url
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
async def send_voice(
self,
chat_id: str,
audio_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
) -> SendResult:
"""Send an audio file to Slack."""
if not self._app:
return SendResult(success=False, error="Not connected")
try:
result = await self._app.client.files_upload_v2(
channel=chat_id,
file=audio_path,
filename=os.path.basename(audio_path),
initial_comment=caption or "",
thread_ts=reply_to,
)
return SendResult(success=True, raw_response=result)
except Exception as e:
return SendResult(success=False, error=str(e))
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
"""Get information about a Slack channel."""
if not self._app:
return {"name": chat_id, "type": "unknown"}
try:
result = await self._app.client.conversations_info(channel=chat_id)
channel = result.get("channel", {})
is_dm = channel.get("is_im", False)
return {
"name": channel.get("name", chat_id),
"type": "dm" if is_dm else "group",
}
except Exception:
return {"name": chat_id, "type": "unknown"}
# ----- Internal handlers -----
async def _handle_slack_message(self, event: dict) -> None:
"""Handle an incoming Slack message event."""
# Ignore bot messages (including our own)
if event.get("bot_id") or event.get("subtype") == "bot_message":
return
# Ignore message edits and deletions
subtype = event.get("subtype")
if subtype in ("message_changed", "message_deleted"):
return
text = event.get("text", "")
user_id = event.get("user", "")
channel_id = event.get("channel", "")
thread_ts = event.get("thread_ts") or event.get("ts")
ts = event.get("ts", "")
# Determine if this is a DM or channel message
channel_type = event.get("channel_type", "")
is_dm = channel_type == "im"
# In channels, only respond if bot is mentioned
if not is_dm and self._bot_user_id:
if f"<@{self._bot_user_id}>" not in text:
return
# Strip the bot mention from the text
text = text.replace(f"<@{self._bot_user_id}>", "").strip()
# Determine message type
msg_type = MessageType.TEXT
if text.startswith("/"):
msg_type = MessageType.COMMAND
# Handle file attachments
media_urls = []
media_types = []
files = event.get("files", [])
for f in files:
mimetype = f.get("mimetype", "unknown")
url = f.get("url_private_download") or f.get("url_private", "")
if mimetype.startswith("image/") and url:
try:
ext = "." + mimetype.split("/")[-1].split(";")[0]
if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
ext = ".jpg"
# Slack private URLs require the bot token as auth header
cached = await self._download_slack_file(url, ext)
media_urls.append(cached)
media_types.append(mimetype)
msg_type = MessageType.PHOTO
except Exception as e:
print(f"[Slack] Failed to cache image: {e}", flush=True)
elif mimetype.startswith("audio/") and url:
try:
ext = "." + mimetype.split("/")[-1].split(";")[0]
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
ext = ".ogg"
cached = await self._download_slack_file(url, ext, audio=True)
media_urls.append(cached)
media_types.append(mimetype)
msg_type = MessageType.VOICE
except Exception as e:
print(f"[Slack] Failed to cache audio: {e}", flush=True)
# Build source
source = self.build_source(
chat_id=channel_id,
chat_name=channel_id, # Will be resolved later if needed
chat_type="dm" if is_dm else "group",
user_id=user_id,
thread_id=thread_ts,
)
msg_event = MessageEvent(
text=text,
message_type=msg_type,
source=source,
raw_message=event,
message_id=ts,
media_urls=media_urls,
media_types=media_types,
reply_to_message_id=thread_ts if thread_ts != ts else None,
)
await self.handle_message(msg_event)
async def _handle_slash_command(self, command: dict) -> None:
"""Handle /hermes slash command."""
text = command.get("text", "").strip()
user_id = command.get("user_id", "")
channel_id = command.get("channel_id", "")
# Map common slash subcommands to gateway commands
if text in ("new", "reset"):
text = "/reset"
elif text == "status":
text = "/status"
elif text == "stop":
text = "/stop"
elif text:
pass # Treat as a regular question
else:
text = "/help"
source = self.build_source(
chat_id=channel_id,
chat_type="dm", # Slash commands are always in DM-like context
user_id=user_id,
)
event = MessageEvent(
text=text,
message_type=MessageType.COMMAND if text.startswith("/") else MessageType.TEXT,
source=source,
raw_message=command,
)
await self.handle_message(event)
async def _download_slack_file(self, url: str, ext: str, audio: bool = False) -> str:
"""Download a Slack file using the bot token for auth."""
import httpx
bot_token = self.config.token
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
response = await client.get(
url,
headers={"Authorization": f"Bearer {bot_token}"},
)
response.raise_for_status()
if audio:
from gateway.platforms.base import cache_audio_from_bytes
return cache_audio_from_bytes(response.content, ext)
else:
from gateway.platforms.base import cache_image_from_bytes
return cache_image_from_bytes(response.content, ext)

View file

@ -39,6 +39,7 @@ from gateway.platforms.base import (
MessageType,
SendResult,
cache_image_from_bytes,
cache_audio_from_bytes,
)
@ -91,7 +92,7 @@ class TelegramAdapter(BasePlatformAdapter):
self._handle_command
))
self._app.add_handler(TelegramMessageHandler(
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL,
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
self._handle_media_message
))
@ -311,7 +312,9 @@ class TelegramAdapter(BasePlatformAdapter):
msg = update.message
# Determine media type
if msg.photo:
if msg.sticker:
msg_type = MessageType.STICKER
elif msg.photo:
msg_type = MessageType.PHOTO
elif msg.video:
msg_type = MessageType.VIDEO
@ -328,6 +331,12 @@ class TelegramAdapter(BasePlatformAdapter):
if msg.caption:
event.text = msg.caption
# Handle stickers: describe via vision tool with caching
if msg.sticker:
await self._handle_sticker(msg, event)
await self.handle_message(event)
return
# Download photo to local image cache so the vision tool can access it
# even after Telegram's ephemeral file URLs expire (~1 hour).
if msg.photo:
@ -352,8 +361,97 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as e:
print(f"[Telegram] Failed to cache photo: {e}", flush=True)
# Download voice/audio messages to cache for STT transcription
if msg.voice:
try:
file_obj = await msg.voice.get_file()
audio_bytes = await file_obj.download_as_bytearray()
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
event.media_urls = [cached_path]
event.media_types = ["audio/ogg"]
print(f"[Telegram] Cached user voice: {cached_path}", flush=True)
except Exception as e:
print(f"[Telegram] Failed to cache voice: {e}", flush=True)
elif msg.audio:
try:
file_obj = await msg.audio.get_file()
audio_bytes = await file_obj.download_as_bytearray()
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
event.media_urls = [cached_path]
event.media_types = ["audio/mp3"]
print(f"[Telegram] Cached user audio: {cached_path}", flush=True)
except Exception as e:
print(f"[Telegram] Failed to cache audio: {e}", flush=True)
await self.handle_message(event)
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
"""
Describe a Telegram sticker via vision analysis, with caching.
For static stickers (WEBP), we download, analyze with vision, and cache
the description by file_unique_id. For animated/video stickers, we inject
a placeholder noting the emoji.
"""
from gateway.sticker_cache import (
get_cached_description,
cache_sticker_description,
build_sticker_injection,
build_animated_sticker_injection,
STICKER_VISION_PROMPT,
)
sticker = msg.sticker
emoji = sticker.emoji or ""
set_name = sticker.set_name or ""
# Animated and video stickers can't be analyzed as static images
if sticker.is_animated or sticker.is_video:
event.text = build_animated_sticker_injection(emoji)
return
# Check the cache first
cached = get_cached_description(sticker.file_unique_id)
if cached:
event.text = build_sticker_injection(
cached["description"], cached.get("emoji", emoji), cached.get("set_name", set_name)
)
print(f"[Telegram] Sticker cache hit: {sticker.file_unique_id}", flush=True)
return
# Cache miss -- download and analyze
try:
file_obj = await sticker.get_file()
image_bytes = await file_obj.download_as_bytearray()
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=".webp")
print(f"[Telegram] Analyzing sticker: {cached_path}", flush=True)
from tools.vision_tools import vision_analyze_tool
import json as _json
result_json = await vision_analyze_tool(
image_url=cached_path,
user_prompt=STICKER_VISION_PROMPT,
)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "a sticker")
cache_sticker_description(sticker.file_unique_id, description, emoji, set_name)
event.text = build_sticker_injection(description, emoji, set_name)
else:
# Vision failed -- use emoji as fallback
event.text = build_sticker_injection(
f"a sticker with emoji {emoji}" if emoji else "a sticker",
emoji, set_name,
)
except Exception as e:
print(f"[Telegram] Sticker analysis error: {e}", flush=True)
event.text = build_sticker_injection(
f"a sticker with emoji {emoji}" if emoji else "a sticker",
emoji, set_name,
)
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
"""Build a MessageEvent from a Telegram message."""
chat = message.chat

View file

@ -31,6 +31,7 @@ from gateway.platforms.base import (
MessageType,
SendResult,
cache_image_from_url,
cache_audio_from_url,
)
@ -322,8 +323,18 @@ class WhatsAppAdapter(BasePlatformAdapter):
print(f"[{self.name}] Cached user image: {cached_path}", flush=True)
except Exception as e:
print(f"[{self.name}] Failed to cache image: {e}", flush=True)
cached_urls.append(url) # Fall back to original URL
cached_urls.append(url)
media_types.append("image/jpeg")
elif msg_type == MessageType.VOICE and url.startswith(("http://", "https://")):
try:
cached_path = await cache_audio_from_url(url, ext=".ogg")
cached_urls.append(cached_path)
media_types.append("audio/ogg")
print(f"[{self.name}] Cached user voice: {cached_path}", flush=True)
except Exception as e:
print(f"[{self.name}] Failed to cache voice: {e}", flush=True)
cached_urls.append(url)
media_types.append("audio/ogg")
else:
cached_urls.append(url)
media_types.append("unknown")