Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
This commit is contained in:
parent
5404a8fcd8
commit
69aa35a51c
23 changed files with 2080 additions and 32 deletions
|
|
@ -108,6 +108,65 @@ def cleanup_image_cache(max_age_hours: int = 24) -> int:
|
|||
return removed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Audio cache utilities
|
||||
#
|
||||
# Same pattern as image cache -- voice messages from platforms are downloaded
|
||||
# here so the STT tool (OpenAI Whisper) can transcribe them from local files.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
AUDIO_CACHE_DIR = Path(os.path.expanduser("~/.hermes/audio_cache"))
|
||||
|
||||
|
||||
def get_audio_cache_dir() -> Path:
|
||||
"""Return the audio cache directory, creating it if it doesn't exist."""
|
||||
AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
return AUDIO_CACHE_DIR
|
||||
|
||||
|
||||
def cache_audio_from_bytes(data: bytes, ext: str = ".ogg") -> str:
|
||||
"""
|
||||
Save raw audio bytes to the cache and return the absolute file path.
|
||||
|
||||
Args:
|
||||
data: Raw audio bytes.
|
||||
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached audio file as a string.
|
||||
"""
|
||||
cache_dir = get_audio_cache_dir()
|
||||
filename = f"audio_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
filepath.write_bytes(data)
|
||||
return str(filepath)
|
||||
|
||||
|
||||
async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
|
||||
"""
|
||||
Download an audio file from a URL and save it to the local cache.
|
||||
|
||||
Args:
|
||||
url: The HTTP/HTTPS URL to download from.
|
||||
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached audio file as a string.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
|
||||
"Accept": "audio/*,*/*;q=0.8",
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
|
||||
|
||||
class MessageType(Enum):
|
||||
"""Types of incoming messages."""
|
||||
TEXT = "text"
|
||||
|
|
@ -422,6 +481,27 @@ class BasePlatformAdapter(ABC):
|
|||
# Spawn background task to process this message
|
||||
asyncio.create_task(self._process_message_background(event, session_key))
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
"""
|
||||
Return a random delay in seconds for human-like response pacing.
|
||||
|
||||
Reads from env vars:
|
||||
HERMES_HUMAN_DELAY_MODE: "off" (default) | "natural" | "custom"
|
||||
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
|
||||
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
|
||||
"""
|
||||
import random
|
||||
|
||||
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
|
||||
if mode == "off":
|
||||
return 0.0
|
||||
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
|
||||
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
|
||||
if mode == "natural":
|
||||
min_ms, max_ms = 800, 2500
|
||||
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
|
||||
|
||||
async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
|
||||
"""Background task that actually processes the message."""
|
||||
# Create interrupt event for this session
|
||||
|
|
@ -463,8 +543,13 @@ class BasePlatformAdapter(ABC):
|
|||
if not fallback_result.success:
|
||||
print(f"[{self.name}] Fallback send also failed: {fallback_result.error}")
|
||||
|
||||
# Human-like pacing delay between text and media
|
||||
human_delay = self._get_human_delay()
|
||||
|
||||
# Send extracted images as native attachments
|
||||
for image_url, alt_text in images:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
img_result = await self.send_image(
|
||||
chat_id=event.source.chat_id,
|
||||
|
|
@ -478,6 +563,8 @@ class BasePlatformAdapter(ABC):
|
|||
|
||||
# Send extracted audio/voice files as native attachments
|
||||
for audio_path, is_voice in media_files:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
voice_result = await self.send_voice(
|
||||
chat_id=event.source.chat_id,
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -49,7 +50,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
- Receiving messages from servers and DMs
|
||||
- Sending responses with Discord markdown
|
||||
- Thread support
|
||||
- Slash commands (future)
|
||||
- Native slash commands (/ask, /reset, /status, /stop)
|
||||
- Button-based exec approvals
|
||||
- Auto-threading for long conversations
|
||||
- Reaction-based feedback
|
||||
"""
|
||||
|
||||
# Discord message limits
|
||||
|
|
@ -59,6 +63,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
super().__init__(config, Platform.DISCORD)
|
||||
self._client: Optional[commands.Bot] = None
|
||||
self._ready_event = asyncio.Event()
|
||||
self._allowed_user_ids: set = set() # For button approval authorization
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
|
|
@ -83,10 +88,23 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
intents=intents,
|
||||
)
|
||||
|
||||
# Parse allowed user IDs for button authorization
|
||||
allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
|
||||
if allowed_env:
|
||||
self._allowed_user_ids = {
|
||||
uid.strip() for uid in allowed_env.split(",") if uid.strip()
|
||||
}
|
||||
|
||||
# Register event handlers
|
||||
@self._client.event
|
||||
async def on_ready():
|
||||
print(f"[{self.name}] Connected as {self._client.user}")
|
||||
# Sync slash commands with Discord
|
||||
try:
|
||||
synced = await self._client.tree.sync()
|
||||
print(f"[{self.name}] Synced {len(synced)} slash command(s)")
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Slash command sync failed: {e}")
|
||||
self._ready_event.set()
|
||||
|
||||
@self._client.event
|
||||
|
|
@ -96,6 +114,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
return
|
||||
await self._handle_message(message)
|
||||
|
||||
# Register slash commands
|
||||
self._register_slash_commands()
|
||||
|
||||
# Start the bot in background
|
||||
asyncio.create_task(self._client.start(self.config.token))
|
||||
|
||||
|
|
@ -325,6 +346,116 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# Discord markdown is fairly standard, no special escaping needed
|
||||
return content
|
||||
|
||||
def _register_slash_commands(self) -> None:
|
||||
"""Register Discord slash commands on the command tree."""
|
||||
if not self._client:
|
||||
return
|
||||
|
||||
tree = self._client.tree
|
||||
|
||||
@tree.command(name="ask", description="Ask Hermes a question")
|
||||
@discord.app_commands.describe(question="Your question for Hermes")
|
||||
async def slash_ask(interaction: discord.Interaction, question: str):
|
||||
await interaction.response.defer()
|
||||
event = self._build_slash_event(interaction, question)
|
||||
await self.handle_message(event)
|
||||
# The response is sent via the normal send() flow
|
||||
# Send a followup to close the interaction if needed
|
||||
try:
|
||||
await interaction.followup.send("Processing complete~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@tree.command(name="reset", description="Reset your Hermes session")
|
||||
async def slash_reset(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/reset")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Session reset~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@tree.command(name="status", description="Show Hermes session status")
|
||||
async def slash_status(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/status")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Status sent~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@tree.command(name="stop", description="Stop the running Hermes agent")
|
||||
async def slash_stop(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/stop")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Stop requested~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Discord slash command interaction."""
|
||||
is_dm = isinstance(interaction.channel, discord.DMChannel)
|
||||
chat_type = "dm" if is_dm else "group"
|
||||
chat_name = ""
|
||||
if not is_dm and hasattr(interaction.channel, "name"):
|
||||
chat_name = interaction.channel.name
|
||||
if hasattr(interaction.channel, "guild") and interaction.channel.guild:
|
||||
chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=str(interaction.channel_id),
|
||||
chat_name=chat_name,
|
||||
chat_type=chat_type,
|
||||
user_id=str(interaction.user.id),
|
||||
user_name=interaction.user.display_name,
|
||||
)
|
||||
|
||||
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=interaction,
|
||||
)
|
||||
|
||||
async def send_exec_approval(
|
||||
self, chat_id: str, command: str, approval_id: str
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send a button-based exec approval prompt for a dangerous command.
|
||||
|
||||
Returns SendResult. The approval is resolved when a user clicks a button.
|
||||
"""
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
|
||||
embed = discord.Embed(
|
||||
title="Command Approval Required",
|
||||
description=f"```\n{command[:500]}\n```",
|
||||
color=discord.Color.orange(),
|
||||
)
|
||||
embed.set_footer(text=f"Approval ID: {approval_id}")
|
||||
|
||||
view = ExecApprovalView(
|
||||
approval_id=approval_id,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def _handle_message(self, message: DiscordMessage) -> None:
|
||||
"""Handle incoming Discord messages."""
|
||||
# In server channels (not DMs), require the bot to be @mentioned
|
||||
|
|
@ -424,8 +555,21 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# Fall back to the CDN URL if caching fails
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
elif content_type.startswith("audio/"):
|
||||
try:
|
||||
ext = "." + content_type.split("/")[-1].split(";")[0]
|
||||
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
|
||||
ext = ".ogg"
|
||||
cached_path = await cache_audio_from_url(att.url, ext=ext)
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(content_type)
|
||||
print(f"[Discord] Cached user audio: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Discord] Failed to cache audio attachment: {e}", flush=True)
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
else:
|
||||
# Non-image attachments: keep the original URL
|
||||
# Other attachments: keep the original URL
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
|
||||
|
|
@ -442,3 +586,94 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Discord UI Components (outside the adapter class)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if DISCORD_AVAILABLE:
|
||||
|
||||
class ExecApprovalView(discord.ui.View):
|
||||
"""
|
||||
Interactive button view for exec approval of dangerous commands.
|
||||
|
||||
Shows three buttons: Allow Once (green), Always Allow (blue), Deny (red).
|
||||
Only users in the allowed list can click. The view times out after 5 minutes.
|
||||
"""
|
||||
|
||||
def __init__(self, approval_id: str, allowed_user_ids: set):
|
||||
super().__init__(timeout=300) # 5-minute timeout
|
||||
self.approval_id = approval_id
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
"""Verify the user clicking is authorized."""
|
||||
if not self.allowed_user_ids:
|
||||
return True # No allowlist = anyone can approve
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, action: str, color: discord.Color
|
||||
):
|
||||
"""Resolve the approval and update the message."""
|
||||
if self.resolved:
|
||||
await interaction.response.send_message(
|
||||
"This approval has already been resolved~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
if not self._check_auth(interaction):
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized to approve commands~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
self.resolved = True
|
||||
|
||||
# Update the embed with the decision
|
||||
embed = interaction.message.embeds[0] if interaction.message.embeds else None
|
||||
if embed:
|
||||
embed.color = color
|
||||
embed.set_footer(text=f"{action} by {interaction.user.display_name}")
|
||||
|
||||
# Disable all buttons
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
await interaction.response.edit_message(embed=embed, view=self)
|
||||
|
||||
# Store the approval decision for the gateway to pick up
|
||||
try:
|
||||
from tools.terminal_tool import _session_approved_patterns
|
||||
if action == "allow_once":
|
||||
pass # One-time approval handled by gateway
|
||||
elif action == "allow_always":
|
||||
_session_approved_patterns.add(self.approval_id)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
@discord.ui.button(label="Allow Once", style=discord.ButtonStyle.green)
|
||||
async def allow_once(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "allow_once", discord.Color.green())
|
||||
|
||||
@discord.ui.button(label="Always Allow", style=discord.ButtonStyle.blurple)
|
||||
async def allow_always(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "allow_always", discord.Color.blue())
|
||||
|
||||
@discord.ui.button(label="Deny", style=discord.ButtonStyle.red)
|
||||
async def deny(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "deny", discord.Color.red())
|
||||
|
||||
async def on_timeout(self):
|
||||
"""Handle view timeout -- disable buttons and mark as expired."""
|
||||
self.resolved = True
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
|
|
|||
374
gateway/platforms/slack.py
Normal file
374
gateway/platforms/slack.py
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
"""
|
||||
Slack platform adapter.
|
||||
|
||||
Uses slack-bolt (Python) with Socket Mode for:
|
||||
- Receiving messages from channels and DMs
|
||||
- Sending responses back
|
||||
- Handling slash commands
|
||||
- Thread support
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
try:
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
||||
from slack_sdk.web.async_client import AsyncWebClient
|
||||
SLACK_AVAILABLE = True
|
||||
except ImportError:
|
||||
SLACK_AVAILABLE = False
|
||||
AsyncApp = Any
|
||||
AsyncSocketModeHandler = Any
|
||||
AsyncWebClient = Any
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
def check_slack_requirements() -> bool:
|
||||
"""Check if Slack dependencies are available."""
|
||||
return SLACK_AVAILABLE
|
||||
|
||||
|
||||
class SlackAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
Slack bot adapter using Socket Mode.
|
||||
|
||||
Requires two tokens:
|
||||
- SLACK_BOT_TOKEN (xoxb-...) for API calls
|
||||
- SLACK_APP_TOKEN (xapp-...) for Socket Mode connection
|
||||
|
||||
Features:
|
||||
- DMs and channel messages (mention-gated in channels)
|
||||
- Thread support
|
||||
- File/image/audio attachments
|
||||
- Slash commands (/hermes)
|
||||
- Typing indicators (not natively supported by Slack bots)
|
||||
"""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 4000 # Slack's limit is higher but mrkdwn can inflate
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.SLACK)
|
||||
self._app: Optional[AsyncApp] = None
|
||||
self._handler: Optional[AsyncSocketModeHandler] = None
|
||||
self._bot_user_id: Optional[str] = None
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
if not SLACK_AVAILABLE:
|
||||
print("[Slack] slack-bolt not installed. Run: pip install slack-bolt")
|
||||
return False
|
||||
|
||||
bot_token = self.config.token
|
||||
app_token = os.getenv("SLACK_APP_TOKEN")
|
||||
|
||||
if not bot_token:
|
||||
print("[Slack] SLACK_BOT_TOKEN not set")
|
||||
return False
|
||||
if not app_token:
|
||||
print("[Slack] SLACK_APP_TOKEN not set")
|
||||
return False
|
||||
|
||||
try:
|
||||
self._app = AsyncApp(token=bot_token)
|
||||
|
||||
# Get our own bot user ID for mention detection
|
||||
auth_response = await self._app.client.auth_test()
|
||||
self._bot_user_id = auth_response.get("user_id")
|
||||
bot_name = auth_response.get("user", "unknown")
|
||||
|
||||
# Register message event handler
|
||||
@self._app.event("message")
|
||||
async def handle_message_event(event, say):
|
||||
await self._handle_slack_message(event)
|
||||
|
||||
# Register slash command handler
|
||||
@self._app.command("/hermes")
|
||||
async def handle_hermes_command(ack, command):
|
||||
await ack()
|
||||
await self._handle_slash_command(command)
|
||||
|
||||
# Start Socket Mode handler in background
|
||||
self._handler = AsyncSocketModeHandler(self._app, app_token)
|
||||
asyncio.create_task(self._handler.start_async())
|
||||
|
||||
self._running = True
|
||||
print(f"[Slack] Connected as @{bot_name} (Socket Mode)")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Connection failed: {e}")
|
||||
return False
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Disconnect from Slack."""
|
||||
if self._handler:
|
||||
await self._handler.close_async()
|
||||
self._running = False
|
||||
print("[Slack] Disconnected")
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a message to a Slack channel or DM."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
kwargs = {
|
||||
"channel": chat_id,
|
||||
"text": content,
|
||||
}
|
||||
|
||||
# Reply in thread if thread_ts is available
|
||||
if reply_to:
|
||||
kwargs["thread_ts"] = reply_to
|
||||
elif metadata and metadata.get("thread_ts"):
|
||||
kwargs["thread_ts"] = metadata["thread_ts"]
|
||||
|
||||
result = await self._app.client.chat_postMessage(**kwargs)
|
||||
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=result.get("ts"),
|
||||
raw_response=result,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Send error: {e}")
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Slack doesn't have a direct typing indicator API for bots."""
|
||||
pass
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an image to Slack by uploading the URL as a file."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import httpx
|
||||
|
||||
# Download the image first
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(image_url)
|
||||
response.raise_for_status()
|
||||
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
content=response.content,
|
||||
filename="image.png",
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
# Fall back to sending the URL as text
|
||||
text = f"{caption}\n{image_url}" if caption else image_url
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
audio_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an audio file to Slack."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=audio_path,
|
||||
filename=os.path.basename(audio_path),
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Get information about a Slack channel."""
|
||||
if not self._app:
|
||||
return {"name": chat_id, "type": "unknown"}
|
||||
|
||||
try:
|
||||
result = await self._app.client.conversations_info(channel=chat_id)
|
||||
channel = result.get("channel", {})
|
||||
is_dm = channel.get("is_im", False)
|
||||
return {
|
||||
"name": channel.get("name", chat_id),
|
||||
"type": "dm" if is_dm else "group",
|
||||
}
|
||||
except Exception:
|
||||
return {"name": chat_id, "type": "unknown"}
|
||||
|
||||
# ----- Internal handlers -----
|
||||
|
||||
async def _handle_slack_message(self, event: dict) -> None:
|
||||
"""Handle an incoming Slack message event."""
|
||||
# Ignore bot messages (including our own)
|
||||
if event.get("bot_id") or event.get("subtype") == "bot_message":
|
||||
return
|
||||
|
||||
# Ignore message edits and deletions
|
||||
subtype = event.get("subtype")
|
||||
if subtype in ("message_changed", "message_deleted"):
|
||||
return
|
||||
|
||||
text = event.get("text", "")
|
||||
user_id = event.get("user", "")
|
||||
channel_id = event.get("channel", "")
|
||||
thread_ts = event.get("thread_ts") or event.get("ts")
|
||||
ts = event.get("ts", "")
|
||||
|
||||
# Determine if this is a DM or channel message
|
||||
channel_type = event.get("channel_type", "")
|
||||
is_dm = channel_type == "im"
|
||||
|
||||
# In channels, only respond if bot is mentioned
|
||||
if not is_dm and self._bot_user_id:
|
||||
if f"<@{self._bot_user_id}>" not in text:
|
||||
return
|
||||
# Strip the bot mention from the text
|
||||
text = text.replace(f"<@{self._bot_user_id}>", "").strip()
|
||||
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
if text.startswith("/"):
|
||||
msg_type = MessageType.COMMAND
|
||||
|
||||
# Handle file attachments
|
||||
media_urls = []
|
||||
media_types = []
|
||||
files = event.get("files", [])
|
||||
for f in files:
|
||||
mimetype = f.get("mimetype", "unknown")
|
||||
url = f.get("url_private_download") or f.get("url_private", "")
|
||||
if mimetype.startswith("image/") and url:
|
||||
try:
|
||||
ext = "." + mimetype.split("/")[-1].split(";")[0]
|
||||
if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
|
||||
ext = ".jpg"
|
||||
# Slack private URLs require the bot token as auth header
|
||||
cached = await self._download_slack_file(url, ext)
|
||||
media_urls.append(cached)
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.PHOTO
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache image: {e}", flush=True)
|
||||
elif mimetype.startswith("audio/") and url:
|
||||
try:
|
||||
ext = "." + mimetype.split("/")[-1].split(";")[0]
|
||||
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
|
||||
ext = ".ogg"
|
||||
cached = await self._download_slack_file(url, ext, audio=True)
|
||||
media_urls.append(cached)
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.VOICE
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache audio: {e}", flush=True)
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_name=channel_id, # Will be resolved later if needed
|
||||
chat_type="dm" if is_dm else "group",
|
||||
user_id=user_id,
|
||||
thread_id=thread_ts,
|
||||
)
|
||||
|
||||
msg_event = MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=event,
|
||||
message_id=ts,
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
reply_to_message_id=thread_ts if thread_ts != ts else None,
|
||||
)
|
||||
|
||||
await self.handle_message(msg_event)
|
||||
|
||||
async def _handle_slash_command(self, command: dict) -> None:
|
||||
"""Handle /hermes slash command."""
|
||||
text = command.get("text", "").strip()
|
||||
user_id = command.get("user_id", "")
|
||||
channel_id = command.get("channel_id", "")
|
||||
|
||||
# Map common slash subcommands to gateway commands
|
||||
if text in ("new", "reset"):
|
||||
text = "/reset"
|
||||
elif text == "status":
|
||||
text = "/status"
|
||||
elif text == "stop":
|
||||
text = "/stop"
|
||||
elif text:
|
||||
pass # Treat as a regular question
|
||||
else:
|
||||
text = "/help"
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_type="dm", # Slash commands are always in DM-like context
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.COMMAND if text.startswith("/") else MessageType.TEXT,
|
||||
source=source,
|
||||
raw_message=command,
|
||||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _download_slack_file(self, url: str, ext: str, audio: bool = False) -> str:
|
||||
"""Download a Slack file using the bot token for auth."""
|
||||
import httpx
|
||||
|
||||
bot_token = self.config.token
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={"Authorization": f"Bearer {bot_token}"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
if audio:
|
||||
from gateway.platforms.base import cache_audio_from_bytes
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
else:
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
return cache_image_from_bytes(response.content, ext)
|
||||
|
|
@ -39,6 +39,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -91,7 +92,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
self._handle_command
|
||||
))
|
||||
self._app.add_handler(TelegramMessageHandler(
|
||||
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL,
|
||||
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
|
||||
self._handle_media_message
|
||||
))
|
||||
|
||||
|
|
@ -311,7 +312,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
msg = update.message
|
||||
|
||||
# Determine media type
|
||||
if msg.photo:
|
||||
if msg.sticker:
|
||||
msg_type = MessageType.STICKER
|
||||
elif msg.photo:
|
||||
msg_type = MessageType.PHOTO
|
||||
elif msg.video:
|
||||
msg_type = MessageType.VIDEO
|
||||
|
|
@ -328,6 +331,12 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
if msg.caption:
|
||||
event.text = msg.caption
|
||||
|
||||
# Handle stickers: describe via vision tool with caching
|
||||
if msg.sticker:
|
||||
await self._handle_sticker(msg, event)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Download photo to local image cache so the vision tool can access it
|
||||
# even after Telegram's ephemeral file URLs expire (~1 hour).
|
||||
if msg.photo:
|
||||
|
|
@ -352,8 +361,97 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache photo: {e}", flush=True)
|
||||
|
||||
# Download voice/audio messages to cache for STT transcription
|
||||
if msg.voice:
|
||||
try:
|
||||
file_obj = await msg.voice.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = ["audio/ogg"]
|
||||
print(f"[Telegram] Cached user voice: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache voice: {e}", flush=True)
|
||||
elif msg.audio:
|
||||
try:
|
||||
file_obj = await msg.audio.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = ["audio/mp3"]
|
||||
print(f"[Telegram] Cached user audio: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache audio: {e}", flush=True)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
|
||||
"""
|
||||
Describe a Telegram sticker via vision analysis, with caching.
|
||||
|
||||
For static stickers (WEBP), we download, analyze with vision, and cache
|
||||
the description by file_unique_id. For animated/video stickers, we inject
|
||||
a placeholder noting the emoji.
|
||||
"""
|
||||
from gateway.sticker_cache import (
|
||||
get_cached_description,
|
||||
cache_sticker_description,
|
||||
build_sticker_injection,
|
||||
build_animated_sticker_injection,
|
||||
STICKER_VISION_PROMPT,
|
||||
)
|
||||
|
||||
sticker = msg.sticker
|
||||
emoji = sticker.emoji or ""
|
||||
set_name = sticker.set_name or ""
|
||||
|
||||
# Animated and video stickers can't be analyzed as static images
|
||||
if sticker.is_animated or sticker.is_video:
|
||||
event.text = build_animated_sticker_injection(emoji)
|
||||
return
|
||||
|
||||
# Check the cache first
|
||||
cached = get_cached_description(sticker.file_unique_id)
|
||||
if cached:
|
||||
event.text = build_sticker_injection(
|
||||
cached["description"], cached.get("emoji", emoji), cached.get("set_name", set_name)
|
||||
)
|
||||
print(f"[Telegram] Sticker cache hit: {sticker.file_unique_id}", flush=True)
|
||||
return
|
||||
|
||||
# Cache miss -- download and analyze
|
||||
try:
|
||||
file_obj = await sticker.get_file()
|
||||
image_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=".webp")
|
||||
print(f"[Telegram] Analyzing sticker: {cached_path}", flush=True)
|
||||
|
||||
from tools.vision_tools import vision_analyze_tool
|
||||
import json as _json
|
||||
|
||||
result_json = await vision_analyze_tool(
|
||||
image_url=cached_path,
|
||||
user_prompt=STICKER_VISION_PROMPT,
|
||||
)
|
||||
result = _json.loads(result_json)
|
||||
|
||||
if result.get("success"):
|
||||
description = result.get("analysis", "a sticker")
|
||||
cache_sticker_description(sticker.file_unique_id, description, emoji, set_name)
|
||||
event.text = build_sticker_injection(description, emoji, set_name)
|
||||
else:
|
||||
# Vision failed -- use emoji as fallback
|
||||
event.text = build_sticker_injection(
|
||||
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
||||
emoji, set_name,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Sticker analysis error: {e}", flush=True)
|
||||
event.text = build_sticker_injection(
|
||||
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
||||
emoji, set_name,
|
||||
)
|
||||
|
||||
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Telegram message."""
|
||||
chat = message.chat
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -322,8 +323,18 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
print(f"[{self.name}] Cached user image: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to cache image: {e}", flush=True)
|
||||
cached_urls.append(url) # Fall back to original URL
|
||||
cached_urls.append(url)
|
||||
media_types.append("image/jpeg")
|
||||
elif msg_type == MessageType.VOICE and url.startswith(("http://", "https://")):
|
||||
try:
|
||||
cached_path = await cache_audio_from_url(url, ext=".ogg")
|
||||
cached_urls.append(cached_path)
|
||||
media_types.append("audio/ogg")
|
||||
print(f"[{self.name}] Cached user voice: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to cache voice: {e}", flush=True)
|
||||
cached_urls.append(url)
|
||||
media_types.append("audio/ogg")
|
||||
else:
|
||||
cached_urls.append(url)
|
||||
media_types.append("unknown")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue