Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
This commit is contained in:
parent
5404a8fcd8
commit
69aa35a51c
23 changed files with 2080 additions and 32 deletions
|
|
@ -22,6 +22,7 @@ class Platform(Enum):
|
|||
TELEGRAM = "telegram"
|
||||
DISCORD = "discord"
|
||||
WHATSAPP = "whatsapp"
|
||||
SLACK = "slack"
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -308,6 +309,22 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
config.platforms[Platform.WHATSAPP] = PlatformConfig()
|
||||
config.platforms[Platform.WHATSAPP].enabled = True
|
||||
|
||||
# Slack
|
||||
slack_token = os.getenv("SLACK_BOT_TOKEN")
|
||||
if slack_token:
|
||||
if Platform.SLACK not in config.platforms:
|
||||
config.platforms[Platform.SLACK] = PlatformConfig()
|
||||
config.platforms[Platform.SLACK].enabled = True
|
||||
config.platforms[Platform.SLACK].token = slack_token
|
||||
# Home channel
|
||||
slack_home = os.getenv("SLACK_HOME_CHANNEL")
|
||||
if slack_home:
|
||||
config.platforms[Platform.SLACK].home_channel = HomeChannel(
|
||||
platform=Platform.SLACK,
|
||||
chat_id=slack_home,
|
||||
name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""),
|
||||
)
|
||||
|
||||
# Session settings
|
||||
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
|
||||
if idle_minutes:
|
||||
|
|
|
|||
150
gateway/hooks.py
Normal file
150
gateway/hooks.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
"""
|
||||
Event Hook System
|
||||
|
||||
A lightweight event-driven system that fires handlers at key lifecycle points.
|
||||
Hooks are discovered from ~/.hermes/hooks/ directories, each containing:
|
||||
- HOOK.yaml (metadata: name, description, events list)
|
||||
- handler.py (Python handler with async def handle(event_type, context))
|
||||
|
||||
Events:
|
||||
- gateway:startup -- Gateway process starts
|
||||
- session:start -- New session created
|
||||
- session:reset -- User ran /new or /reset
|
||||
- agent:start -- Agent begins processing a message
|
||||
- agent:step -- Each turn in the tool-calling loop
|
||||
- agent:end -- Agent finishes processing
|
||||
- command:* -- Any slash command executed (wildcard match)
|
||||
|
||||
Errors in hooks are caught and logged but never block the main pipeline.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
HOOKS_DIR = Path(os.path.expanduser("~/.hermes/hooks"))
|
||||
|
||||
|
||||
class HookRegistry:
|
||||
"""
|
||||
Discovers, loads, and fires event hooks.
|
||||
|
||||
Usage:
|
||||
registry = HookRegistry()
|
||||
registry.discover_and_load()
|
||||
await registry.emit("agent:start", {"platform": "telegram", ...})
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# event_type -> [handler_fn, ...]
|
||||
self._handlers: Dict[str, List[Callable]] = {}
|
||||
self._loaded_hooks: List[dict] = [] # metadata for listing
|
||||
|
||||
@property
|
||||
def loaded_hooks(self) -> List[dict]:
|
||||
"""Return metadata about all loaded hooks."""
|
||||
return list(self._loaded_hooks)
|
||||
|
||||
def discover_and_load(self) -> None:
|
||||
"""
|
||||
Scan the hooks directory for hook directories and load their handlers.
|
||||
|
||||
Each hook directory must contain:
|
||||
- HOOK.yaml with at least 'name' and 'events' keys
|
||||
- handler.py with a top-level 'handle' function (sync or async)
|
||||
"""
|
||||
if not HOOKS_DIR.exists():
|
||||
return
|
||||
|
||||
for hook_dir in sorted(HOOKS_DIR.iterdir()):
|
||||
if not hook_dir.is_dir():
|
||||
continue
|
||||
|
||||
manifest_path = hook_dir / "HOOK.yaml"
|
||||
handler_path = hook_dir / "handler.py"
|
||||
|
||||
if not manifest_path.exists() or not handler_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
manifest = yaml.safe_load(manifest_path.read_text(encoding="utf-8"))
|
||||
if not manifest or not isinstance(manifest, dict):
|
||||
print(f"[hooks] Skipping {hook_dir.name}: invalid HOOK.yaml", flush=True)
|
||||
continue
|
||||
|
||||
hook_name = manifest.get("name", hook_dir.name)
|
||||
events = manifest.get("events", [])
|
||||
if not events:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
print(f"[hooks] Skipping {hook_name}: no 'handle' function found", flush=True)
|
||||
continue
|
||||
|
||||
# Register the handler for each declared event
|
||||
for event in events:
|
||||
self._handlers.setdefault(event, []).append(handle_fn)
|
||||
|
||||
self._loaded_hooks.append({
|
||||
"name": hook_name,
|
||||
"description": manifest.get("description", ""),
|
||||
"events": events,
|
||||
"path": str(hook_dir),
|
||||
})
|
||||
|
||||
print(f"[hooks] Loaded hook '{hook_name}' for events: {events}", flush=True)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)
|
||||
|
||||
async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""
|
||||
Fire all handlers registered for an event.
|
||||
|
||||
Supports wildcard matching: handlers registered for "command:*" will
|
||||
fire for any "command:..." event. Handlers registered for a base type
|
||||
like "agent" won't fire for "agent:start" -- only exact matches and
|
||||
explicit wildcards.
|
||||
|
||||
Args:
|
||||
event_type: The event identifier (e.g. "agent:start").
|
||||
context: Optional dict with event-specific data.
|
||||
"""
|
||||
if context is None:
|
||||
context = {}
|
||||
|
||||
# Collect handlers: exact match + wildcard match
|
||||
handlers = list(self._handlers.get(event_type, []))
|
||||
|
||||
# Check for wildcard patterns (e.g., "command:*" matches "command:reset")
|
||||
if ":" in event_type:
|
||||
base = event_type.split(":")[0]
|
||||
wildcard_key = f"{base}:*"
|
||||
handlers.extend(self._handlers.get(wildcard_key, []))
|
||||
|
||||
for fn in handlers:
|
||||
try:
|
||||
result = fn(event_type, context)
|
||||
# Support both sync and async handlers
|
||||
if asyncio.iscoroutine(result):
|
||||
await result
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
|
||||
282
gateway/pairing.py
Normal file
282
gateway/pairing.py
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
"""
|
||||
DM Pairing System
|
||||
|
||||
Code-based approval flow for authorizing new users on messaging platforms.
|
||||
Instead of static allowlists with user IDs, unknown users receive a one-time
|
||||
pairing code that the bot owner approves via the CLI.
|
||||
|
||||
Security features (based on OWASP + NIST SP 800-63-4 guidance):
|
||||
- 8-char codes from 32-char unambiguous alphabet (no 0/O/1/I)
|
||||
- Cryptographic randomness via secrets.choice()
|
||||
- 1-hour code expiry
|
||||
- Max 3 pending codes per platform
|
||||
- Rate limiting: 1 request per user per 10 minutes
|
||||
- Lockout after 5 failed approval attempts (1 hour)
|
||||
- File permissions: chmod 0600 on all data files
|
||||
- Codes are never logged to stdout
|
||||
|
||||
Storage: ~/.hermes/pairing/
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
|
||||
ALPHABET = "ABCDEFGHJKLMNPQRSTUVWXYZ23456789"
|
||||
CODE_LENGTH = 8
|
||||
|
||||
# Timing constants
|
||||
CODE_TTL_SECONDS = 3600 # Codes expire after 1 hour
|
||||
RATE_LIMIT_SECONDS = 600 # 1 request per user per 10 minutes
|
||||
LOCKOUT_SECONDS = 3600 # Lockout duration after too many failures
|
||||
|
||||
# Limits
|
||||
MAX_PENDING_PER_PLATFORM = 3 # Max pending codes per platform
|
||||
MAX_FAILED_ATTEMPTS = 5 # Failed approvals before lockout
|
||||
|
||||
PAIRING_DIR = Path(os.path.expanduser("~/.hermes/pairing"))
|
||||
|
||||
|
||||
def _secure_write(path: Path, data: str) -> None:
|
||||
"""Write data to file with restrictive permissions (owner read/write only)."""
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(data, encoding="utf-8")
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
pass # Windows doesn't support chmod the same way
|
||||
|
||||
|
||||
class PairingStore:
|
||||
"""
|
||||
Manages pairing codes and approved user lists.
|
||||
|
||||
Data files per platform:
|
||||
- {platform}-pending.json : pending pairing requests
|
||||
- {platform}-approved.json : approved (paired) users
|
||||
- _rate_limits.json : rate limit tracking
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
PAIRING_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _pending_path(self, platform: str) -> Path:
|
||||
return PAIRING_DIR / f"{platform}-pending.json"
|
||||
|
||||
def _approved_path(self, platform: str) -> Path:
|
||||
return PAIRING_DIR / f"{platform}-approved.json"
|
||||
|
||||
def _rate_limit_path(self) -> Path:
|
||||
return PAIRING_DIR / "_rate_limits.json"
|
||||
|
||||
def _load_json(self, path: Path) -> dict:
|
||||
if path.exists():
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return {}
|
||||
return {}
|
||||
|
||||
def _save_json(self, path: Path, data: dict) -> None:
|
||||
_secure_write(path, json.dumps(data, indent=2, ensure_ascii=False))
|
||||
|
||||
# ----- Approved users -----
|
||||
|
||||
def is_approved(self, platform: str, user_id: str) -> bool:
|
||||
"""Check if a user is approved (paired) on a platform."""
|
||||
approved = self._load_json(self._approved_path(platform))
|
||||
return user_id in approved
|
||||
|
||||
def list_approved(self, platform: str = None) -> list:
|
||||
"""List approved users, optionally filtered by platform."""
|
||||
results = []
|
||||
platforms = [platform] if platform else self._all_platforms("approved")
|
||||
for p in platforms:
|
||||
approved = self._load_json(self._approved_path(p))
|
||||
for uid, info in approved.items():
|
||||
results.append({"platform": p, "user_id": uid, **info})
|
||||
return results
|
||||
|
||||
def _approve_user(self, platform: str, user_id: str, user_name: str = "") -> None:
|
||||
"""Add a user to the approved list."""
|
||||
approved = self._load_json(self._approved_path(platform))
|
||||
approved[user_id] = {
|
||||
"user_name": user_name,
|
||||
"approved_at": time.time(),
|
||||
}
|
||||
self._save_json(self._approved_path(platform), approved)
|
||||
|
||||
def revoke(self, platform: str, user_id: str) -> bool:
|
||||
"""Remove a user from the approved list. Returns True if found."""
|
||||
path = self._approved_path(platform)
|
||||
approved = self._load_json(path)
|
||||
if user_id in approved:
|
||||
del approved[user_id]
|
||||
self._save_json(path, approved)
|
||||
return True
|
||||
return False
|
||||
|
||||
# ----- Pending codes -----
|
||||
|
||||
def generate_code(
|
||||
self, platform: str, user_id: str, user_name: str = ""
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Generate a pairing code for a new user.
|
||||
|
||||
Returns the code string, or None if:
|
||||
- User is rate-limited (too recent request)
|
||||
- Max pending codes reached for this platform
|
||||
- User/platform is in lockout due to failed attempts
|
||||
"""
|
||||
self._cleanup_expired(platform)
|
||||
|
||||
# Check lockout
|
||||
if self._is_locked_out(platform):
|
||||
return None
|
||||
|
||||
# Check rate limit for this specific user
|
||||
if self._is_rate_limited(platform, user_id):
|
||||
return None
|
||||
|
||||
# Check max pending
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if len(pending) >= MAX_PENDING_PER_PLATFORM:
|
||||
return None
|
||||
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
}
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Record rate limit
|
||||
self._record_rate_limit(platform, user_id)
|
||||
|
||||
return code
|
||||
|
||||
def approve_code(self, platform: str, code: str) -> Optional[dict]:
|
||||
"""
|
||||
Approve a pairing code. Adds the user to the approved list.
|
||||
|
||||
Returns {user_id, user_name} on success, None if code is invalid/expired.
|
||||
"""
|
||||
self._cleanup_expired(platform)
|
||||
code = code.upper().strip()
|
||||
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if code not in pending:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
|
||||
entry = pending.pop(code)
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Add to approved list
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
|
||||
return {
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
}
|
||||
|
||||
def list_pending(self, platform: str = None) -> list:
|
||||
"""List pending pairing requests, optionally filtered by platform."""
|
||||
results = []
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
self._cleanup_expired(p)
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
for code, info in pending.items():
|
||||
age_min = int((time.time() - info["created_at"]) / 60)
|
||||
results.append({
|
||||
"platform": p,
|
||||
"code": code,
|
||||
"user_id": info["user_id"],
|
||||
"user_name": info.get("user_name", ""),
|
||||
"age_minutes": age_min,
|
||||
})
|
||||
return results
|
||||
|
||||
def clear_pending(self, platform: str = None) -> int:
|
||||
"""Clear all pending requests. Returns count removed."""
|
||||
count = 0
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
count += len(pending)
|
||||
self._save_json(self._pending_path(p), {})
|
||||
return count
|
||||
|
||||
# ----- Rate limiting and lockout -----
|
||||
|
||||
def _is_rate_limited(self, platform: str, user_id: str) -> bool:
|
||||
"""Check if a user has requested a code too recently."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
key = f"{platform}:{user_id}"
|
||||
last_request = limits.get(key, 0)
|
||||
return (time.time() - last_request) < RATE_LIMIT_SECONDS
|
||||
|
||||
def _record_rate_limit(self, platform: str, user_id: str) -> None:
|
||||
"""Record the time of a pairing request for rate limiting."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
key = f"{platform}:{user_id}"
|
||||
limits[key] = time.time()
|
||||
self._save_json(self._rate_limit_path(), limits)
|
||||
|
||||
def _is_locked_out(self, platform: str) -> bool:
|
||||
"""Check if a platform is in lockout due to failed approval attempts."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
lockout_key = f"_lockout:{platform}"
|
||||
lockout_until = limits.get(lockout_key, 0)
|
||||
return time.time() < lockout_until
|
||||
|
||||
def _record_failed_attempt(self, platform: str) -> None:
|
||||
"""Record a failed approval attempt. Triggers lockout after MAX_FAILED_ATTEMPTS."""
|
||||
limits = self._load_json(self._rate_limit_path())
|
||||
fail_key = f"_failures:{platform}"
|
||||
fails = limits.get(fail_key, 0) + 1
|
||||
limits[fail_key] = fails
|
||||
if fails >= MAX_FAILED_ATTEMPTS:
|
||||
lockout_key = f"_lockout:{platform}"
|
||||
limits[lockout_key] = time.time() + LOCKOUT_SECONDS
|
||||
limits[fail_key] = 0 # Reset counter
|
||||
print(f"[pairing] Platform {platform} locked out for {LOCKOUT_SECONDS}s "
|
||||
f"after {MAX_FAILED_ATTEMPTS} failed attempts", flush=True)
|
||||
self._save_json(self._rate_limit_path(), limits)
|
||||
|
||||
# ----- Cleanup -----
|
||||
|
||||
def _cleanup_expired(self, platform: str) -> None:
|
||||
"""Remove expired pending codes."""
|
||||
path = self._pending_path(platform)
|
||||
pending = self._load_json(path)
|
||||
now = time.time()
|
||||
expired = [
|
||||
code for code, info in pending.items()
|
||||
if (now - info["created_at"]) > CODE_TTL_SECONDS
|
||||
]
|
||||
if expired:
|
||||
for code in expired:
|
||||
del pending[code]
|
||||
self._save_json(path, pending)
|
||||
|
||||
def _all_platforms(self, suffix: str) -> list:
|
||||
"""List all platforms that have data files of a given suffix."""
|
||||
platforms = []
|
||||
for f in PAIRING_DIR.iterdir():
|
||||
if f.name.endswith(f"-{suffix}.json"):
|
||||
platform = f.name.replace(f"-{suffix}.json", "")
|
||||
if not platform.startswith("_"):
|
||||
platforms.append(platform)
|
||||
return platforms
|
||||
|
|
@ -108,6 +108,65 @@ def cleanup_image_cache(max_age_hours: int = 24) -> int:
|
|||
return removed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Audio cache utilities
|
||||
#
|
||||
# Same pattern as image cache -- voice messages from platforms are downloaded
|
||||
# here so the STT tool (OpenAI Whisper) can transcribe them from local files.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
AUDIO_CACHE_DIR = Path(os.path.expanduser("~/.hermes/audio_cache"))
|
||||
|
||||
|
||||
def get_audio_cache_dir() -> Path:
|
||||
"""Return the audio cache directory, creating it if it doesn't exist."""
|
||||
AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
return AUDIO_CACHE_DIR
|
||||
|
||||
|
||||
def cache_audio_from_bytes(data: bytes, ext: str = ".ogg") -> str:
|
||||
"""
|
||||
Save raw audio bytes to the cache and return the absolute file path.
|
||||
|
||||
Args:
|
||||
data: Raw audio bytes.
|
||||
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached audio file as a string.
|
||||
"""
|
||||
cache_dir = get_audio_cache_dir()
|
||||
filename = f"audio_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
filepath.write_bytes(data)
|
||||
return str(filepath)
|
||||
|
||||
|
||||
async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
|
||||
"""
|
||||
Download an audio file from a URL and save it to the local cache.
|
||||
|
||||
Args:
|
||||
url: The HTTP/HTTPS URL to download from.
|
||||
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
||||
|
||||
Returns:
|
||||
Absolute path to the cached audio file as a string.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
|
||||
"Accept": "audio/*,*/*;q=0.8",
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
|
||||
|
||||
class MessageType(Enum):
|
||||
"""Types of incoming messages."""
|
||||
TEXT = "text"
|
||||
|
|
@ -422,6 +481,27 @@ class BasePlatformAdapter(ABC):
|
|||
# Spawn background task to process this message
|
||||
asyncio.create_task(self._process_message_background(event, session_key))
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
"""
|
||||
Return a random delay in seconds for human-like response pacing.
|
||||
|
||||
Reads from env vars:
|
||||
HERMES_HUMAN_DELAY_MODE: "off" (default) | "natural" | "custom"
|
||||
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
|
||||
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
|
||||
"""
|
||||
import random
|
||||
|
||||
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
|
||||
if mode == "off":
|
||||
return 0.0
|
||||
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
|
||||
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
|
||||
if mode == "natural":
|
||||
min_ms, max_ms = 800, 2500
|
||||
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
|
||||
|
||||
async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
|
||||
"""Background task that actually processes the message."""
|
||||
# Create interrupt event for this session
|
||||
|
|
@ -463,8 +543,13 @@ class BasePlatformAdapter(ABC):
|
|||
if not fallback_result.success:
|
||||
print(f"[{self.name}] Fallback send also failed: {fallback_result.error}")
|
||||
|
||||
# Human-like pacing delay between text and media
|
||||
human_delay = self._get_human_delay()
|
||||
|
||||
# Send extracted images as native attachments
|
||||
for image_url, alt_text in images:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
img_result = await self.send_image(
|
||||
chat_id=event.source.chat_id,
|
||||
|
|
@ -478,6 +563,8 @@ class BasePlatformAdapter(ABC):
|
|||
|
||||
# Send extracted audio/voice files as native attachments
|
||||
for audio_path, is_voice in media_files:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
voice_result = await self.send_voice(
|
||||
chat_id=event.source.chat_id,
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -49,7 +50,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
- Receiving messages from servers and DMs
|
||||
- Sending responses with Discord markdown
|
||||
- Thread support
|
||||
- Slash commands (future)
|
||||
- Native slash commands (/ask, /reset, /status, /stop)
|
||||
- Button-based exec approvals
|
||||
- Auto-threading for long conversations
|
||||
- Reaction-based feedback
|
||||
"""
|
||||
|
||||
# Discord message limits
|
||||
|
|
@ -59,6 +63,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
super().__init__(config, Platform.DISCORD)
|
||||
self._client: Optional[commands.Bot] = None
|
||||
self._ready_event = asyncio.Event()
|
||||
self._allowed_user_ids: set = set() # For button approval authorization
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
|
|
@ -83,10 +88,23 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
intents=intents,
|
||||
)
|
||||
|
||||
# Parse allowed user IDs for button authorization
|
||||
allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
|
||||
if allowed_env:
|
||||
self._allowed_user_ids = {
|
||||
uid.strip() for uid in allowed_env.split(",") if uid.strip()
|
||||
}
|
||||
|
||||
# Register event handlers
|
||||
@self._client.event
|
||||
async def on_ready():
|
||||
print(f"[{self.name}] Connected as {self._client.user}")
|
||||
# Sync slash commands with Discord
|
||||
try:
|
||||
synced = await self._client.tree.sync()
|
||||
print(f"[{self.name}] Synced {len(synced)} slash command(s)")
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Slash command sync failed: {e}")
|
||||
self._ready_event.set()
|
||||
|
||||
@self._client.event
|
||||
|
|
@ -96,6 +114,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
return
|
||||
await self._handle_message(message)
|
||||
|
||||
# Register slash commands
|
||||
self._register_slash_commands()
|
||||
|
||||
# Start the bot in background
|
||||
asyncio.create_task(self._client.start(self.config.token))
|
||||
|
||||
|
|
@ -325,6 +346,116 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# Discord markdown is fairly standard, no special escaping needed
|
||||
return content
|
||||
|
||||
def _register_slash_commands(self) -> None:
|
||||
"""Register Discord slash commands on the command tree."""
|
||||
if not self._client:
|
||||
return
|
||||
|
||||
tree = self._client.tree
|
||||
|
||||
@tree.command(name="ask", description="Ask Hermes a question")
|
||||
@discord.app_commands.describe(question="Your question for Hermes")
|
||||
async def slash_ask(interaction: discord.Interaction, question: str):
|
||||
await interaction.response.defer()
|
||||
event = self._build_slash_event(interaction, question)
|
||||
await self.handle_message(event)
|
||||
# The response is sent via the normal send() flow
|
||||
# Send a followup to close the interaction if needed
|
||||
try:
|
||||
await interaction.followup.send("Processing complete~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@tree.command(name="reset", description="Reset your Hermes session")
|
||||
async def slash_reset(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/reset")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Session reset~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@tree.command(name="status", description="Show Hermes session status")
|
||||
async def slash_status(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/status")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Status sent~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@tree.command(name="stop", description="Stop the running Hermes agent")
|
||||
async def slash_stop(interaction: discord.Interaction):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, "/stop")
|
||||
await self.handle_message(event)
|
||||
try:
|
||||
await interaction.followup.send("Stop requested~", ephemeral=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Discord slash command interaction."""
|
||||
is_dm = isinstance(interaction.channel, discord.DMChannel)
|
||||
chat_type = "dm" if is_dm else "group"
|
||||
chat_name = ""
|
||||
if not is_dm and hasattr(interaction.channel, "name"):
|
||||
chat_name = interaction.channel.name
|
||||
if hasattr(interaction.channel, "guild") and interaction.channel.guild:
|
||||
chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=str(interaction.channel_id),
|
||||
chat_name=chat_name,
|
||||
chat_type=chat_type,
|
||||
user_id=str(interaction.user.id),
|
||||
user_name=interaction.user.display_name,
|
||||
)
|
||||
|
||||
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=interaction,
|
||||
)
|
||||
|
||||
async def send_exec_approval(
|
||||
self, chat_id: str, command: str, approval_id: str
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send a button-based exec approval prompt for a dangerous command.
|
||||
|
||||
Returns SendResult. The approval is resolved when a user clicks a button.
|
||||
"""
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
|
||||
embed = discord.Embed(
|
||||
title="Command Approval Required",
|
||||
description=f"```\n{command[:500]}\n```",
|
||||
color=discord.Color.orange(),
|
||||
)
|
||||
embed.set_footer(text=f"Approval ID: {approval_id}")
|
||||
|
||||
view = ExecApprovalView(
|
||||
approval_id=approval_id,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def _handle_message(self, message: DiscordMessage) -> None:
|
||||
"""Handle incoming Discord messages."""
|
||||
# In server channels (not DMs), require the bot to be @mentioned
|
||||
|
|
@ -424,8 +555,21 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# Fall back to the CDN URL if caching fails
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
elif content_type.startswith("audio/"):
|
||||
try:
|
||||
ext = "." + content_type.split("/")[-1].split(";")[0]
|
||||
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
|
||||
ext = ".ogg"
|
||||
cached_path = await cache_audio_from_url(att.url, ext=ext)
|
||||
media_urls.append(cached_path)
|
||||
media_types.append(content_type)
|
||||
print(f"[Discord] Cached user audio: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Discord] Failed to cache audio attachment: {e}", flush=True)
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
else:
|
||||
# Non-image attachments: keep the original URL
|
||||
# Other attachments: keep the original URL
|
||||
media_urls.append(att.url)
|
||||
media_types.append(content_type)
|
||||
|
||||
|
|
@ -442,3 +586,94 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Discord UI Components (outside the adapter class)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if DISCORD_AVAILABLE:
|
||||
|
||||
class ExecApprovalView(discord.ui.View):
|
||||
"""
|
||||
Interactive button view for exec approval of dangerous commands.
|
||||
|
||||
Shows three buttons: Allow Once (green), Always Allow (blue), Deny (red).
|
||||
Only users in the allowed list can click. The view times out after 5 minutes.
|
||||
"""
|
||||
|
||||
def __init__(self, approval_id: str, allowed_user_ids: set):
|
||||
super().__init__(timeout=300) # 5-minute timeout
|
||||
self.approval_id = approval_id
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
"""Verify the user clicking is authorized."""
|
||||
if not self.allowed_user_ids:
|
||||
return True # No allowlist = anyone can approve
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, action: str, color: discord.Color
|
||||
):
|
||||
"""Resolve the approval and update the message."""
|
||||
if self.resolved:
|
||||
await interaction.response.send_message(
|
||||
"This approval has already been resolved~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
if not self._check_auth(interaction):
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized to approve commands~", ephemeral=True
|
||||
)
|
||||
return
|
||||
|
||||
self.resolved = True
|
||||
|
||||
# Update the embed with the decision
|
||||
embed = interaction.message.embeds[0] if interaction.message.embeds else None
|
||||
if embed:
|
||||
embed.color = color
|
||||
embed.set_footer(text=f"{action} by {interaction.user.display_name}")
|
||||
|
||||
# Disable all buttons
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
await interaction.response.edit_message(embed=embed, view=self)
|
||||
|
||||
# Store the approval decision for the gateway to pick up
|
||||
try:
|
||||
from tools.terminal_tool import _session_approved_patterns
|
||||
if action == "allow_once":
|
||||
pass # One-time approval handled by gateway
|
||||
elif action == "allow_always":
|
||||
_session_approved_patterns.add(self.approval_id)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
@discord.ui.button(label="Allow Once", style=discord.ButtonStyle.green)
|
||||
async def allow_once(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "allow_once", discord.Color.green())
|
||||
|
||||
@discord.ui.button(label="Always Allow", style=discord.ButtonStyle.blurple)
|
||||
async def allow_always(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "allow_always", discord.Color.blue())
|
||||
|
||||
@discord.ui.button(label="Deny", style=discord.ButtonStyle.red)
|
||||
async def deny(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button
|
||||
):
|
||||
await self._resolve(interaction, "deny", discord.Color.red())
|
||||
|
||||
async def on_timeout(self):
|
||||
"""Handle view timeout -- disable buttons and mark as expired."""
|
||||
self.resolved = True
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
|
|
|||
374
gateway/platforms/slack.py
Normal file
374
gateway/platforms/slack.py
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
"""
|
||||
Slack platform adapter.
|
||||
|
||||
Uses slack-bolt (Python) with Socket Mode for:
|
||||
- Receiving messages from channels and DMs
|
||||
- Sending responses back
|
||||
- Handling slash commands
|
||||
- Thread support
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
try:
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
||||
from slack_sdk.web.async_client import AsyncWebClient
|
||||
SLACK_AVAILABLE = True
|
||||
except ImportError:
|
||||
SLACK_AVAILABLE = False
|
||||
AsyncApp = Any
|
||||
AsyncSocketModeHandler = Any
|
||||
AsyncWebClient = Any
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, str(__file__).rsplit("/", 3)[0])
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
def check_slack_requirements() -> bool:
|
||||
"""Check if Slack dependencies are available."""
|
||||
return SLACK_AVAILABLE
|
||||
|
||||
|
||||
class SlackAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
Slack bot adapter using Socket Mode.
|
||||
|
||||
Requires two tokens:
|
||||
- SLACK_BOT_TOKEN (xoxb-...) for API calls
|
||||
- SLACK_APP_TOKEN (xapp-...) for Socket Mode connection
|
||||
|
||||
Features:
|
||||
- DMs and channel messages (mention-gated in channels)
|
||||
- Thread support
|
||||
- File/image/audio attachments
|
||||
- Slash commands (/hermes)
|
||||
- Typing indicators (not natively supported by Slack bots)
|
||||
"""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 4000 # Slack's limit is higher but mrkdwn can inflate
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.SLACK)
|
||||
self._app: Optional[AsyncApp] = None
|
||||
self._handler: Optional[AsyncSocketModeHandler] = None
|
||||
self._bot_user_id: Optional[str] = None
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
if not SLACK_AVAILABLE:
|
||||
print("[Slack] slack-bolt not installed. Run: pip install slack-bolt")
|
||||
return False
|
||||
|
||||
bot_token = self.config.token
|
||||
app_token = os.getenv("SLACK_APP_TOKEN")
|
||||
|
||||
if not bot_token:
|
||||
print("[Slack] SLACK_BOT_TOKEN not set")
|
||||
return False
|
||||
if not app_token:
|
||||
print("[Slack] SLACK_APP_TOKEN not set")
|
||||
return False
|
||||
|
||||
try:
|
||||
self._app = AsyncApp(token=bot_token)
|
||||
|
||||
# Get our own bot user ID for mention detection
|
||||
auth_response = await self._app.client.auth_test()
|
||||
self._bot_user_id = auth_response.get("user_id")
|
||||
bot_name = auth_response.get("user", "unknown")
|
||||
|
||||
# Register message event handler
|
||||
@self._app.event("message")
|
||||
async def handle_message_event(event, say):
|
||||
await self._handle_slack_message(event)
|
||||
|
||||
# Register slash command handler
|
||||
@self._app.command("/hermes")
|
||||
async def handle_hermes_command(ack, command):
|
||||
await ack()
|
||||
await self._handle_slash_command(command)
|
||||
|
||||
# Start Socket Mode handler in background
|
||||
self._handler = AsyncSocketModeHandler(self._app, app_token)
|
||||
asyncio.create_task(self._handler.start_async())
|
||||
|
||||
self._running = True
|
||||
print(f"[Slack] Connected as @{bot_name} (Socket Mode)")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Connection failed: {e}")
|
||||
return False
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Disconnect from Slack."""
|
||||
if self._handler:
|
||||
await self._handler.close_async()
|
||||
self._running = False
|
||||
print("[Slack] Disconnected")
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a message to a Slack channel or DM."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
kwargs = {
|
||||
"channel": chat_id,
|
||||
"text": content,
|
||||
}
|
||||
|
||||
# Reply in thread if thread_ts is available
|
||||
if reply_to:
|
||||
kwargs["thread_ts"] = reply_to
|
||||
elif metadata and metadata.get("thread_ts"):
|
||||
kwargs["thread_ts"] = metadata["thread_ts"]
|
||||
|
||||
result = await self._app.client.chat_postMessage(**kwargs)
|
||||
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=result.get("ts"),
|
||||
raw_response=result,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Slack] Send error: {e}")
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Slack doesn't have a direct typing indicator API for bots."""
|
||||
pass
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an image to Slack by uploading the URL as a file."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import httpx
|
||||
|
||||
# Download the image first
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(image_url)
|
||||
response.raise_for_status()
|
||||
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
content=response.content,
|
||||
filename="image.png",
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
# Fall back to sending the URL as text
|
||||
text = f"{caption}\n{image_url}" if caption else image_url
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
audio_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an audio file to Slack."""
|
||||
if not self._app:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
result = await self._app.client.files_upload_v2(
|
||||
channel=chat_id,
|
||||
file=audio_path,
|
||||
filename=os.path.basename(audio_path),
|
||||
initial_comment=caption or "",
|
||||
thread_ts=reply_to,
|
||||
)
|
||||
return SendResult(success=True, raw_response=result)
|
||||
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Get information about a Slack channel."""
|
||||
if not self._app:
|
||||
return {"name": chat_id, "type": "unknown"}
|
||||
|
||||
try:
|
||||
result = await self._app.client.conversations_info(channel=chat_id)
|
||||
channel = result.get("channel", {})
|
||||
is_dm = channel.get("is_im", False)
|
||||
return {
|
||||
"name": channel.get("name", chat_id),
|
||||
"type": "dm" if is_dm else "group",
|
||||
}
|
||||
except Exception:
|
||||
return {"name": chat_id, "type": "unknown"}
|
||||
|
||||
# ----- Internal handlers -----
|
||||
|
||||
async def _handle_slack_message(self, event: dict) -> None:
|
||||
"""Handle an incoming Slack message event."""
|
||||
# Ignore bot messages (including our own)
|
||||
if event.get("bot_id") or event.get("subtype") == "bot_message":
|
||||
return
|
||||
|
||||
# Ignore message edits and deletions
|
||||
subtype = event.get("subtype")
|
||||
if subtype in ("message_changed", "message_deleted"):
|
||||
return
|
||||
|
||||
text = event.get("text", "")
|
||||
user_id = event.get("user", "")
|
||||
channel_id = event.get("channel", "")
|
||||
thread_ts = event.get("thread_ts") or event.get("ts")
|
||||
ts = event.get("ts", "")
|
||||
|
||||
# Determine if this is a DM or channel message
|
||||
channel_type = event.get("channel_type", "")
|
||||
is_dm = channel_type == "im"
|
||||
|
||||
# In channels, only respond if bot is mentioned
|
||||
if not is_dm and self._bot_user_id:
|
||||
if f"<@{self._bot_user_id}>" not in text:
|
||||
return
|
||||
# Strip the bot mention from the text
|
||||
text = text.replace(f"<@{self._bot_user_id}>", "").strip()
|
||||
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
if text.startswith("/"):
|
||||
msg_type = MessageType.COMMAND
|
||||
|
||||
# Handle file attachments
|
||||
media_urls = []
|
||||
media_types = []
|
||||
files = event.get("files", [])
|
||||
for f in files:
|
||||
mimetype = f.get("mimetype", "unknown")
|
||||
url = f.get("url_private_download") or f.get("url_private", "")
|
||||
if mimetype.startswith("image/") and url:
|
||||
try:
|
||||
ext = "." + mimetype.split("/")[-1].split(";")[0]
|
||||
if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"):
|
||||
ext = ".jpg"
|
||||
# Slack private URLs require the bot token as auth header
|
||||
cached = await self._download_slack_file(url, ext)
|
||||
media_urls.append(cached)
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.PHOTO
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache image: {e}", flush=True)
|
||||
elif mimetype.startswith("audio/") and url:
|
||||
try:
|
||||
ext = "." + mimetype.split("/")[-1].split(";")[0]
|
||||
if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"):
|
||||
ext = ".ogg"
|
||||
cached = await self._download_slack_file(url, ext, audio=True)
|
||||
media_urls.append(cached)
|
||||
media_types.append(mimetype)
|
||||
msg_type = MessageType.VOICE
|
||||
except Exception as e:
|
||||
print(f"[Slack] Failed to cache audio: {e}", flush=True)
|
||||
|
||||
# Build source
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_name=channel_id, # Will be resolved later if needed
|
||||
chat_type="dm" if is_dm else "group",
|
||||
user_id=user_id,
|
||||
thread_id=thread_ts,
|
||||
)
|
||||
|
||||
msg_event = MessageEvent(
|
||||
text=text,
|
||||
message_type=msg_type,
|
||||
source=source,
|
||||
raw_message=event,
|
||||
message_id=ts,
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
reply_to_message_id=thread_ts if thread_ts != ts else None,
|
||||
)
|
||||
|
||||
await self.handle_message(msg_event)
|
||||
|
||||
async def _handle_slash_command(self, command: dict) -> None:
|
||||
"""Handle /hermes slash command."""
|
||||
text = command.get("text", "").strip()
|
||||
user_id = command.get("user_id", "")
|
||||
channel_id = command.get("channel_id", "")
|
||||
|
||||
# Map common slash subcommands to gateway commands
|
||||
if text in ("new", "reset"):
|
||||
text = "/reset"
|
||||
elif text == "status":
|
||||
text = "/status"
|
||||
elif text == "stop":
|
||||
text = "/stop"
|
||||
elif text:
|
||||
pass # Treat as a regular question
|
||||
else:
|
||||
text = "/help"
|
||||
|
||||
source = self.build_source(
|
||||
chat_id=channel_id,
|
||||
chat_type="dm", # Slash commands are always in DM-like context
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.COMMAND if text.startswith("/") else MessageType.TEXT,
|
||||
source=source,
|
||||
raw_message=command,
|
||||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _download_slack_file(self, url: str, ext: str, audio: bool = False) -> str:
|
||||
"""Download a Slack file using the bot token for auth."""
|
||||
import httpx
|
||||
|
||||
bot_token = self.config.token
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={"Authorization": f"Bearer {bot_token}"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
if audio:
|
||||
from gateway.platforms.base import cache_audio_from_bytes
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
else:
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
return cache_image_from_bytes(response.content, ext)
|
||||
|
|
@ -39,6 +39,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -91,7 +92,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
self._handle_command
|
||||
))
|
||||
self._app.add_handler(TelegramMessageHandler(
|
||||
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL,
|
||||
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
|
||||
self._handle_media_message
|
||||
))
|
||||
|
||||
|
|
@ -311,7 +312,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
msg = update.message
|
||||
|
||||
# Determine media type
|
||||
if msg.photo:
|
||||
if msg.sticker:
|
||||
msg_type = MessageType.STICKER
|
||||
elif msg.photo:
|
||||
msg_type = MessageType.PHOTO
|
||||
elif msg.video:
|
||||
msg_type = MessageType.VIDEO
|
||||
|
|
@ -328,6 +331,12 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
if msg.caption:
|
||||
event.text = msg.caption
|
||||
|
||||
# Handle stickers: describe via vision tool with caching
|
||||
if msg.sticker:
|
||||
await self._handle_sticker(msg, event)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Download photo to local image cache so the vision tool can access it
|
||||
# even after Telegram's ephemeral file URLs expire (~1 hour).
|
||||
if msg.photo:
|
||||
|
|
@ -352,8 +361,97 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache photo: {e}", flush=True)
|
||||
|
||||
# Download voice/audio messages to cache for STT transcription
|
||||
if msg.voice:
|
||||
try:
|
||||
file_obj = await msg.voice.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = ["audio/ogg"]
|
||||
print(f"[Telegram] Cached user voice: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache voice: {e}", flush=True)
|
||||
elif msg.audio:
|
||||
try:
|
||||
file_obj = await msg.audio.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = ["audio/mp3"]
|
||||
print(f"[Telegram] Cached user audio: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Failed to cache audio: {e}", flush=True)
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
|
||||
"""
|
||||
Describe a Telegram sticker via vision analysis, with caching.
|
||||
|
||||
For static stickers (WEBP), we download, analyze with vision, and cache
|
||||
the description by file_unique_id. For animated/video stickers, we inject
|
||||
a placeholder noting the emoji.
|
||||
"""
|
||||
from gateway.sticker_cache import (
|
||||
get_cached_description,
|
||||
cache_sticker_description,
|
||||
build_sticker_injection,
|
||||
build_animated_sticker_injection,
|
||||
STICKER_VISION_PROMPT,
|
||||
)
|
||||
|
||||
sticker = msg.sticker
|
||||
emoji = sticker.emoji or ""
|
||||
set_name = sticker.set_name or ""
|
||||
|
||||
# Animated and video stickers can't be analyzed as static images
|
||||
if sticker.is_animated or sticker.is_video:
|
||||
event.text = build_animated_sticker_injection(emoji)
|
||||
return
|
||||
|
||||
# Check the cache first
|
||||
cached = get_cached_description(sticker.file_unique_id)
|
||||
if cached:
|
||||
event.text = build_sticker_injection(
|
||||
cached["description"], cached.get("emoji", emoji), cached.get("set_name", set_name)
|
||||
)
|
||||
print(f"[Telegram] Sticker cache hit: {sticker.file_unique_id}", flush=True)
|
||||
return
|
||||
|
||||
# Cache miss -- download and analyze
|
||||
try:
|
||||
file_obj = await sticker.get_file()
|
||||
image_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=".webp")
|
||||
print(f"[Telegram] Analyzing sticker: {cached_path}", flush=True)
|
||||
|
||||
from tools.vision_tools import vision_analyze_tool
|
||||
import json as _json
|
||||
|
||||
result_json = await vision_analyze_tool(
|
||||
image_url=cached_path,
|
||||
user_prompt=STICKER_VISION_PROMPT,
|
||||
)
|
||||
result = _json.loads(result_json)
|
||||
|
||||
if result.get("success"):
|
||||
description = result.get("analysis", "a sticker")
|
||||
cache_sticker_description(sticker.file_unique_id, description, emoji, set_name)
|
||||
event.text = build_sticker_injection(description, emoji, set_name)
|
||||
else:
|
||||
# Vision failed -- use emoji as fallback
|
||||
event.text = build_sticker_injection(
|
||||
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
||||
emoji, set_name,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[Telegram] Sticker analysis error: {e}", flush=True)
|
||||
event.text = build_sticker_injection(
|
||||
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
||||
emoji, set_name,
|
||||
)
|
||||
|
||||
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Telegram message."""
|
||||
chat = message.chat
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -322,8 +323,18 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
print(f"[{self.name}] Cached user image: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to cache image: {e}", flush=True)
|
||||
cached_urls.append(url) # Fall back to original URL
|
||||
cached_urls.append(url)
|
||||
media_types.append("image/jpeg")
|
||||
elif msg_type == MessageType.VOICE and url.startswith(("http://", "https://")):
|
||||
try:
|
||||
cached_path = await cache_audio_from_url(url, ext=".ogg")
|
||||
cached_urls.append(cached_path)
|
||||
media_types.append("audio/ogg")
|
||||
print(f"[{self.name}] Cached user voice: {cached_path}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to cache voice: {e}", flush=True)
|
||||
cached_urls.append(url)
|
||||
media_types.append("audio/ogg")
|
||||
else:
|
||||
cached_urls.append(url)
|
||||
media_types.append("unknown")
|
||||
|
|
|
|||
187
gateway/run.py
187
gateway/run.py
|
|
@ -85,6 +85,14 @@ class GatewayRunner:
|
|||
# Track pending exec approvals per session
|
||||
# Key: session_key, Value: {"command": str, "pattern_key": str}
|
||||
self._pending_approvals: Dict[str, Dict[str, str]] = {}
|
||||
|
||||
# DM pairing store for code-based user authorization
|
||||
from gateway.pairing import PairingStore
|
||||
self.pairing_store = PairingStore()
|
||||
|
||||
# Event hook system
|
||||
from gateway.hooks import HookRegistry
|
||||
self.hooks = HookRegistry()
|
||||
|
||||
async def start(self) -> bool:
|
||||
"""
|
||||
|
|
@ -95,6 +103,9 @@ class GatewayRunner:
|
|||
print("[gateway] Starting Hermes Gateway...")
|
||||
print(f"[gateway] Session storage: {self.config.sessions_dir}")
|
||||
|
||||
# Discover and load event hooks
|
||||
self.hooks.discover_and_load()
|
||||
|
||||
connected_count = 0
|
||||
|
||||
# Initialize and connect each configured platform
|
||||
|
|
@ -131,6 +142,15 @@ class GatewayRunner:
|
|||
self.delivery_router.adapters = self.adapters
|
||||
|
||||
self._running = True
|
||||
|
||||
# Emit gateway:startup hook
|
||||
hook_count = len(self.hooks.loaded_hooks)
|
||||
if hook_count:
|
||||
print(f"[gateway] {hook_count} hook(s) loaded")
|
||||
await self.hooks.emit("gateway:startup", {
|
||||
"platforms": [p.value for p in self.adapters.keys()],
|
||||
})
|
||||
|
||||
print(f"[gateway] Gateway running with {connected_count} platform(s)")
|
||||
print("[gateway] Press Ctrl+C to stop")
|
||||
|
||||
|
|
@ -183,18 +203,23 @@ class GatewayRunner:
|
|||
return None
|
||||
return WhatsAppAdapter(config)
|
||||
|
||||
elif platform == Platform.SLACK:
|
||||
from gateway.platforms.slack import SlackAdapter, check_slack_requirements
|
||||
if not check_slack_requirements():
|
||||
print(f"[gateway] Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
|
||||
return None
|
||||
return SlackAdapter(config)
|
||||
|
||||
return None
|
||||
|
||||
def _is_user_authorized(self, source: SessionSource) -> bool:
|
||||
"""
|
||||
Check if a user is authorized to use the bot.
|
||||
|
||||
Authorization is checked via environment variables:
|
||||
- GATEWAY_ALLOWED_USERS: Comma-separated list of user IDs (all platforms)
|
||||
- TELEGRAM_ALLOWED_USERS: Telegram-specific user IDs
|
||||
- DISCORD_ALLOWED_USERS: Discord-specific user IDs
|
||||
|
||||
If no allowlist is configured, all users are allowed (open access).
|
||||
Checks in order:
|
||||
1. Environment variable allowlists (TELEGRAM_ALLOWED_USERS, etc.)
|
||||
2. DM pairing approved list
|
||||
3. If no allowlists AND no pairing approvals exist, allow all (open access)
|
||||
"""
|
||||
user_id = source.user_id
|
||||
if not user_id:
|
||||
|
|
@ -205,12 +230,18 @@ class GatewayRunner:
|
|||
Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
|
||||
Platform.DISCORD: "DISCORD_ALLOWED_USERS",
|
||||
Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
|
||||
Platform.SLACK: "SLACK_ALLOWED_USERS",
|
||||
}
|
||||
|
||||
platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""))
|
||||
global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "")
|
||||
|
||||
# If no allowlists configured, allow all (backward compatible)
|
||||
# Check pairing store (always checked, regardless of allowlists)
|
||||
platform_name = source.platform.value if source.platform else ""
|
||||
if self.pairing_store.is_approved(platform_name, user_id):
|
||||
return True
|
||||
|
||||
# If no allowlists configured and no pairing approvals, allow all (backward compatible)
|
||||
if not platform_allowlist and not global_allowlist:
|
||||
return True
|
||||
|
||||
|
|
@ -241,7 +272,31 @@ class GatewayRunner:
|
|||
# Check if user is authorized
|
||||
if not self._is_user_authorized(source):
|
||||
print(f"[gateway] Unauthorized user: {source.user_id} ({source.user_name}) on {source.platform.value}")
|
||||
return None # Silently ignore unauthorized users
|
||||
# In DMs: offer pairing code. In groups: silently ignore.
|
||||
if source.chat_type == "dm":
|
||||
platform_name = source.platform.value if source.platform else "unknown"
|
||||
code = self.pairing_store.generate_code(
|
||||
platform_name, source.user_id, source.user_name or ""
|
||||
)
|
||||
if code:
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if adapter:
|
||||
await adapter.send(
|
||||
source.chat_id,
|
||||
f"Hi~ I don't recognize you yet!\n\n"
|
||||
f"Here's your pairing code: `{code}`\n\n"
|
||||
f"Ask the bot owner to run:\n"
|
||||
f"`hermes pairing approve {platform_name} {code}`"
|
||||
)
|
||||
else:
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if adapter:
|
||||
await adapter.send(
|
||||
source.chat_id,
|
||||
"Too many pairing requests right now~ "
|
||||
"Please try again later!"
|
||||
)
|
||||
return None
|
||||
|
||||
# Check for commands
|
||||
command = event.get_command()
|
||||
|
|
@ -327,7 +382,34 @@ class GatewayRunner:
|
|||
message_text, image_paths
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Auto-transcribe voice/audio messages sent by the user
|
||||
# -----------------------------------------------------------------
|
||||
if event.media_urls:
|
||||
audio_paths = []
|
||||
for i, path in enumerate(event.media_urls):
|
||||
mtype = event.media_types[i] if i < len(event.media_types) else ""
|
||||
is_audio = (
|
||||
mtype.startswith("audio/")
|
||||
or event.message_type in (MessageType.VOICE, MessageType.AUDIO)
|
||||
)
|
||||
if is_audio:
|
||||
audio_paths.append(path)
|
||||
if audio_paths:
|
||||
message_text = await self._enrich_message_with_transcription(
|
||||
message_text, audio_paths
|
||||
)
|
||||
|
||||
try:
|
||||
# Emit agent:start hook
|
||||
hook_ctx = {
|
||||
"platform": source.platform.value if source.platform else "",
|
||||
"user_id": source.user_id,
|
||||
"session_id": session_entry.session_id,
|
||||
"message": message_text[:500],
|
||||
}
|
||||
await self.hooks.emit("agent:start", hook_ctx)
|
||||
|
||||
# Run the agent
|
||||
response = await self._run_agent(
|
||||
message=message_text,
|
||||
|
|
@ -338,6 +420,12 @@ class GatewayRunner:
|
|||
session_key=session_key
|
||||
)
|
||||
|
||||
# Emit agent:end hook
|
||||
await self.hooks.emit("agent:end", {
|
||||
**hook_ctx,
|
||||
"response": (response or "")[:500],
|
||||
})
|
||||
|
||||
# Check if the agent encountered a dangerous command needing approval
|
||||
# The terminal tool stores the last pending approval globally
|
||||
try:
|
||||
|
|
@ -382,6 +470,13 @@ class GatewayRunner:
|
|||
# Reset the session
|
||||
new_entry = self.session_store.reset_session(session_key)
|
||||
|
||||
# Emit session:reset hook
|
||||
await self.hooks.emit("session:reset", {
|
||||
"platform": source.platform.value if source.platform else "",
|
||||
"user_id": source.user_id,
|
||||
"session_key": session_key,
|
||||
})
|
||||
|
||||
if new_entry:
|
||||
return "✨ Session reset! I've started fresh with no memory of our previous conversation."
|
||||
else:
|
||||
|
|
@ -482,23 +577,22 @@ class GatewayRunner:
|
|||
if result.get("success"):
|
||||
description = result.get("analysis", "")
|
||||
enriched_parts.append(
|
||||
f"[User sent an image. Vision analysis:\n{description}]\n"
|
||||
f"[To examine this image further, use vision_analyze with "
|
||||
f"image_url: {path}]"
|
||||
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
|
||||
f"[If you need a closer look, use vision_analyze with "
|
||||
f"image_url: {path} ~]"
|
||||
)
|
||||
else:
|
||||
# Analysis failed -- still tell the model the image exists
|
||||
enriched_parts.append(
|
||||
f"[User sent an image but automatic analysis failed. "
|
||||
f"You can try analyzing it with vision_analyze using "
|
||||
f"image_url: {path}]"
|
||||
"[The user sent an image but I couldn't quite see it "
|
||||
"this time (>_<) You can try looking at it yourself "
|
||||
f"with vision_analyze using image_url: {path}]"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[gateway] Vision auto-analysis error: {e}", flush=True)
|
||||
enriched_parts.append(
|
||||
f"[User sent an image but automatic analysis encountered an error. "
|
||||
f"You can try analyzing it with vision_analyze using "
|
||||
f"image_url: {path}]"
|
||||
f"[The user sent an image but something went wrong when I "
|
||||
f"tried to look at it~ You can try examining it yourself "
|
||||
f"with vision_analyze using image_url: {path}]"
|
||||
)
|
||||
|
||||
# Combine: vision descriptions first, then the user's original text
|
||||
|
|
@ -509,6 +603,63 @@ class GatewayRunner:
|
|||
return prefix
|
||||
return user_text
|
||||
|
||||
async def _enrich_message_with_transcription(
|
||||
self,
|
||||
user_text: str,
|
||||
audio_paths: List[str],
|
||||
) -> str:
|
||||
"""
|
||||
Auto-transcribe user voice/audio messages using OpenAI Whisper API
|
||||
and prepend the transcript to the message text.
|
||||
|
||||
Args:
|
||||
user_text: The user's original caption / message text.
|
||||
audio_paths: List of local file paths to cached audio files.
|
||||
|
||||
Returns:
|
||||
The enriched message string with transcriptions prepended.
|
||||
"""
|
||||
from tools.transcription_tools import transcribe_audio
|
||||
import asyncio
|
||||
|
||||
enriched_parts = []
|
||||
for path in audio_paths:
|
||||
try:
|
||||
print(f"[gateway] Transcribing user voice: {path}", flush=True)
|
||||
result = await asyncio.to_thread(transcribe_audio, path)
|
||||
if result["success"]:
|
||||
transcript = result["transcript"]
|
||||
enriched_parts.append(
|
||||
f'[The user sent a voice message~ '
|
||||
f'Here\'s what they said: "{transcript}"]'
|
||||
)
|
||||
else:
|
||||
error = result.get("error", "unknown error")
|
||||
if "OPENAI_API_KEY" in error:
|
||||
enriched_parts.append(
|
||||
"[The user sent a voice message but I can't listen "
|
||||
"to it right now~ OPENAI_API_KEY isn't set up yet "
|
||||
"(';w;') Let them know!]"
|
||||
)
|
||||
else:
|
||||
enriched_parts.append(
|
||||
"[The user sent a voice message but I had trouble "
|
||||
f"transcribing it~ ({error})]"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[gateway] Transcription error: {e}", flush=True)
|
||||
enriched_parts.append(
|
||||
"[The user sent a voice message but something went wrong "
|
||||
"when I tried to listen to it~ Let them know!]"
|
||||
)
|
||||
|
||||
if enriched_parts:
|
||||
prefix = "\n\n".join(enriched_parts)
|
||||
if user_text:
|
||||
return f"{prefix}\n\n{user_text}"
|
||||
return prefix
|
||||
return user_text
|
||||
|
||||
async def _run_agent(
|
||||
self,
|
||||
message: str,
|
||||
|
|
|
|||
111
gateway/sticker_cache.py
Normal file
111
gateway/sticker_cache.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""
|
||||
Sticker description cache for Telegram.
|
||||
|
||||
When users send stickers, we describe them via the vision tool and cache
|
||||
the descriptions keyed by file_unique_id so we don't re-analyze the same
|
||||
sticker image on every send. Descriptions are concise (1-2 sentences).
|
||||
|
||||
Cache location: ~/.hermes/sticker_cache.json
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
CACHE_PATH = Path(os.path.expanduser("~/.hermes/sticker_cache.json"))
|
||||
|
||||
# Vision prompt for describing stickers -- kept concise to save tokens
|
||||
STICKER_VISION_PROMPT = (
|
||||
"Describe this sticker in 1-2 sentences. Focus on what it depicts -- "
|
||||
"character, action, emotion. Be concise and objective."
|
||||
)
|
||||
|
||||
|
||||
def _load_cache() -> dict:
|
||||
"""Load the sticker cache from disk."""
|
||||
if CACHE_PATH.exists():
|
||||
try:
|
||||
return json.loads(CACHE_PATH.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
def _save_cache(cache: dict) -> None:
|
||||
"""Save the sticker cache to disk."""
|
||||
CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
CACHE_PATH.write_text(
|
||||
json.dumps(cache, indent=2, ensure_ascii=False),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
def get_cached_description(file_unique_id: str) -> Optional[dict]:
|
||||
"""
|
||||
Look up a cached sticker description.
|
||||
|
||||
Returns:
|
||||
dict with keys {description, emoji, set_name, cached_at} or None.
|
||||
"""
|
||||
cache = _load_cache()
|
||||
return cache.get(file_unique_id)
|
||||
|
||||
|
||||
def cache_sticker_description(
|
||||
file_unique_id: str,
|
||||
description: str,
|
||||
emoji: str = "",
|
||||
set_name: str = "",
|
||||
) -> None:
|
||||
"""
|
||||
Store a sticker description in the cache.
|
||||
|
||||
Args:
|
||||
file_unique_id: Telegram's stable sticker identifier.
|
||||
description: Vision-generated description text.
|
||||
emoji: Associated emoji (e.g. "😀").
|
||||
set_name: Sticker set name if available.
|
||||
"""
|
||||
cache = _load_cache()
|
||||
cache[file_unique_id] = {
|
||||
"description": description,
|
||||
"emoji": emoji,
|
||||
"set_name": set_name,
|
||||
"cached_at": time.time(),
|
||||
}
|
||||
_save_cache(cache)
|
||||
|
||||
|
||||
def build_sticker_injection(
|
||||
description: str,
|
||||
emoji: str = "",
|
||||
set_name: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Build the warm-style injection text for a sticker description.
|
||||
|
||||
Returns a string like:
|
||||
[The user sent a sticker 😀 from "MyPack"~ It shows: "A cat waving" (=^.w.^=)]
|
||||
"""
|
||||
context = ""
|
||||
if set_name and emoji:
|
||||
context = f" {emoji} from \"{set_name}\""
|
||||
elif emoji:
|
||||
context = f" {emoji}"
|
||||
|
||||
return f"[The user sent a sticker{context}~ It shows: \"{description}\" (=^.w.^=)]"
|
||||
|
||||
|
||||
def build_animated_sticker_injection(emoji: str = "") -> str:
|
||||
"""
|
||||
Build injection text for animated/video stickers we can't analyze.
|
||||
"""
|
||||
if emoji:
|
||||
return (
|
||||
f"[The user sent an animated sticker {emoji}~ "
|
||||
f"I can't see animated ones yet, but the emoji suggests: {emoji}]"
|
||||
)
|
||||
return "[The user sent an animated sticker~ I can't see animated ones yet]"
|
||||
Loading…
Add table
Add a link
Reference in a new issue