fix: address PR review round 5 — streaming guard, VC auth, history prefix, auto-TTS control

1. Gate _streaming_api_call to chat_completions mode only; Anthropic and Codex fall back to _interruptible_api_call. Preserve the Anthropic base_url across all client rebuild paths (interrupt, fallback, 401 refresh).
2. Discord VC synthetic events now use chat_type="channel" instead of defaulting to "dm", which prevents session bleed into the DM context. Authorization now runs before the transcript is echoed to the text channel, and @everyone/@here are sanitized in voice transcripts.
3. The CLI voice prefix ("[Voice input...]") is now API-call-local only: it is stripped from the returned history so it never persists to the session DB or to resumed sessions.
4. /voice off now disables the base adapter's auto-TTS via the _auto_tts_disabled_chats set, so voice input no longer triggers TTS when voice mode is off.
2026-03-14 10:31:49 +03:00 · 2026-03-14 10:31:49 +03:00 · cc0a453476
commit cc0a453476
parent 35748a2fb0
5 changed files with 59 additions and 22 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -2119,9 +2119,13 @@ class GatewayRunner:
        args = event.get_command_args().strip().lower()
        chat_id = event.source.chat_id

+        adapter = self.adapters.get(event.source.platform)
+
        if args in ("on", "enable"):
            self._voice_mode[chat_id] = "voice_only"
            self._save_voice_modes()
+            if adapter:
+                adapter._auto_tts_disabled_chats.discard(chat_id)
            return (
                "Voice mode enabled.\n"
                "I'll reply with voice when you send voice messages.\n"
@ -2130,10 +2134,14 @@ class GatewayRunner:
        elif args in ("off", "disable"):
            self._voice_mode.pop(chat_id, None)
            self._save_voice_modes()
+            if adapter:
+                adapter._auto_tts_disabled_chats.add(chat_id)
            return "Voice mode disabled. Text-only replies."
        elif args == "tts":
            self._voice_mode[chat_id] = "all"
            self._save_voice_modes()
+            if adapter:
+                adapter._auto_tts_disabled_chats.discard(chat_id)
            return (
                "Auto-TTS enabled.\n"
                "All replies will include a voice message."
@ -2171,10 +2179,14 @@ class GatewayRunner:
            if current == "off":
                self._voice_mode[chat_id] = "voice_only"
                self._save_voice_modes()
+                if adapter:
+                    adapter._auto_tts_disabled_chats.discard(chat_id)
                return "Voice mode enabled."
            else:
                self._voice_mode.pop(chat_id, None)
                self._save_voice_modes()
+                if adapter:
+                    adapter._auto_tts_disabled_chats.add(chat_id)
                return "Voice mode disabled."

    async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
@ -2211,6 +2223,7 @@ class GatewayRunner:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
            self._voice_mode[event.source.chat_id] = "all"
            self._save_voice_modes()
+            adapter._auto_tts_disabled_chats.discard(event.source.chat_id)
            return (
                f"Joined voice channel **{voice_channel.name}**.\n"
                f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
@ -2265,21 +2278,28 @@ class GatewayRunner:
        if not text_ch_id:
            return

-        # Show transcript in text channel
-        try:
-            channel = adapter._client.get_channel(text_ch_id)
-            if channel:
-                await channel.send(f"**[Voice]** <@{user_id}>: {transcript}")
-        except Exception:
-            pass
-
-        # Build a synthetic MessageEvent and feed through the normal pipeline
+        # Check authorization before processing voice input
        source = SessionSource(
            platform=Platform.DISCORD,
            chat_id=str(text_ch_id),
            user_id=str(user_id),
            user_name=str(user_id),
+            chat_type="channel",
        )
+        if not self._is_user_authorized(source):
+            logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
+            return
+
+        # Show transcript in text channel (after auth, with mention sanitization)
+        try:
+            channel = adapter._client.get_channel(text_ch_id)
+            if channel:
+                safe_text = transcript[:2000].replace("@everyone", "@\u200beveryone").replace("@here", "@\u200bhere")
+                await channel.send(f"**[Voice]** <@{user_id}>: {safe_text}")
+        except Exception:
+            pass
+
+        # Build a synthetic MessageEvent and feed through the normal pipeline
        # Use SimpleNamespace as raw_message so _get_guild_id() can extract
        # guild_id and _send_voice_reply() plays audio in the voice channel.
        from types import SimpleNamespace