From 9722bd8be0254e175361b92b8477cb2167b69d0a Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Wed, 11 Mar 2026 23:57:42 +0300
Subject: [PATCH] fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   deprecated asyncio.get_event_loop()
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)
---
 gateway/platforms/base.py           |  16 +-
 gateway/platforms/discord.py        |  53 +++-
 gateway/run.py                      |  24 +-
 tests/gateway/test_voice_command.py | 450 ++++++++++++++++++++++++++++
 4 files changed, 517 insertions(+), 26 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 940f360e..c3abaa69 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -752,11 +752,17 @@ class BasePlatformAdapter(ABC):
 
                 # Play TTS audio before text (voice-first experience)
                 if _tts_path and Path(_tts_path).exists():
-                    await self.play_tts(
-                        chat_id=event.source.chat_id,
-                        audio_path=_tts_path,
-                        metadata=_thread_metadata,
-                    )
+                    try:
+                        await self.play_tts(
+                            chat_id=event.source.chat_id,
+                            audio_path=_tts_path,
+                            metadata=_thread_metadata,
+                        )
+                    finally:
+                        try:
+                            os.remove(_tts_path)
+                        except OSError:
+                            pass
 
                 # Send the text portion
                 if text_content:
diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index cdf622cf..2679facb 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -108,6 +108,9 @@ class VoiceReceiver:
         # Pause flag: don't capture while bot is playing TTS
         self._paused = False
 
+        # Debug logging counter (instance-level to avoid cross-instance races)
+        self._packet_debug_count = 0
+
     # ------------------------------------------------------------------
     # Lifecycle
     # ------------------------------------------------------------------
@@ -131,10 +134,11 @@ class VoiceReceiver:
             self._vc._connection.remove_socket_listener(self._on_packet)
         except Exception:
             pass
-        self._buffers.clear()
-        self._last_packet_time.clear()
-        self._decoders.clear()
-        self._ssrc_to_user.clear()
+        with self._lock:
+            self._buffers.clear()
+            self._last_packet_time.clear()
+            self._decoders.clear()
+            self._ssrc_to_user.clear()
         logger.info("VoiceReceiver stopped")
 
     def pause(self):
@@ -188,15 +192,13 @@ class VoiceReceiver:
     # Packet handler (called from SocketReader thread)
     # ------------------------------------------------------------------
 
-    _packet_debug_count = 0  # class-level counter for debug logging
-
     def _on_packet(self, data: bytes):
         if not self._running or self._paused:
             return
 
         # Log first few raw packets for debugging
-        VoiceReceiver._packet_debug_count += 1
-        if VoiceReceiver._packet_debug_count <= 5:
+        self._packet_debug_count += 1
+        if self._packet_debug_count <= 5:
             logger.info(
                 "Raw UDP packet: len=%d, first_bytes=%s",
                 len(data), data[:4].hex() if len(data) >= 4 else "short",
@@ -209,7 +211,7 @@ class VoiceReceiver:
         # Lower bits may vary (padding, extension, CSRC count).
         # Payload type (byte 1 lower 7 bits) = 0x78 (120) for voice.
         if (data[0] >> 6) != 2 or (data[1] & 0x7F) != 0x78:
-            if VoiceReceiver._packet_debug_count <= 5:
+            if self._packet_debug_count <= 5:
                 logger.info("Skipped non-RTP: byte0=0x%02x byte1=0x%02x", data[0], data[1])
             return
 
@@ -235,7 +237,7 @@ class VoiceReceiver:
             ext_words = struct.unpack_from(">H", data, ext_preamble_offset + 2)[0]
             ext_data_len = ext_words * 4
 
-        if VoiceReceiver._packet_debug_count <= 10:
+        if self._packet_debug_count <= 10:
             with self._lock:
                 known_user = self._ssrc_to_user.get(ssrc, "unknown")
             logger.info(
@@ -258,7 +260,7 @@ class VoiceReceiver:
             box = nacl.secret.Aead(self._secret_key)
             decrypted = box.decrypt(encrypted, header, bytes(nonce))
         except Exception as e:
-            if VoiceReceiver._packet_debug_count <= 10:
+            if self._packet_debug_count <= 10:
                 logger.warning("NaCl decrypt failed: %s (hdr=%d, enc=%d)", e, header_size, len(encrypted))
             return
 
@@ -271,7 +273,7 @@ class VoiceReceiver:
             with self._lock:
                 user_id = self._ssrc_to_user.get(ssrc, 0)
             if user_id == 0:
-                if VoiceReceiver._packet_debug_count <= 10:
+                if self._packet_debug_count <= 10:
                     logger.warning("DAVE skip: unknown user for ssrc=%d", ssrc)
                 return  # unknown user, can't DAVE-decrypt
             try:
@@ -280,7 +282,7 @@ class VoiceReceiver:
                     user_id, davey.MediaType.audio, decrypted
                 )
             except Exception as e:
-                if VoiceReceiver._packet_debug_count <= 10:
+                if self._packet_debug_count <= 10:
                     logger.warning("DAVE decrypt failed for ssrc=%d: %s", ssrc, e)
                 return
 
@@ -394,6 +396,7 @@ class DiscordAdapter(BasePlatformAdapter):
         self._voice_receivers: Dict[int, VoiceReceiver] = {}  # guild_id -> VoiceReceiver
         self._voice_listen_tasks: Dict[int, asyncio.Task] = {}  # guild_id -> listen loop
         self._voice_input_callback: Optional[Callable] = None  # set by run.py
+        self._on_voice_disconnect: Optional[Callable] = None  # set by run.py
     
     async def connect(self) -> bool:
         """Connect to Discord and start receiving events."""
@@ -751,6 +754,9 @@ class DiscordAdapter(BasePlatformAdapter):
             task.cancel()
         self._voice_text_channels.pop(guild_id, None)
 
+    # Maximum seconds to wait for voice playback before giving up
+    PLAYBACK_TIMEOUT = 120
+
     async def play_in_voice_channel(self, guild_id: int, audio_path: str) -> bool:
         """Play an audio file in the connected voice channel."""
         vc = self._voice_clients.get(guild_id)
@@ -763,12 +769,17 @@ class DiscordAdapter(BasePlatformAdapter):
             receiver.pause()
 
         try:
-            # Wait for current playback to finish
+            # Wait for current playback to finish (with timeout)
+            wait_start = time.monotonic()
             while vc.is_playing():
+                if time.monotonic() - wait_start > self.PLAYBACK_TIMEOUT:
+                    logger.warning("Timed out waiting for previous playback to finish")
+                    vc.stop()
+                    break
                 await asyncio.sleep(0.1)
 
             done = asyncio.Event()
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
 
             def _after(error):
                 if error:
@@ -778,7 +789,11 @@ class DiscordAdapter(BasePlatformAdapter):
             source = discord.FFmpegPCMAudio(audio_path)
             source = discord.PCMVolumeTransformer(source, volume=1.0)
             vc.play(source, after=_after)
-            await done.wait()
+            try:
+                await asyncio.wait_for(done.wait(), timeout=self.PLAYBACK_TIMEOUT)
+            except asyncio.TimeoutError:
+                logger.warning("Voice playback timed out after %ds", self.PLAYBACK_TIMEOUT)
+                vc.stop()
             self._reset_voice_timeout(guild_id)
             return True
         finally:
@@ -814,6 +829,12 @@ class DiscordAdapter(BasePlatformAdapter):
             return
         text_ch_id = self._voice_text_channels.get(guild_id)
         await self.leave_voice_channel(guild_id)
+        # Notify the runner so it can clean up voice_mode state
+        if self._on_voice_disconnect and text_ch_id:
+            try:
+                self._on_voice_disconnect(str(text_ch_id))
+            except Exception:
+                pass
         if text_ch_id and self._client:
             ch = self._client.get_channel(text_ch_id)
             if ch:
diff --git a/gateway/run.py b/gateway/run.py
index b77da6a2..ae5852ed 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -2190,10 +2190,12 @@ class GatewayRunner:
         if not voice_channel:
             return "You need to be in a voice channel first."
 
-        # Wire callback BEFORE join so voice input arriving immediately
+        # Wire callbacks BEFORE join so voice input arriving immediately
         # after connection is not lost.
         if hasattr(adapter, "_voice_input_callback"):
             adapter._voice_input_callback = self._handle_voice_channel_input
+        if hasattr(adapter, "_on_voice_disconnect"):
+            adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
 
         try:
             success = await adapter.join_voice_channel(voice_channel)
@@ -2236,6 +2238,14 @@ class GatewayRunner:
             adapter._voice_input_callback = None
         return "Left voice channel."
 
+    def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
+        """Called by the adapter when a voice channel times out.
+
+        Cleans up runner-side voice_mode state that the adapter cannot reach.
+        """
+        self._voice_mode.pop(chat_id, None)
+        self._save_voice_modes()
+
     async def _handle_voice_channel_input(
         self, guild_id: int, user_id: int, transcript: str
     ):
@@ -2339,6 +2349,9 @@ class GatewayRunner:
 
     async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
         """Generate TTS audio and send as a voice message before the text reply."""
+        import uuid as _uuid
+        audio_path = None
+        actual_path = None
         try:
             from tools.tts_tool import text_to_speech_tool, _strip_markdown_for_tts
 
@@ -2350,7 +2363,7 @@ class GatewayRunner:
             # The TTS tool may convert to .ogg — use file_path from result.
             audio_path = os.path.join(
                 tempfile.gettempdir(), "hermes_voice",
-                f"tts_reply_{int(time.time())}_{id(event) % 10000}.mp3",
+                f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
             )
             os.makedirs(os.path.dirname(audio_path), exist_ok=True)
 
@@ -2387,13 +2400,14 @@ class GatewayRunner:
                 if "metadata" not in sig.parameters:
                     send_kwargs.pop("metadata", None)
                 await adapter.send_voice(**send_kwargs)
-            for p in {audio_path, actual_path}:
+        except Exception as e:
+            logger.warning("Auto voice reply failed: %s", e, exc_info=True)
+        finally:
+            for p in {audio_path, actual_path} - {None}:
                 try:
                     os.unlink(p)
                 except OSError:
                     pass
-        except Exception as e:
-            logger.warning("Auto voice reply failed: %s", e, exc_info=True)
 
     async def _handle_rollback_command(self, event: MessageEvent) -> str:
         """Handle /rollback command — list or restore filesystem checkpoints."""
diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py
index c39844ff..e6fc2731 100644
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -1402,3 +1402,453 @@ class TestStreamTtsToSpeaker:
         t.join(timeout=5)
         assert done_evt.is_set()
         assert len(spoken) >= 1
+
+
+# =====================================================================
+# Bug 1: VoiceReceiver.stop() must hold lock while clearing shared state
+# =====================================================================
+
+class TestStopAcquiresLock:
+    """stop() must acquire _lock before clearing buffers/state."""
+
+    @staticmethod
+    def _make_receiver():
+        from gateway.platforms.discord import VoiceReceiver
+        vc = MagicMock()
+        vc._connection.secret_key = [0] * 32
+        vc._connection.dave_session = None
+        vc._connection.ssrc = 1
+        return VoiceReceiver(vc)
+
+    def test_stop_clears_under_lock(self):
+        """stop() acquires _lock before clearing buffers.
+
+        Verify by holding the lock from another thread and checking that
+        stop() blocks until the lock is released.
+        """
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver._buffers[100] = bytearray(b"\x00" * 500)
+        receiver._last_packet_time[100] = time.monotonic()
+        receiver.map_ssrc(100, 42)
+
+        # Hold the lock from another thread
+        lock_acquired = threading.Event()
+        release_lock = threading.Event()
+
+        def hold_lock():
+            with receiver._lock:
+                lock_acquired.set()
+                release_lock.wait(timeout=5)
+
+        holder = threading.Thread(target=hold_lock, daemon=True)
+        holder.start()
+        lock_acquired.wait(timeout=2)
+
+        # stop() in another thread — should block on the lock
+        stop_done = threading.Event()
+
+        def do_stop():
+            receiver.stop()
+            stop_done.set()
+
+        stopper = threading.Thread(target=do_stop, daemon=True)
+        stopper.start()
+
+        # stop should NOT complete while lock is held
+        assert not stop_done.wait(timeout=0.3), \
+            "stop() should block while _lock is held by another thread"
+
+        # Release the lock — stop should complete
+        release_lock.set()
+        assert stop_done.wait(timeout=2), \
+            "stop() should complete after lock is released"
+
+        # State should be cleared
+        assert len(receiver._buffers) == 0
+        assert len(receiver._ssrc_to_user) == 0
+        holder.join(timeout=2)
+        stopper.join(timeout=2)
+
+    def test_stop_does_not_deadlock_with_on_packet(self):
+        """stop() during _on_packet should not deadlock."""
+        receiver = self._make_receiver()
+        receiver.start()
+
+        blocked = threading.Event()
+        released = threading.Event()
+
+        def hold_lock():
+            with receiver._lock:
+                blocked.set()
+                released.wait(timeout=2)
+
+        t = threading.Thread(target=hold_lock, daemon=True)
+        t.start()
+        blocked.wait(timeout=2)
+
+        stop_done = threading.Event()
+
+        def do_stop():
+            receiver.stop()
+            stop_done.set()
+
+        t2 = threading.Thread(target=do_stop, daemon=True)
+        t2.start()
+
+        # stop should be blocked waiting for lock
+        assert not stop_done.wait(timeout=0.2), \
+            "stop() should wait for lock, not clear without it"
+
+        released.set()
+        assert stop_done.wait(timeout=2), "stop() should complete after lock released"
+        t.join(timeout=2)
+        t2.join(timeout=2)
+
+
+# =====================================================================
+# Bug 2: _packet_debug_count must be instance-level, not class-level
+# =====================================================================
+
+class TestPacketDebugCounterIsInstanceLevel:
+    """Each VoiceReceiver instance has its own debug counter."""
+
+    @staticmethod
+    def _make_receiver():
+        from gateway.platforms.discord import VoiceReceiver
+        vc = MagicMock()
+        vc._connection.secret_key = [0] * 32
+        vc._connection.dave_session = None
+        vc._connection.ssrc = 1
+        return VoiceReceiver(vc)
+
+    def test_counter_is_per_instance(self):
+        """Two receivers have independent counters."""
+        r1 = self._make_receiver()
+        r2 = self._make_receiver()
+
+        r1._packet_debug_count = 10
+        assert r2._packet_debug_count == 0, \
+            "_packet_debug_count must be instance-level, not shared across instances"
+
+    def test_counter_initialized_in_init(self):
+        """Counter is set in __init__, not as a class variable."""
+        r = self._make_receiver()
+        assert "_packet_debug_count" in r.__dict__, \
+            "_packet_debug_count should be in instance __dict__, not class"
+
+
+# =====================================================================
+# Bug 3: play_in_voice_channel uses get_running_loop not get_event_loop
+# =====================================================================
+
+class TestPlayInVoiceChannelUsesRunningLoop:
+    """play_in_voice_channel must use asyncio.get_running_loop()."""
+
+    def test_source_uses_get_running_loop(self):
+        """The method source code calls get_running_loop, not get_event_loop."""
+        import inspect
+        from gateway.platforms.discord import DiscordAdapter
+        source = inspect.getsource(DiscordAdapter.play_in_voice_channel)
+        assert "get_running_loop" in source, \
+            "play_in_voice_channel should use asyncio.get_running_loop()"
+        assert "get_event_loop" not in source, \
+            "play_in_voice_channel should NOT use deprecated asyncio.get_event_loop()"
+
+
+# =====================================================================
+# Bug 4: _send_voice_reply filename uses uuid (no collision)
+# =====================================================================
+
+class TestSendVoiceReplyFilename:
+    """_send_voice_reply uses uuid for unique filenames."""
+
+    def test_filename_uses_uuid(self):
+        """The method uses uuid in the filename, not time-based."""
+        import inspect
+        from gateway.run import GatewayRunner
+        source = inspect.getsource(GatewayRunner._send_voice_reply)
+        assert "uuid" in source, \
+            "_send_voice_reply should use uuid for unique filenames"
+        assert "int(time.time())" not in source, \
+            "_send_voice_reply should not use int(time.time()) — collision risk"
+
+    def test_filenames_are_unique(self):
+        """Two calls produce different filenames."""
+        import uuid
+        names = set()
+        for _ in range(100):
+            name = f"tts_reply_{uuid.uuid4().hex[:12]}.mp3"
+            assert name not in names, f"Collision detected: {name}"
+            names.add(name)
+
+
+# =====================================================================
+# Bug 5: Voice timeout cleans up runner voice_mode via callback
+# =====================================================================
+
+class TestVoiceTimeoutCleansRunnerState:
+    """Timeout disconnect notifies runner to clean voice_mode."""
+
+    @staticmethod
+    def _make_discord_adapter():
+        from gateway.platforms.discord import DiscordAdapter
+        from gateway.config import PlatformConfig, Platform
+        config = PlatformConfig(enabled=True, extra={})
+        config.token = "fake-token"
+        adapter = object.__new__(DiscordAdapter)
+        adapter.platform = Platform.DISCORD
+        adapter.config = config
+        adapter._voice_clients = {}
+        adapter._voice_text_channels = {}
+        adapter._voice_timeout_tasks = {}
+        adapter._voice_receivers = {}
+        adapter._voice_listen_tasks = {}
+        adapter._voice_input_callback = None
+        adapter._on_voice_disconnect = None
+        adapter._client = None
+        adapter._broadcast = AsyncMock()
+        adapter._allowed_user_ids = set()
+        return adapter
+
+    @pytest.fixture
+    def adapter(self):
+        return self._make_discord_adapter()
+
+    def test_adapter_has_on_voice_disconnect_attr(self, adapter):
+        """DiscordAdapter has _on_voice_disconnect callback attribute."""
+        assert hasattr(adapter, "_on_voice_disconnect")
+        assert adapter._on_voice_disconnect is None
+
+    @pytest.mark.asyncio
+    async def test_timeout_calls_disconnect_callback(self, adapter):
+        """_voice_timeout_handler calls _on_voice_disconnect with chat_id."""
+        callback_calls = []
+        adapter._on_voice_disconnect = lambda chat_id: callback_calls.append(chat_id)
+
+        # Set up state as if we're in a voice channel
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        mock_vc.disconnect = AsyncMock()
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_text_channels[111] = 999
+        adapter._voice_timeout_tasks[111] = MagicMock()
+        adapter._voice_receivers[111] = MagicMock()
+        adapter._voice_listen_tasks[111] = MagicMock()
+
+        # Patch sleep to return immediately
+        with patch("asyncio.sleep", new_callable=AsyncMock):
+            await adapter._voice_timeout_handler(111)
+
+        assert "999" in callback_calls, \
+            "_on_voice_disconnect must be called with chat_id on timeout"
+
+    @pytest.mark.asyncio
+    async def test_runner_cleanup_method_removes_voice_mode(self, tmp_path):
+        """_handle_voice_timeout_cleanup removes voice_mode for chat."""
+        runner = _make_runner(tmp_path)
+        runner._voice_mode["999"] = "all"
+
+        runner._handle_voice_timeout_cleanup("999")
+
+        assert "999" not in runner._voice_mode, \
+            "voice_mode must be removed after timeout cleanup"
+
+    @pytest.mark.asyncio
+    async def test_timeout_without_callback_does_not_crash(self, adapter):
+        """Timeout works even without _on_voice_disconnect set."""
+        adapter._on_voice_disconnect = None
+
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        mock_vc.disconnect = AsyncMock()
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_text_channels[111] = 999
+        adapter._voice_timeout_tasks[111] = MagicMock()
+
+        with patch("asyncio.sleep", new_callable=AsyncMock):
+            await adapter._voice_timeout_handler(111)
+
+        assert 111 not in adapter._voice_clients
+
+
+# =====================================================================
+# Bug 6: play_in_voice_channel has playback timeout
+# =====================================================================
+
+class TestPlaybackTimeout:
+    """play_in_voice_channel must time out instead of blocking forever."""
+
+    @staticmethod
+    def _make_discord_adapter():
+        from gateway.platforms.discord import DiscordAdapter
+        from gateway.config import PlatformConfig, Platform
+        config = PlatformConfig(enabled=True, extra={})
+        config.token = "fake-token"
+        adapter = object.__new__(DiscordAdapter)
+        adapter.platform = Platform.DISCORD
+        adapter.config = config
+        adapter._voice_clients = {}
+        adapter._voice_text_channels = {}
+        adapter._voice_timeout_tasks = {}
+        adapter._voice_receivers = {}
+        adapter._voice_listen_tasks = {}
+        adapter._voice_input_callback = None
+        adapter._on_voice_disconnect = None
+        adapter._client = None
+        adapter._broadcast = AsyncMock()
+        adapter._allowed_user_ids = set()
+        return adapter
+
+    def test_source_has_wait_for_timeout(self):
+        """The method uses asyncio.wait_for with timeout."""
+        import inspect
+        from gateway.platforms.discord import DiscordAdapter
+        source = inspect.getsource(DiscordAdapter.play_in_voice_channel)
+        assert "wait_for" in source, \
+            "play_in_voice_channel must use asyncio.wait_for for timeout"
+        assert "PLAYBACK_TIMEOUT" in source, \
+            "play_in_voice_channel must reference PLAYBACK_TIMEOUT constant"
+
+    def test_playback_timeout_constant_exists(self):
+        """PLAYBACK_TIMEOUT constant is defined on DiscordAdapter."""
+        from gateway.platforms.discord import DiscordAdapter
+        assert hasattr(DiscordAdapter, "PLAYBACK_TIMEOUT")
+        assert DiscordAdapter.PLAYBACK_TIMEOUT > 0
+
+    @pytest.mark.asyncio
+    async def test_playback_timeout_fires(self):
+        """When done event is never set, playback times out gracefully."""
+        from gateway.platforms.discord import DiscordAdapter
+        adapter = self._make_discord_adapter()
+
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        mock_vc.is_playing.return_value = False
+        # play() never calls the after callback -> done never set
+        mock_vc.play = MagicMock()
+        mock_vc.stop = MagicMock()
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_timeout_tasks[111] = MagicMock()
+
+        # Use a tiny timeout for test speed
+        original_timeout = DiscordAdapter.PLAYBACK_TIMEOUT
+        DiscordAdapter.PLAYBACK_TIMEOUT = 0.1
+        try:
+            with patch("discord.FFmpegPCMAudio"), \
+                 patch("discord.PCMVolumeTransformer", side_effect=lambda s, **kw: s):
+                result = await adapter.play_in_voice_channel(111, "/tmp/test.mp3")
+            assert result is True
+            # vc.stop() should have been called due to timeout
+            mock_vc.stop.assert_called()
+        finally:
+            DiscordAdapter.PLAYBACK_TIMEOUT = original_timeout
+
+    @pytest.mark.asyncio
+    async def test_is_playing_wait_has_timeout(self):
+        """While loop waiting for previous playback has a timeout."""
+        from gateway.platforms.discord import DiscordAdapter
+        adapter = self._make_discord_adapter()
+
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        # is_playing always returns True — would loop forever without timeout
+        mock_vc.is_playing.return_value = True
+        mock_vc.stop = MagicMock()
+        mock_vc.play = MagicMock()
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_timeout_tasks[111] = MagicMock()
+
+        original_timeout = DiscordAdapter.PLAYBACK_TIMEOUT
+        DiscordAdapter.PLAYBACK_TIMEOUT = 0.2
+        try:
+            with patch("discord.FFmpegPCMAudio"), \
+                 patch("discord.PCMVolumeTransformer", side_effect=lambda s, **kw: s):
+                result = await adapter.play_in_voice_channel(111, "/tmp/test.mp3")
+            assert result is True
+            # stop() called to break out of the is_playing loop
+            mock_vc.stop.assert_called()
+        finally:
+            DiscordAdapter.PLAYBACK_TIMEOUT = original_timeout
+
+
+# =====================================================================
+# Bug 7: _send_voice_reply cleanup in finally block
+# =====================================================================
+
+class TestSendVoiceReplyCleanup:
+    """_send_voice_reply must clean up temp files even on exception."""
+
+    def test_cleanup_in_finally(self):
+        """The method has cleanup in a finally block, not inside try."""
+        import inspect, textwrap, ast
+        from gateway.run import GatewayRunner
+        source = textwrap.dedent(inspect.getsource(GatewayRunner._send_voice_reply))
+        tree = ast.parse(source)
+        func = tree.body[0]
+
+        has_finally_unlink = False
+        for node in ast.walk(func):
+            if isinstance(node, ast.Try) and node.finalbody:
+                finally_source = ast.dump(node.finalbody[0])
+                if "unlink" in finally_source or "remove" in finally_source:
+                    has_finally_unlink = True
+                    break
+
+        assert has_finally_unlink, \
+            "_send_voice_reply must have os.unlink in a finally block"
+
+    @pytest.mark.asyncio
+    async def test_files_cleaned_on_send_exception(self, tmp_path):
+        """Temp files are removed even when send_voice raises."""
+        runner = _make_runner(tmp_path)
+        adapter = MagicMock()
+        adapter.send_voice = AsyncMock(side_effect=RuntimeError("send failed"))
+        adapter.is_in_voice_channel = MagicMock(return_value=False)
+        event = _make_event(message_type=MessageType.VOICE)
+        runner.adapters[event.source.platform] = adapter
+        runner._get_guild_id = MagicMock(return_value=None)
+
+        # Create a fake audio file that TTS would produce
+        fake_audio = tmp_path / "hermes_voice"
+        fake_audio.mkdir()
+        audio_file = fake_audio / "test.mp3"
+        audio_file.write_bytes(b"fake audio")
+
+        tts_result = json.dumps({
+            "success": True,
+            "file_path": str(audio_file),
+        })
+
+        with patch("gateway.run.asyncio.to_thread", new_callable=AsyncMock, return_value=tts_result), \
+             patch("tools.tts_tool._strip_markdown_for_tts", return_value="hello"), \
+             patch("os.path.isfile", return_value=True), \
+             patch("os.makedirs"):
+            await runner._send_voice_reply(event, "Hello world")
+
+        # File should be cleaned up despite exception
+        assert not audio_file.exists(), \
+            "Temp audio file must be cleaned up even when send_voice raises"
+
+
+# =====================================================================
+# Bug 8: Base adapter auto-TTS cleans up temp file after play_tts
+# =====================================================================
+
+class TestAutoTtsTempFileCleanup:
+    """Base adapter auto-TTS must clean up generated audio file."""
+
+    def test_source_has_finally_remove(self):
+        """play_tts call is wrapped in try/finally with os.remove."""
+        import inspect
+        from gateway.platforms.base import BasePlatformAdapter
+        source = inspect.getsource(BasePlatformAdapter._process_message_background)
+        # Find the play_tts section and verify cleanup
+        play_tts_idx = source.find("play_tts")
+        assert play_tts_idx > 0
+        after_play = source[play_tts_idx:]
+        finally_idx = after_play.find("finally")
+        remove_idx = after_play.find("os.remove")
+        assert finally_idx > 0, "play_tts must be in a try/finally block"
+        assert remove_idx > 0, "finally block must call os.remove on _tts_path"
+        assert remove_idx > finally_idx, "os.remove must be inside the finally block"