fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet to
  prevent a race condition with check_silence running on a different thread
- Wire _voice_input_callback BEFORE join_voice_channel to avoid losing voice
  input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) even if leave raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix
2026-03-11 23:36:47 +03:00 · 2026-03-11 23:36:47 +03:00 · c925d2ee76
commit c925d2ee76
parent 34c324ff59
4 changed files with 275 additions and 23 deletions
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -739,7 +739,9 @@ class BasePlatformAdapter(ABC):
                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
                            import json as _json
-                            speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000]
+                            speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
+                            if not speech_text:
+                                raise ValueError("Empty text after markdown cleanup")
                            tts_result_str = await asyncio.to_thread(
                                text_to_speech_tool, text=speech_text
                            )
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -289,8 +289,9 @@ class VoiceReceiver:
            if ssrc not in self._decoders:
                self._decoders[ssrc] = discord.opus.Decoder()
            pcm = self._decoders[ssrc].decode(decrypted)
-            self._buffers[ssrc].extend(pcm)
-            self._last_packet_time[ssrc] = time.monotonic()
+            with self._lock:
+                self._buffers[ssrc].extend(pcm)
+                self._last_packet_time[ssrc] = time.monotonic()
        except Exception:
            return

@@ -305,24 +306,25 @@ class VoiceReceiver:

        with self._lock:
            ssrc_user_map = dict(self._ssrc_to_user)
+            ssrc_list = list(self._buffers.keys())

-        for ssrc in list(self._buffers.keys()):
-            last_time = self._last_packet_time.get(ssrc, now)
-            silence_duration = now - last_time
-            buf = self._buffers[ssrc]
-            # 48kHz, 16-bit, stereo = 192000 bytes/sec
-            buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)
+            for ssrc in ssrc_list:
+                last_time = self._last_packet_time.get(ssrc, now)
+                silence_duration = now - last_time
+                buf = self._buffers[ssrc]
+                # 48kHz, 16-bit, stereo = 192000 bytes/sec
+                buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)

-            if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
-                user_id = ssrc_user_map.get(ssrc, 0)
-                if user_id:
-                    completed.append((user_id, bytes(buf)))
-                self._buffers[ssrc] = bytearray()
-                self._last_packet_time.pop(ssrc, None)
-            elif silence_duration >= self.SILENCE_THRESHOLD * 2:
-                # Stale buffer with no valid user — discard
-                self._buffers.pop(ssrc, None)
-                self._last_packet_time.pop(ssrc, None)
+                if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
+                    user_id = ssrc_user_map.get(ssrc, 0)
+                    if user_id:
+                        completed.append((user_id, bytes(buf)))
+                    self._buffers[ssrc] = bytearray()
+                    self._last_packet_time.pop(ssrc, None)
+                elif silence_duration >= self.SILENCE_THRESHOLD * 2:
+                    # Stale buffer with no valid user — discard
+                    self._buffers.pop(ssrc, None)
+                    self._last_packet_time.pop(ssrc, None)

        return completed