fix: voice pipeline thread safety and error handling bugs

- Add lock protection around VoiceReceiver buffer writes in _on_packet
  to prevent a race condition with check_silence running on another thread
- Wire _voice_input_callback BEFORE join_voice_channel to avoid
  losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup
  (voice_mode, callback) still runs even if leave_voice_channel raises
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix
This commit is contained in:
0xbyt4 2026-03-11 23:36:47 +03:00
parent 34c324ff59
commit c925d2ee76
4 changed files with 275 additions and 23 deletions

View file

@ -739,7 +739,9 @@ class BasePlatformAdapter(ABC):
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
if check_tts_requirements():
import json as _json
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000]
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
if not speech_text:
raise ValueError("Empty text after markdown cleanup")
tts_result_str = await asyncio.to_thread(
text_to_speech_tool, text=speech_text
)

View file

@ -289,8 +289,9 @@ class VoiceReceiver:
if ssrc not in self._decoders:
self._decoders[ssrc] = discord.opus.Decoder()
pcm = self._decoders[ssrc].decode(decrypted)
self._buffers[ssrc].extend(pcm)
self._last_packet_time[ssrc] = time.monotonic()
with self._lock:
self._buffers[ssrc].extend(pcm)
self._last_packet_time[ssrc] = time.monotonic()
except Exception:
return
@ -305,24 +306,25 @@ class VoiceReceiver:
with self._lock:
ssrc_user_map = dict(self._ssrc_to_user)
ssrc_list = list(self._buffers.keys())
for ssrc in list(self._buffers.keys()):
last_time = self._last_packet_time.get(ssrc, now)
silence_duration = now - last_time
buf = self._buffers[ssrc]
# 48kHz, 16-bit, stereo = 192000 bytes/sec
buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)
for ssrc in ssrc_list:
last_time = self._last_packet_time.get(ssrc, now)
silence_duration = now - last_time
buf = self._buffers[ssrc]
# 48kHz, 16-bit, stereo = 192000 bytes/sec
buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)
if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
user_id = ssrc_user_map.get(ssrc, 0)
if user_id:
completed.append((user_id, bytes(buf)))
self._buffers[ssrc] = bytearray()
self._last_packet_time.pop(ssrc, None)
elif silence_duration >= self.SILENCE_THRESHOLD * 2:
# Stale buffer with no valid user — discard
self._buffers.pop(ssrc, None)
self._last_packet_time.pop(ssrc, None)
if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
user_id = ssrc_user_map.get(ssrc, 0)
if user_id:
completed.append((user_id, bytes(buf)))
self._buffers[ssrc] = bytearray()
self._last_packet_time.pop(ssrc, None)
elif silence_duration >= self.SILENCE_THRESHOLD * 2:
# Stale buffer with no valid user — discard
self._buffers.pop(ssrc, None)
self._last_packet_time.pop(ssrc, None)
return completed

View file

@ -2190,23 +2190,28 @@ class GatewayRunner:
if not voice_channel:
return "You need to be in a voice channel first."
# Wire callback BEFORE join so voice input arriving immediately
# after connection is not lost.
if hasattr(adapter, "_voice_input_callback"):
adapter._voice_input_callback = self._handle_voice_channel_input
try:
success = await adapter.join_voice_channel(voice_channel)
except Exception as e:
logger.warning("Failed to join voice channel: %s", e)
adapter._voice_input_callback = None
return f"Failed to join voice channel: {e}"
if success:
adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
self._voice_mode[event.source.chat_id] = "all"
self._save_voice_modes()
# Wire voice input callback so the adapter can deliver transcripts
if hasattr(adapter, "_voice_input_callback"):
adapter._voice_input_callback = self._handle_voice_channel_input
return (
f"Joined voice channel **{voice_channel.name}**.\n"
f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
)
# Join failed — clear callback
adapter._voice_input_callback = None
return "Failed to join voice channel. Check bot permissions (Connect + Speak)."
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
@ -2220,9 +2225,15 @@ class GatewayRunner:
if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
return "Not in a voice channel."
await adapter.leave_voice_channel(guild_id)
try:
await adapter.leave_voice_channel(guild_id)
except Exception as e:
logger.warning("Error leaving voice channel: %s", e)
# Always clean up state even if leave raised an exception
self._voice_mode.pop(event.source.chat_id, None)
self._save_voice_modes()
if hasattr(adapter, "_voice_input_callback"):
adapter._voice_input_callback = None
return "Left voice channel."
async def _handle_voice_channel_input(