fix: voice pipeline thread safety and error handling bugs
- Add lock protection around VoiceReceiver buffer writes in _on_packet to prevent a race condition with check_silence, which runs on a different thread
- Wire _voice_input_callback BEFORE join_voice_channel to avoid losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup (voice_mode, callback) even if leave_voice_channel raises an exception
- Guard against empty text after markdown stripping in base.py auto-TTS
- Add 11 tests proving each bug and verifying the fix
This commit is contained in:
parent
34c324ff59
commit
c925d2ee76
4 changed files with 275 additions and 23 deletions
|
|
@ -739,7 +739,9 @@ class BasePlatformAdapter(ABC):
|
|||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
import json as _json
|
||||
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000]
|
||||
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
|
||||
if not speech_text:
|
||||
raise ValueError("Empty text after markdown cleanup")
|
||||
tts_result_str = await asyncio.to_thread(
|
||||
text_to_speech_tool, text=speech_text
|
||||
)
|
||||
|
|
|
|||
|
|
@ -289,8 +289,9 @@ class VoiceReceiver:
|
|||
if ssrc not in self._decoders:
|
||||
self._decoders[ssrc] = discord.opus.Decoder()
|
||||
pcm = self._decoders[ssrc].decode(decrypted)
|
||||
self._buffers[ssrc].extend(pcm)
|
||||
self._last_packet_time[ssrc] = time.monotonic()
|
||||
with self._lock:
|
||||
self._buffers[ssrc].extend(pcm)
|
||||
self._last_packet_time[ssrc] = time.monotonic()
|
||||
except Exception:
|
||||
return
|
||||
|
||||
|
|
@ -305,24 +306,25 @@ class VoiceReceiver:
|
|||
|
||||
with self._lock:
|
||||
ssrc_user_map = dict(self._ssrc_to_user)
|
||||
ssrc_list = list(self._buffers.keys())
|
||||
|
||||
for ssrc in list(self._buffers.keys()):
|
||||
last_time = self._last_packet_time.get(ssrc, now)
|
||||
silence_duration = now - last_time
|
||||
buf = self._buffers[ssrc]
|
||||
# 48kHz, 16-bit, stereo = 192000 bytes/sec
|
||||
buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)
|
||||
for ssrc in ssrc_list:
|
||||
last_time = self._last_packet_time.get(ssrc, now)
|
||||
silence_duration = now - last_time
|
||||
buf = self._buffers[ssrc]
|
||||
# 48kHz, 16-bit, stereo = 192000 bytes/sec
|
||||
buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)
|
||||
|
||||
if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
|
||||
user_id = ssrc_user_map.get(ssrc, 0)
|
||||
if user_id:
|
||||
completed.append((user_id, bytes(buf)))
|
||||
self._buffers[ssrc] = bytearray()
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
elif silence_duration >= self.SILENCE_THRESHOLD * 2:
|
||||
# Stale buffer with no valid user — discard
|
||||
self._buffers.pop(ssrc, None)
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
|
||||
user_id = ssrc_user_map.get(ssrc, 0)
|
||||
if user_id:
|
||||
completed.append((user_id, bytes(buf)))
|
||||
self._buffers[ssrc] = bytearray()
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
elif silence_duration >= self.SILENCE_THRESHOLD * 2:
|
||||
# Stale buffer with no valid user — discard
|
||||
self._buffers.pop(ssrc, None)
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
|
||||
return completed
|
||||
|
||||
|
|
|
|||
|
|
@ -2190,23 +2190,28 @@ class GatewayRunner:
|
|||
if not voice_channel:
|
||||
return "You need to be in a voice channel first."
|
||||
|
||||
# Wire callback BEFORE join so voice input arriving immediately
|
||||
# after connection is not lost.
|
||||
if hasattr(adapter, "_voice_input_callback"):
|
||||
adapter._voice_input_callback = self._handle_voice_channel_input
|
||||
|
||||
try:
|
||||
success = await adapter.join_voice_channel(voice_channel)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to join voice channel: %s", e)
|
||||
adapter._voice_input_callback = None
|
||||
return f"Failed to join voice channel: {e}"
|
||||
|
||||
if success:
|
||||
adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
|
||||
self._voice_mode[event.source.chat_id] = "all"
|
||||
self._save_voice_modes()
|
||||
# Wire voice input callback so the adapter can deliver transcripts
|
||||
if hasattr(adapter, "_voice_input_callback"):
|
||||
adapter._voice_input_callback = self._handle_voice_channel_input
|
||||
return (
|
||||
f"Joined voice channel **{voice_channel.name}**.\n"
|
||||
f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
|
||||
)
|
||||
# Join failed — clear callback
|
||||
adapter._voice_input_callback = None
|
||||
return "Failed to join voice channel. Check bot permissions (Connect + Speak)."
|
||||
|
||||
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
|
||||
|
|
@ -2220,9 +2225,15 @@ class GatewayRunner:
|
|||
if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
|
||||
return "Not in a voice channel."
|
||||
|
||||
await adapter.leave_voice_channel(guild_id)
|
||||
try:
|
||||
await adapter.leave_voice_channel(guild_id)
|
||||
except Exception as e:
|
||||
logger.warning("Error leaving voice channel: %s", e)
|
||||
# Always clean up state even if leave raised an exception
|
||||
self._voice_mode.pop(event.source.chat_id, None)
|
||||
self._save_voice_modes()
|
||||
if hasattr(adapter, "_voice_input_callback"):
|
||||
adapter._voice_input_callback = None
|
||||
return "Left voice channel."
|
||||
|
||||
async def _handle_voice_channel_input(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue