fix: voice pipeline thread safety and error handling bugs
- Add lock protection around VoiceReceiver buffer writes in _on_packet to prevent a race condition with check_silence, which runs on a different thread
- Wire _voice_input_callback BEFORE calling join_voice_channel to avoid losing voice input during the join window
- Add try/except around leave_voice_channel to ensure state cleanup (voice_mode, callback) even if the leave call raises an exception
- Guard against empty text after markdown stripping in the base.py auto-TTS path
- Add 11 tests reproducing each bug and verifying its fix
This commit is contained in:
parent
34c324ff59
commit
c925d2ee76
4 changed files with 275 additions and 23 deletions
|
|
@@ -739,7 +739,9 @@ class BasePlatformAdapter(ABC):
|
|||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
import json as _json
|
||||
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000]
|
||||
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
|
||||
if not speech_text:
|
||||
raise ValueError("Empty text after markdown cleanup")
|
||||
tts_result_str = await asyncio.to_thread(
|
||||
text_to_speech_tool, text=speech_text
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -289,8 +289,9 @@ class VoiceReceiver:
|
|||
if ssrc not in self._decoders:
|
||||
self._decoders[ssrc] = discord.opus.Decoder()
|
||||
pcm = self._decoders[ssrc].decode(decrypted)
|
||||
self._buffers[ssrc].extend(pcm)
|
||||
self._last_packet_time[ssrc] = time.monotonic()
|
||||
with self._lock:
|
||||
self._buffers[ssrc].extend(pcm)
|
||||
self._last_packet_time[ssrc] = time.monotonic()
|
||||
except Exception:
|
||||
return
|
||||
|
||||
|
|
@@ -305,24 +306,25 @@ class VoiceReceiver:
|
|||
|
||||
with self._lock:
|
||||
ssrc_user_map = dict(self._ssrc_to_user)
|
||||
ssrc_list = list(self._buffers.keys())
|
||||
|
||||
for ssrc in list(self._buffers.keys()):
|
||||
last_time = self._last_packet_time.get(ssrc, now)
|
||||
silence_duration = now - last_time
|
||||
buf = self._buffers[ssrc]
|
||||
# 48kHz, 16-bit, stereo = 192000 bytes/sec
|
||||
buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)
|
||||
for ssrc in ssrc_list:
|
||||
last_time = self._last_packet_time.get(ssrc, now)
|
||||
silence_duration = now - last_time
|
||||
buf = self._buffers[ssrc]
|
||||
# 48kHz, 16-bit, stereo = 192000 bytes/sec
|
||||
buf_duration = len(buf) / (self.SAMPLE_RATE * self.CHANNELS * 2)
|
||||
|
||||
if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
|
||||
user_id = ssrc_user_map.get(ssrc, 0)
|
||||
if user_id:
|
||||
completed.append((user_id, bytes(buf)))
|
||||
self._buffers[ssrc] = bytearray()
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
elif silence_duration >= self.SILENCE_THRESHOLD * 2:
|
||||
# Stale buffer with no valid user — discard
|
||||
self._buffers.pop(ssrc, None)
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
|
||||
user_id = ssrc_user_map.get(ssrc, 0)
|
||||
if user_id:
|
||||
completed.append((user_id, bytes(buf)))
|
||||
self._buffers[ssrc] = bytearray()
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
elif silence_duration >= self.SILENCE_THRESHOLD * 2:
|
||||
# Stale buffer with no valid user — discard
|
||||
self._buffers.pop(ssrc, None)
|
||||
self._last_packet_time.pop(ssrc, None)
|
||||
|
||||
return completed
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue