fix: 8 voice pipeline bugs with tests proving each fix

1. VoiceReceiver.stop() now acquires _lock before clearing shared state
   to prevent race with _on_packet on the socket reader thread
2. _packet_debug_count moved from class-level to instance-level to avoid
   cross-instance race condition in multi-guild setups
3. play_in_voice_channel uses asyncio.get_running_loop() instead of
   asyncio.get_event_loop(), which is deprecated when no loop is running
4. _send_voice_reply uses uuid for filenames instead of time-based names
   that can collide when two replies happen in the same second
5. Voice timeout now notifies runner via _on_voice_disconnect callback
   so runner cleans up _voice_mode state (prevents orphaned TTS replies)
6. play_in_voice_channel adds PLAYBACK_TIMEOUT (120s) to prevent
   infinite blocking when FFmpeg callback is never called
7. _send_voice_reply moves temp file cleanup to finally block so files
   are always cleaned up even when send_voice/play raises
8. Base adapter auto-TTS wraps play_tts in try/finally with os.remove
   to clean up generated audio files after playback

18 new tests (120 total voice tests)
This commit is contained in:
0xbyt4 2026-03-11 23:57:42 +03:00
parent c925d2ee76
commit 9722bd8be0
4 changed files with 517 additions and 26 deletions

View file

@ -2190,10 +2190,12 @@ class GatewayRunner:
if not voice_channel:
return "You need to be in a voice channel first."
# Wire callback BEFORE join so voice input arriving immediately
# Wire callbacks BEFORE join so voice input arriving immediately
# after connection is not lost.
if hasattr(adapter, "_voice_input_callback"):
adapter._voice_input_callback = self._handle_voice_channel_input
if hasattr(adapter, "_on_voice_disconnect"):
adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
try:
success = await adapter.join_voice_channel(voice_channel)
@ -2236,6 +2238,14 @@ class GatewayRunner:
adapter._voice_input_callback = None
return "Left voice channel."
def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
    """Forget the voice-mode entry for *chat_id* and persist the change.

    This method is assigned to the adapter's ``_on_voice_disconnect``
    attribute before a voice channel is joined, so the adapter can invoke
    it when a voice channel times out and let the runner drop state that
    only the runner holds.

    Args:
        chat_id: Key of the entry to drop from ``self._voice_mode``.
    """
    voice_modes = self._voice_mode
    # A missing key is fine: pop() with a default never raises KeyError.
    voice_modes.pop(chat_id, None)
    # Persist unconditionally, whether or not an entry was removed.
    self._save_voice_modes()
async def _handle_voice_channel_input(
self, guild_id: int, user_id: int, transcript: str
):
@ -2339,6 +2349,9 @@ class GatewayRunner:
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
"""Generate TTS audio and send as a voice message before the text reply."""
import uuid as _uuid
audio_path = None
actual_path = None
try:
from tools.tts_tool import text_to_speech_tool, _strip_markdown_for_tts
@ -2350,7 +2363,7 @@ class GatewayRunner:
# The TTS tool may convert to .ogg — use file_path from result.
audio_path = os.path.join(
tempfile.gettempdir(), "hermes_voice",
f"tts_reply_{int(time.time())}_{id(event) % 10000}.mp3",
f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
)
os.makedirs(os.path.dirname(audio_path), exist_ok=True)
@ -2387,13 +2400,14 @@ class GatewayRunner:
if "metadata" not in sig.parameters:
send_kwargs.pop("metadata", None)
await adapter.send_voice(**send_kwargs)
for p in {audio_path, actual_path}:
except Exception as e:
logger.warning("Auto voice reply failed: %s", e, exc_info=True)
finally:
for p in {audio_path, actual_path} - {None}:
try:
os.unlink(p)
except OSError:
pass
except Exception as e:
logger.warning("Auto voice reply failed: %s", e, exc_info=True)
async def _handle_rollback_command(self, event: MessageEvent) -> str:
"""Handle /rollback command — list or restore filesystem checkpoints."""