fix(voice): enable TTS voice reply when streaming is active (#2322)

When streaming is enabled, the base adapter receives None from
_handle_message (already_sent=True) and cannot run auto-TTS for
voice input. The runner was unconditionally skipping voice input
TTS assuming the base adapter would handle it.

Now the runner takes over TTS responsibility when streaming has
already delivered the text response, so voice channel playback
works with both streaming on and off.

Streaming off behavior is unchanged (default already_sent=False
preserves the original code path exactly).

Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
This commit is contained in:
Teknium 2026-03-21 08:08:37 -07:00 committed by GitHub
parent 06f4df52f1
commit 28bb0e770f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 60 additions and 12 deletions

View file

@ -2467,7 +2467,8 @@ class TestVoiceTTSPlayback:
runner.adapters = {}
return runner
def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello", agent_msgs=None):
def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello",
agent_msgs=None, already_sent=False):
from gateway.platforms.base import MessageType, MessageEvent, SessionSource
from gateway.config import Platform
runner._voice_mode["ch1"] = voice_mode
@ -2476,28 +2477,32 @@ class TestVoiceTTSPlayback:
user_id="1", user_name="test", chat_type="channel",
)
event = MessageEvent(source=source, text="test", message_type=msg_type)
return runner._should_send_voice_reply(event, response, agent_msgs or [])
return runner._should_send_voice_reply(
event, response, agent_msgs or [], already_sent=already_sent,
)
# -- Streaming OFF (existing behavior, must not change) --
def test_voice_input_runner_skips(self):
"""Voice input: runner skips — base adapter handles via play_tts."""
"""Streaming OFF + voice input: runner skips — base adapter handles."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.VOICE) is False
assert self._call_should_reply(runner, "all", MessageType.VOICE, already_sent=False) is False
def test_text_input_voice_all_runner_fires(self):
"""Text input + voice_mode=all: runner generates TTS."""
"""Streaming OFF + text input + voice_mode=all: runner generates TTS."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.TEXT) is True
assert self._call_should_reply(runner, "all", MessageType.TEXT, already_sent=False) is True
def test_text_input_voice_off_no_tts(self):
"""Text input + voice_mode=off: no TTS."""
"""Streaming OFF + text input + voice_mode=off: no TTS."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "off", MessageType.TEXT) is False
def test_text_input_voice_only_no_tts(self):
"""Text input + voice_mode=voice_only: no TTS for text."""
"""Streaming OFF + text input + voice_mode=voice_only: no TTS for text."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "voice_only", MessageType.TEXT) is False
@ -2523,6 +2528,43 @@ class TestVoiceTTSPlayback:
]}]
assert self._call_should_reply(runner, "all", MessageType.TEXT, agent_msgs=agent_msgs) is False
# -- Streaming ON (already_sent=True) --
def test_streaming_on_voice_input_runner_fires(self):
"""Streaming ON + voice input: runner handles TTS (base adapter has no text)."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.VOICE, already_sent=True) is True
def test_streaming_on_text_input_runner_fires(self):
"""Streaming ON + text input: runner handles TTS (same as before)."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.TEXT, already_sent=True) is True
def test_streaming_on_voice_off_no_tts(self):
"""Streaming ON + voice_mode=off: no TTS regardless of streaming."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "off", MessageType.VOICE, already_sent=True) is False
def test_streaming_on_empty_response_no_tts(self):
"""Streaming ON + empty response: no TTS."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.VOICE, response="", already_sent=True) is False
def test_streaming_on_agent_tts_dedup(self):
"""Streaming ON + agent called TTS: runner skips (dedup still works)."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
agent_msgs = [{"role": "assistant", "tool_calls": [
{"id": "1", "type": "function", "function": {"name": "text_to_speech", "arguments": "{}"}}
]}]
assert self._call_should_reply(
runner, "all", MessageType.VOICE, agent_msgs=agent_msgs, already_sent=True,
) is False
class TestUDPKeepalive:
"""UDP keepalive prevents Discord from dropping the voice session."""