fix: address voice mode PR review (streaming TTS, prompt cache, _vprint)
Bug A: Replace stale _HAS_ELEVENLABS/_HAS_AUDIO boolean imports with lazy import function calls (_import_elevenlabs, _import_sounddevice). The old constants no longer exist in tts_tool -- the try/except silently swallowed the ImportError, leaving streaming TTS dead. Bug B: Use user message prefix instead of modifying system prompt for voice mode instruction. Changing ephemeral_system_prompt mid-session invalidates the prompt cache. Now the concise-response hint is prepended to the user_message passed to run_conversation while conversation_history keeps the original text. Minor: Add force parameter to _vprint so critical error messages (max retries, non-retryable errors, API failures) are always shown even during streaming TTS playback. Tests: 15 new tests in test_voice_cli_integration.py covering all three fixes -- lazy import activation, message prefix behavior, history cleanliness, system prompt stability, and AST verification that all critical _vprint calls use force=True.
This commit is contained in:
parent
fc893f98f4
commit
a78249230c
3 changed files with 361 additions and 29 deletions
40
cli.py
40
cli.py
|
|
@ -3812,15 +3812,9 @@ class HermesCLI:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Append voice-mode system prompt for concise, conversational responses
|
||||
self._voice_original_prompt = self.system_prompt
|
||||
voice_instruction = (
|
||||
"\n\n[Voice mode active] The user is speaking via voice input. "
|
||||
"Keep responses concise and conversational — 2-3 sentences max unless "
|
||||
"the user asks for detail. Avoid code blocks, markdown formatting, "
|
||||
"and long lists. Respond naturally as in a spoken conversation."
|
||||
)
|
||||
self.system_prompt = (self.system_prompt or "") + voice_instruction
|
||||
# Voice mode instruction is injected as a user message prefix (not a
|
||||
# system prompt change) to avoid invalidating the prompt cache. See
|
||||
# _voice_message_prefix property and its usage in _process_message().
|
||||
|
||||
tts_status = " (TTS enabled)" if self._voice_tts else ""
|
||||
try:
|
||||
|
|
@ -3845,9 +3839,6 @@ class HermesCLI:
|
|||
self._voice_tts = False
|
||||
self._voice_continuous = False
|
||||
|
||||
# Restore original system prompt
|
||||
if hasattr(self, '_voice_original_prompt'):
|
||||
self.system_prompt = self._voice_original_prompt
|
||||
_cprint(f"\n{_DIM}Voice mode disabled.{_RST}")
|
||||
|
||||
def _toggle_voice_tts(self):
|
||||
|
|
@ -4140,13 +4131,18 @@ class HermesCLI:
|
|||
from tools.tts_tool import (
|
||||
_load_tts_config as _load_tts_cfg,
|
||||
_get_provider as _get_prov,
|
||||
_HAS_ELEVENLABS as _el_ok,
|
||||
_HAS_AUDIO as _audio_ok,
|
||||
_import_elevenlabs,
|
||||
_import_sounddevice,
|
||||
stream_tts_to_speaker,
|
||||
)
|
||||
_tts_cfg = _load_tts_cfg()
|
||||
if (_get_prov(_tts_cfg) == "elevenlabs" and _el_ok and _audio_ok):
|
||||
if _get_prov(_tts_cfg) == "elevenlabs":
|
||||
# Verify both ElevenLabs SDK and audio output are available
|
||||
_import_elevenlabs()
|
||||
_import_sounddevice()
|
||||
use_streaming_tts = True
|
||||
except (ImportError, OSError):
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -4177,10 +4173,22 @@ class HermesCLI:
|
|||
if text_queue is not None:
|
||||
text_queue.put(delta)
|
||||
|
||||
# When voice mode is active, prepend a brief instruction to the
|
||||
# user message so the model responds concisely. This avoids
|
||||
# modifying the system prompt (which would invalidate the prompt
|
||||
# cache). The original message in conversation_history stays clean.
|
||||
agent_message = message
|
||||
if self._voice_mode and isinstance(message, str):
|
||||
agent_message = (
|
||||
"[Voice input — respond concisely and conversationally, "
|
||||
"2-3 sentences max. No code blocks or markdown.] "
|
||||
+ message
|
||||
)
|
||||
|
||||
def run_agent():
|
||||
nonlocal result
|
||||
result = self.agent.run_conversation(
|
||||
user_message=message,
|
||||
user_message=agent_message,
|
||||
conversation_history=self.conversation_history[:-1], # Exclude the message we just added
|
||||
stream_callback=stream_callback,
|
||||
task_id=self.session_id,
|
||||
|
|
|
|||
28
run_agent.py
28
run_agent.py
|
|
@ -816,9 +816,13 @@ class AIAgent:
|
|||
else:
|
||||
print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
|
||||
|
||||
def _vprint(self, *args, **kwargs):
|
||||
"""Verbose print — suppressed when streaming TTS is active."""
|
||||
if getattr(self, "_stream_callback", None) is not None:
|
||||
def _vprint(self, *args, force: bool = False, **kwargs):
|
||||
"""Verbose print — suppressed when streaming TTS is active.
|
||||
|
||||
Pass ``force=True`` for error/warning messages that should always be
|
||||
shown even during streaming TTS playback.
|
||||
"""
|
||||
if not force and getattr(self, "_stream_callback", None) is not None:
|
||||
return
|
||||
print(*args, **kwargs)
|
||||
|
||||
|
|
@ -4641,7 +4645,7 @@ class AIAgent:
|
|||
}
|
||||
else:
|
||||
# First message was truncated - mark as failed
|
||||
self._vprint(f"{self.log_prefix}❌ First response truncated - cannot recover")
|
||||
self._vprint(f"{self.log_prefix}❌ First response truncated - cannot recover", force=True)
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"final_response": None,
|
||||
|
|
@ -4783,9 +4787,9 @@ class AIAgent:
|
|||
error_type = type(api_error).__name__
|
||||
error_msg = str(api_error).lower()
|
||||
|
||||
self._vprint(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
|
||||
self._vprint(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}", force=True)
|
||||
self._vprint(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s")
|
||||
self._vprint(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}")
|
||||
self._vprint(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}", force=True)
|
||||
self._vprint(f"{self.log_prefix} 📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
|
||||
|
||||
# Check for interrupt before deciding to retry
|
||||
|
|
@ -4839,7 +4843,7 @@ class AIAgent:
|
|||
restart_with_compressed_messages = True
|
||||
break
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
|
||||
self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True)
|
||||
logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
|
|
@ -4948,8 +4952,8 @@ class AIAgent:
|
|||
self._dump_api_request_debug(
|
||||
api_kwargs, reason="non_retryable_client_error", error=api_error,
|
||||
)
|
||||
self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
|
||||
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.")
|
||||
self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
|
||||
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
|
|
@ -5081,7 +5085,7 @@ class AIAgent:
|
|||
continue
|
||||
else:
|
||||
# Max retries - discard this turn and save as partial
|
||||
self._vprint(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
|
||||
self._vprint(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.", force=True)
|
||||
self._incomplete_scratchpad_retries = 0
|
||||
|
||||
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
|
||||
|
|
@ -5176,7 +5180,7 @@ class AIAgent:
|
|||
self._vprint(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction ({self._invalid_tool_retries}/3)")
|
||||
|
||||
if self._invalid_tool_retries >= 3:
|
||||
self._vprint(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.")
|
||||
self._vprint(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.", force=True)
|
||||
self._invalid_tool_retries = 0
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
|
|
@ -5350,7 +5354,7 @@ class AIAgent:
|
|||
self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
|
||||
continue
|
||||
else:
|
||||
self._vprint(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
|
||||
self._vprint(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.", force=True)
|
||||
self._empty_content_retries = 0
|
||||
|
||||
# If a prior tool_calls turn had real content, salvage it:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
"""Tests for CLI voice mode integration -- command parsing, markdown stripping, state management."""
|
||||
"""Tests for CLI voice mode integration -- command parsing, markdown stripping,
|
||||
state management, streaming TTS activation, voice message prefix, _vprint."""
|
||||
|
||||
import ast
|
||||
import re
|
||||
import threading
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -149,3 +153,319 @@ class TestVoiceStateLock:
|
|||
t.join()
|
||||
|
||||
assert state["count"] == 4000
|
||||
|
||||
|
||||
# ============================================================================
# Streaming TTS lazy import activation (Bug A fix)
# ============================================================================

class TestStreamingTTSActivation:
    """Verify streaming TTS uses lazy imports to check availability."""

    @staticmethod
    def _attempt_streaming_tts():
        """Replicate cli.py's activation logic under test.

        Streaming TTS turns on only when the configured provider is
        'elevenlabs' AND both lazy imports (_import_elevenlabs,
        _import_sounddevice) succeed.

        Returns:
            bool: the resulting ``use_streaming_tts`` flag.
        """
        try:
            # Imported inside the try block so a patched import raising
            # ImportError/OSError is handled exactly as cli.py handles it.
            from tools.tts_tool import (
                _load_tts_config as load_cfg,
                _get_provider as get_prov,
                _import_elevenlabs as import_el,
                _import_sounddevice as import_sd,
            )
            cfg = load_cfg()
            if get_prov(cfg) == "elevenlabs":
                import_el()
                import_sd()
                return True
        except (ImportError, OSError):
            # ImportError: SDK not installed; OSError: no audio backend
            # (e.g. PortAudio missing). Both must leave streaming disabled.
            pass
        return False

    def test_activates_when_elevenlabs_and_sounddevice_available(self):
        """use_streaming_tts should be True when provider is elevenlabs
        and both lazy imports succeed."""
        # Skip (not error) in environments without the tools package.
        tts_mod = pytest.importorskip("tools.tts_tool")
        assert callable(tts_mod._import_elevenlabs)
        assert callable(tts_mod._import_sounddevice)

        with patch("tools.tts_tool._load_tts_config", return_value={"provider": "elevenlabs"}), \
             patch("tools.tts_tool._get_provider", return_value="elevenlabs"), \
             patch("tools.tts_tool._import_elevenlabs", return_value=MagicMock()), \
             patch("tools.tts_tool._import_sounddevice", return_value=MagicMock()):
            assert self._attempt_streaming_tts() is True

    def test_does_not_activate_when_elevenlabs_missing(self):
        """use_streaming_tts stays False when elevenlabs import fails."""
        # Guard so the test doesn't pass vacuously when the module is absent.
        pytest.importorskip("tools.tts_tool")
        with patch("tools.tts_tool._load_tts_config", return_value={"provider": "elevenlabs"}), \
             patch("tools.tts_tool._get_provider", return_value="elevenlabs"), \
             patch("tools.tts_tool._import_elevenlabs", side_effect=ImportError("no elevenlabs")):
            assert self._attempt_streaming_tts() is False

    def test_does_not_activate_when_sounddevice_missing(self):
        """use_streaming_tts stays False when sounddevice import fails."""
        pytest.importorskip("tools.tts_tool")
        with patch("tools.tts_tool._load_tts_config", return_value={"provider": "elevenlabs"}), \
             patch("tools.tts_tool._get_provider", return_value="elevenlabs"), \
             patch("tools.tts_tool._import_elevenlabs", return_value=MagicMock()), \
             patch("tools.tts_tool._import_sounddevice", side_effect=OSError("no PortAudio")):
            assert self._attempt_streaming_tts() is False

    def test_does_not_activate_for_non_elevenlabs_provider(self):
        """use_streaming_tts stays False when provider is not elevenlabs."""
        pytest.importorskip("tools.tts_tool")
        with patch("tools.tts_tool._load_tts_config", return_value={"provider": "edge"}), \
             patch("tools.tts_tool._get_provider", return_value="edge"):
            assert self._attempt_streaming_tts() is False

    def test_stale_boolean_imports_no_longer_exist(self):
        """Confirm _HAS_ELEVENLABS and _HAS_AUDIO are not in tts_tool module."""
        # importorskip instead of a bare import: absence of the package
        # should skip, not error, this regression check.
        tts_mod = pytest.importorskip("tools.tts_tool")
        assert not hasattr(tts_mod, "_HAS_ELEVENLABS"), \
            "_HAS_ELEVENLABS should not exist -- lazy imports replaced it"
        assert not hasattr(tts_mod, "_HAS_AUDIO"), \
            "_HAS_AUDIO should not exist -- lazy imports replaced it"
|
||||
|
||||
|
||||
# ============================================================================
# Voice mode user message prefix (Bug B fix)
# ============================================================================

class TestVoiceMessagePrefix:
    """Voice mode should inject instruction via user message prefix,
    not by modifying the system prompt (which breaks prompt cache)."""

    # Must match the literal prefix used by cli.py's _process_message().
    # Kept in one place so the four tests below can't drift apart.
    _VOICE_PREFIX = (
        "[Voice input — respond concisely and conversationally, "
        "2-3 sentences max. No code blocks or markdown.] "
    )

    @classmethod
    def _build_agent_message(cls, voice_mode, message):
        """Mirror _process_message(): prefix plain-str messages in voice
        mode; pass everything else (voice off, multimodal lists) through
        untouched."""
        if voice_mode and isinstance(message, str):
            return cls._VOICE_PREFIX + message
        return message

    def test_prefix_added_when_voice_mode_active(self):
        """When voice mode is active and message is str, agent_message
        should have the voice instruction prefix."""
        agent_message = self._build_agent_message(True, "What's the weather like?")
        assert agent_message.startswith("[Voice input")
        assert "What's the weather like?" in agent_message

    def test_no_prefix_when_voice_mode_inactive(self):
        """When voice mode is off, message passes through unchanged."""
        message = "What's the weather like?"
        assert self._build_agent_message(False, message) == message

    def test_no_prefix_for_multimodal_content(self):
        """When message is a list (multimodal), no prefix is added."""
        message = [{"type": "text", "text": "describe this"}, {"type": "image_url"}]
        # Identity check: the object must pass through, not be copied.
        assert self._build_agent_message(True, message) is message

    def test_history_stays_clean(self):
        """conversation_history should contain the original message,
        not the prefixed version."""
        message = "Hello there"
        conversation_history = [{"role": "user", "content": message}]

        agent_message = self._build_agent_message(True, message)

        assert conversation_history[-1]["content"] == "Hello there"
        assert agent_message.startswith("[Voice input")
        assert agent_message != conversation_history[-1]["content"]

    def test_enable_voice_mode_does_not_modify_system_prompt(self):
        """_enable_voice_mode should NOT modify self.system_prompt or
        agent.ephemeral_system_prompt -- the system prompt must stay
        stable to preserve prompt cache."""
        cli = SimpleNamespace(
            _voice_mode=False,
            _voice_tts=False,
            _voice_lock=threading.Lock(),
            system_prompt="You are helpful",
            agent=SimpleNamespace(ephemeral_system_prompt="You are helpful"),
        )

        original_system = cli.system_prompt
        original_ephemeral = cli.agent.ephemeral_system_prompt

        # Flip voice mode on; neither prompt attribute may change.
        cli._voice_mode = True

        assert cli.system_prompt == original_system
        assert cli.agent.ephemeral_system_prompt == original_ephemeral
|
||||
|
||||
|
||||
# ============================================================================
# _vprint force parameter (Minor fix)
# ============================================================================

class TestVprintForceParameter:
    """_vprint should suppress output during streaming TTS unless force=True."""

    def _make_agent_with_stream(self, stream_active: bool):
        """Create a minimal agent-like object with _vprint.

        Re-implements run_agent.py's _vprint logic against a stand-in
        object so these tests don't need to construct a real AIAgent.
        A non-None _stream_callback models "streaming TTS active".
        """
        agent = SimpleNamespace(
            _stream_callback=MagicMock() if stream_active else None,
        )

        def _vprint(*args, force=False, **kwargs):
            # Suppress only when streaming is active AND force was not set.
            if not force and getattr(agent, "_stream_callback", None) is not None:
                return
            print(*args, **kwargs)

        agent._vprint = _vprint
        return agent

    def test_suppressed_during_streaming(self, capsys):
        """Normal _vprint output is suppressed when streaming TTS is active."""
        agent = self._make_agent_with_stream(stream_active=True)
        agent._vprint("should be hidden")
        captured = capsys.readouterr()
        assert captured.out == ""

    def test_shown_when_not_streaming(self, capsys):
        """Normal _vprint output is shown when streaming is not active."""
        agent = self._make_agent_with_stream(stream_active=False)
        agent._vprint("should be shown")
        captured = capsys.readouterr()
        assert "should be shown" in captured.out

    def test_force_shown_during_streaming(self, capsys):
        """force=True bypasses the streaming suppression."""
        agent = self._make_agent_with_stream(stream_active=True)
        agent._vprint("critical error!", force=True)
        captured = capsys.readouterr()
        assert "critical error!" in captured.out

    def test_force_shown_when_not_streaming(self, capsys):
        """force=True works normally when not streaming (no regression)."""
        agent = self._make_agent_with_stream(stream_active=False)
        agent._vprint("normal message", force=True)
        captured = capsys.readouterr()
        assert "normal message" in captured.out

    def test_error_messages_use_force_in_run_agent(self):
        """Verify that critical error _vprint calls in run_agent.py
        include force=True.

        Static AST check: assumes pytest's CWD is the repo root so
        run_agent.py resolves — TODO confirm against CI config.
        """
        with open("run_agent.py", "r") as f:
            source = f.read()

        tree = ast.parse(source)

        forced_error_count = 0
        unforced_error_count = 0

        for node in ast.walk(tree):
            if not isinstance(node, ast.Call):
                continue
            func = node.func
            # Only method-style calls named `_vprint` (e.g. self._vprint(...)).
            if not (isinstance(func, ast.Attribute) and func.attr == "_vprint"):
                continue
            has_fatal = False
            for arg in node.args:
                # Error messages in run_agent.py are f-strings (JoinedStr);
                # the ❌ marker (U+274C) identifies a critical error line.
                if isinstance(arg, ast.JoinedStr):
                    for val in arg.values:
                        if isinstance(val, ast.Constant) and isinstance(val.value, str):
                            if "\u274c" in val.value:
                                has_fatal = True
                                break

            if not has_fatal:
                continue

            # Require a literal force=True keyword, not just any truthy value.
            has_force = any(
                kw.arg == "force"
                and isinstance(kw.value, ast.Constant)
                and kw.value.value is True
                for kw in node.keywords
            )

            if has_force:
                forced_error_count += 1
            else:
                unforced_error_count += 1

        assert forced_error_count > 0, \
            "Expected at least one _vprint with force=True for error messages"
        assert unforced_error_count == 0, \
            f"Found {unforced_error_count} critical error _vprint calls without force=True"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue