feat: add Phase 4 low-latency features for voice mode
- Audio cues: single beep on record start (880Hz), double beep on stop (660Hz)
- Silence detection: auto-stop recording after 3s of silence (RMS-based)
- Continuous mode: auto-restart recording after agent responds
- Ctrl+R starts continuous mode; Ctrl+R during recording exits it
- Waits for TTS to finish before restarting to avoid recording the speaker
- Tests: 7 new tests for beep generation and silence detection
This commit is contained in:
parent
a69bd55b5a
commit
bfd9c97705
3 changed files with 283 additions and 6 deletions
56
cli.py
56
cli.py
|
|
@ -3539,10 +3539,27 @@ class HermesCLI:
|
|||
if self._voice_recorder is None:
|
||||
self._voice_recorder = AudioRecorder()
|
||||
|
||||
self._voice_recorder.start()
|
||||
def _on_silence():
|
||||
"""Called by AudioRecorder when silence is detected after speech."""
|
||||
with self._voice_lock:
|
||||
if not self._voice_recording:
|
||||
return
|
||||
_cprint(f"\n{_DIM}Silence detected, auto-stopping...{_RST}")
|
||||
if hasattr(self, '_app') and self._app:
|
||||
self._app.invalidate()
|
||||
self._voice_stop_and_transcribe()
|
||||
|
||||
self._voice_recorder.start(on_silence_stop=_on_silence)
|
||||
with self._voice_lock:
|
||||
self._voice_recording = True
|
||||
_cprint(f"\n{_GOLD}● Recording...{_RST} {_DIM}(Ctrl+R to stop, Ctrl+C to cancel){_RST}")
|
||||
|
||||
# Audio cue: single beep on recording start
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
threading.Thread(target=play_beep, kwargs={"frequency": 880, "count": 1}, daemon=True).start()
|
||||
except Exception:
|
||||
pass
|
||||
_cprint(f"\n{_GOLD}● Recording...{_RST} {_DIM}(auto-stops on silence | Ctrl+R to stop & exit continuous){_RST}")
|
||||
|
||||
def _voice_stop_and_transcribe(self):
|
||||
"""Stop recording, transcribe via STT, and queue the transcript as input."""
|
||||
|
|
@ -3554,6 +3571,13 @@ class HermesCLI:
|
|||
with self._voice_lock:
|
||||
self._voice_recording = False
|
||||
|
||||
# Audio cue: double beep on recording stop
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
threading.Thread(target=play_beep, kwargs={"frequency": 660, "count": 2}, daemon=True).start()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if wav_path is None:
|
||||
_cprint(f"{_DIM}No speech detected (recording too short).{_RST}")
|
||||
return
|
||||
|
|
@ -3603,6 +3627,7 @@ class HermesCLI:
|
|||
"""Speak the agent's response aloud using TTS (runs in background thread)."""
|
||||
if not self._voice_tts:
|
||||
return
|
||||
self._voice_tts_done.clear()
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool
|
||||
from tools.voice_mode import play_audio_file
|
||||
|
|
@ -3649,6 +3674,8 @@ class HermesCLI:
|
|||
except Exception as e:
|
||||
logger.warning("Voice TTS playback failed: %s", e)
|
||||
_cprint(f"{_DIM}TTS playback failed: {e}{_RST}")
|
||||
finally:
|
||||
self._voice_tts_done.set()
|
||||
|
||||
def _handle_voice_command(self, command: str):
|
||||
"""Handle /voice [on|off|tts|status] command."""
|
||||
|
|
@ -3714,6 +3741,7 @@ class HermesCLI:
|
|||
self._voice_recording = False
|
||||
self._voice_mode = False
|
||||
self._voice_tts = False
|
||||
self._voice_continuous = False
|
||||
_cprint(f"\n{_DIM}Voice mode disabled.{_RST}")
|
||||
|
||||
def _toggle_voice_tts(self):
|
||||
|
|
@ -4331,6 +4359,9 @@ class HermesCLI:
|
|||
self._voice_recorder = None # AudioRecorder instance (lazy init)
|
||||
self._voice_recording = False # Whether currently recording
|
||||
self._voice_processing = False # Whether STT is in progress
|
||||
self._voice_continuous = False # Whether to auto-restart after agent responds
|
||||
self._voice_tts_done = threading.Event() # Signals TTS playback finished
|
||||
self._voice_tts_done.set() # Initially "done" (no TTS pending)
|
||||
|
||||
# Register callbacks so terminal_tool prompts route through our UI
|
||||
set_sudo_password_callback(self._sudo_password_callback)
|
||||
|
|
@ -4650,7 +4681,10 @@ class HermesCLI:
|
|||
if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state:
|
||||
return
|
||||
if cli_ref._voice_recording:
|
||||
cli_ref._voice_recording = False
|
||||
# Manual stop via Ctrl+R: stop continuous mode
|
||||
with cli_ref._voice_lock:
|
||||
cli_ref._voice_continuous = False
|
||||
cli_ref._voice_recording = False
|
||||
event.app.invalidate()
|
||||
threading.Thread(
|
||||
target=cli_ref._voice_stop_and_transcribe,
|
||||
|
|
@ -4658,6 +4692,8 @@ class HermesCLI:
|
|||
).start()
|
||||
else:
|
||||
try:
|
||||
with cli_ref._voice_lock:
|
||||
cli_ref._voice_continuous = True
|
||||
cli_ref._voice_start_recording()
|
||||
event.app.invalidate()
|
||||
except Exception as e:
|
||||
|
|
@ -5267,13 +5303,25 @@ class HermesCLI:
|
|||
# Regular chat - run agent
|
||||
self._agent_running = True
|
||||
app.invalidate() # Refresh status line
|
||||
|
||||
|
||||
try:
|
||||
self.chat(user_input, images=submit_images or None)
|
||||
finally:
|
||||
self._agent_running = False
|
||||
self._spinner_text = ""
|
||||
app.invalidate() # Refresh status line
|
||||
|
||||
# Continuous voice: auto-restart recording after agent responds
|
||||
if self._voice_mode and self._voice_continuous and not self._voice_recording:
|
||||
try:
|
||||
# Wait for TTS to finish so we don't record the speaker
|
||||
if self._voice_tts:
|
||||
self._voice_tts_done.wait(timeout=60)
|
||||
time.sleep(0.3) # Brief pause after TTS ends
|
||||
self._voice_start_recording()
|
||||
app.invalidate()
|
||||
except Exception as e:
|
||||
_cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue