fix: make STT config env-overridable and fix doc issues

Code fixes:
- STT model, Groq base URL, and OpenAI STT base URL are now
  configurable via env vars (STT_GROQ_MODEL, STT_OPENAI_MODEL,
  GROQ_BASE_URL, STT_OPENAI_BASE_URL) instead of being hardcoded
- Gateway and Discord VC now read stt.model from config.yaml
  (previously only the CLI did this — the gateway always used defaults)

Doc fixes:
- voice-mode.md: move Web UI troubleshooting to web.md (was duplicated)
- voice-mode.md: simplify "How It Works" for end users (remove NaCl,
  DAVE, RTP internals)
- voice-mode.md: clarify STT priority (OpenAI is used first if both
  keys are set; Groq is recommended for the free tier)
- voice-mode.md: document new STT env overrides in config reference
- web.md: remove duplicate Quick Start / Step 1-3 sections
- web.md: add mobile HTTPS mic workarounds (moved from voice-mode.md)
- web.md: clarify STT fallback order
This commit is contained in:
0xbyt4 2026-03-12 00:15:38 +03:00
parent 79ed0effdd
commit 238a431545
5 changed files with 78 additions and 118 deletions

View file

@ -881,7 +881,18 @@ class DiscordAdapter(BasePlatformAdapter):
await asyncio.to_thread(VoiceReceiver.pcm_to_wav, pcm_data, wav_path)
from tools.transcription_tools import transcribe_audio
result = await asyncio.to_thread(transcribe_audio, wav_path)
# Read STT model from config.yaml
stt_model = None
try:
import yaml as _y
from pathlib import Path as _P
_cfg = _P(os.getenv("HERMES_HOME", _P.home() / ".hermes")) / "config.yaml"
if _cfg.exists():
with open(_cfg) as _f:
stt_model = (_y.safe_load(_f) or {}).get("stt", {}).get("model")
except Exception:
pass
result = await asyncio.to_thread(transcribe_audio, wav_path, model=stt_model)
if not result.get("success"):
return

View file

@ -3326,11 +3326,23 @@ class GatewayRunner:
from tools.transcription_tools import transcribe_audio
import asyncio
# Read STT model from config.yaml (same key the CLI uses)
stt_model = None
try:
import yaml as _y
_cfg = _hermes_home / "config.yaml"
if _cfg.exists():
with open(_cfg) as _f:
_data = _y.safe_load(_f) or {}
stt_model = _data.get("stt", {}).get("model")
except Exception:
pass
enriched_parts = []
for path in audio_paths:
try:
logger.debug("Transcribing user voice: %s", path)
result = await asyncio.to_thread(transcribe_audio, path)
result = await asyncio.to_thread(transcribe_audio, path, model=stt_model)
if result["success"]:
transcript = result["transcript"]
enriched_parts.append(