fix: make STT config env-overridable and fix doc issues

Code fixes:
- STT model, Groq base URL, and OpenAI STT base URL are now configurable via env vars (STT_GROQ_MODEL, STT_OPENAI_MODEL, GROQ_BASE_URL, STT_OPENAI_BASE_URL) instead of being hardcoded
- Gateway and Discord VC now read stt.model from config.yaml (previously only the CLI did this — the gateway always used defaults)

Doc fixes:
- voice-mode.md: move Web UI troubleshooting to web.md (was duplicated)
- voice-mode.md: simplify "How It Works" for end users (remove NaCl, DAVE, RTP internals)
- voice-mode.md: clarify STT priority (OpenAI is used first if both keys are set; Groq is recommended for the free tier)
- voice-mode.md: document new STT env overrides in config reference
- web.md: remove duplicate Quick Start / Step 1-3 sections
- web.md: add mobile HTTPS mic workarounds (moved from voice-mode.md)
- web.md: clarify STT fallback order
2026-03-12 00:15:38 +03:00 · 2026-03-12 00:15:38 +03:00 · 238a431545
commit 238a431545
parent 79ed0effdd
5 changed files with 78 additions and 118 deletions
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -881,7 +881,18 @@ class DiscordAdapter(BasePlatformAdapter):
            await asyncio.to_thread(VoiceReceiver.pcm_to_wav, pcm_data, wav_path)

            from tools.transcription_tools import transcribe_audio
-            result = await asyncio.to_thread(transcribe_audio, wav_path)
+            # Read STT model from config.yaml
+            stt_model = None
+            try:
+                import yaml as _y
+                from pathlib import Path as _P
+                _cfg = _P(os.getenv("HERMES_HOME", _P.home() / ".hermes")) / "config.yaml"
+                if _cfg.exists():
+                    with open(_cfg) as _f:
+                        stt_model = (_y.safe_load(_f) or {}).get("stt", {}).get("model")
+            except Exception:
+                pass
+            result = await asyncio.to_thread(transcribe_audio, wav_path, model=stt_model)

            if not result.get("success"):
                return
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3326,11 +3326,23 @@ class GatewayRunner:
        from tools.transcription_tools import transcribe_audio
        import asyncio

+        # Read STT model from config.yaml (same key the CLI uses)
+        stt_model = None
+        try:
+            import yaml as _y
+            _cfg = _hermes_home / "config.yaml"
+            if _cfg.exists():
+                with open(_cfg) as _f:
+                    _data = _y.safe_load(_f) or {}
+                stt_model = _data.get("stt", {}).get("model")
+        except Exception:
+            pass
+
        enriched_parts = []
        for path in audio_paths:
            try:
                logger.debug("Transcribing user voice: %s", path)
-                result = await asyncio.to_thread(transcribe_audio, path)
+                result = await asyncio.to_thread(transcribe_audio, path, model=stt_model)
                if result["success"]:
                    transcript = result["transcript"]
                    enriched_parts.append(