fix(stt): respect explicit provider config instead of env-var fallback (#1775)
* fix(session): skip corrupt lines in load_transcript instead of crashing Wrap json.loads() in load_transcript() with try/except JSONDecodeError so that partial JSONL lines (from mid-write crashes like OOM/SIGKILL) are skipped with a warning instead of crashing the entire transcript load. The rest of the history loads fine. Adds a logger.warning with the session ID and truncated corrupt line content for debugging visibility. Salvaged from PR #1193 by alireza78a. Closes #1193 * fix(stt): respect explicit provider config instead of env-var fallback Rework _get_provider() to separate explicit config from auto-detect. When stt.provider is explicitly set in config.yaml, that choice is authoritative — no silent cross-provider fallback based on which env vars happen to be set. When no provider is configured, auto-detect still tries: local > groq > openai. This fixes the reported scenario where provider: local + a placeholder OPENAI_API_KEY caused the system to silently select OpenAI and fail with a 401. Closes #1774
This commit is contained in:
parent
088d65605a
commit
9a1e971126
3 changed files with 151 additions and 115 deletions
|
|
@ -66,19 +66,12 @@ class TestGetProviderGroq:
|
|||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({"provider": "groq"}) == "groq"
|
||||
|
||||
def test_groq_fallback_to_local(self, monkeypatch):
|
||||
def test_groq_explicit_no_fallback(self, monkeypatch):
|
||||
"""Explicit groq with no key returns none — no cross-provider fallback."""
|
||||
monkeypatch.delenv("GROQ_API_KEY", raising=False)
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({"provider": "groq"}) == "local"
|
||||
|
||||
def test_groq_fallback_to_openai(self, monkeypatch):
|
||||
monkeypatch.delenv("GROQ_API_KEY", raising=False)
|
||||
monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({"provider": "groq"}) == "openai"
|
||||
assert _get_provider({"provider": "groq"}) == "none"
|
||||
|
||||
def test_groq_nothing_available(self, monkeypatch):
|
||||
monkeypatch.delenv("GROQ_API_KEY", raising=False)
|
||||
|
|
@ -90,36 +83,25 @@ class TestGetProviderGroq:
|
|||
|
||||
|
||||
class TestGetProviderFallbackPriority:
|
||||
"""Cross-provider fallback priority tests."""
|
||||
"""Auto-detect fallback priority and explicit provider behaviour."""
|
||||
|
||||
def test_local_fallback_prefers_groq_over_openai(self, monkeypatch):
|
||||
"""When local unavailable, groq (free) is preferred over openai (paid)."""
|
||||
def test_auto_detect_prefers_local(self):
|
||||
"""Auto-detect prefers local over any cloud provider."""
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({}) == "local"
|
||||
|
||||
def test_auto_detect_prefers_groq_over_openai(self, monkeypatch):
|
||||
"""Auto-detect: groq (free) is preferred over openai (paid)."""
|
||||
monkeypatch.setenv("GROQ_API_KEY", "gsk-test")
|
||||
monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({"provider": "local"}) == "groq"
|
||||
assert _get_provider({}) == "groq"
|
||||
|
||||
def test_local_fallback_to_groq_only(self, monkeypatch):
|
||||
"""When only groq key available, falls back to groq."""
|
||||
monkeypatch.setenv("GROQ_API_KEY", "gsk-test")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({"provider": "local"}) == "groq"
|
||||
|
||||
def test_openai_fallback_to_groq(self, monkeypatch):
|
||||
"""When openai key missing but groq available, falls back to groq."""
|
||||
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
|
||||
monkeypatch.setenv("GROQ_API_KEY", "gsk-test")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({"provider": "openai"}) == "groq"
|
||||
|
||||
def test_openai_nothing_available(self, monkeypatch):
|
||||
"""When no openai key and no local, returns none."""
|
||||
def test_explicit_openai_no_key_returns_none(self, monkeypatch):
|
||||
"""Explicit openai with no key returns none — no cross-provider fallback."""
|
||||
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
|
||||
monkeypatch.delenv("GROQ_API_KEY", raising=False)
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
|
|
@ -136,18 +118,83 @@ class TestGetProviderFallbackPriority:
|
|||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({}) == "local"
|
||||
|
||||
def test_openai_fallback_to_local_command(self, monkeypatch):
|
||||
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
|
||||
# ============================================================================
|
||||
# Explicit provider config respected (GH-1774)
|
||||
# ============================================================================
|
||||
|
||||
class TestExplicitProviderRespected:
|
||||
"""When stt.provider is explicitly set, that choice is authoritative.
|
||||
No silent fallback to a different cloud provider."""
|
||||
|
||||
def test_explicit_local_no_fallback_to_openai(self, monkeypatch):
|
||||
"""GH-1774: provider=local must not silently fall back to openai
|
||||
even when an OpenAI API key is set."""
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "sk-real-key-here")
|
||||
monkeypatch.delenv("GROQ_API_KEY", raising=False)
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
result = _get_provider({"provider": "local"})
|
||||
assert result == "none", f"Expected 'none' but got {result!r}"
|
||||
|
||||
def test_explicit_local_no_fallback_to_groq(self, monkeypatch):
|
||||
monkeypatch.setenv("GROQ_API_KEY", "gsk-test")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
result = _get_provider({"provider": "local"})
|
||||
assert result == "none"
|
||||
|
||||
def test_explicit_local_uses_local_command_fallback(self, monkeypatch):
|
||||
"""Local-to-local_command fallback is fine — both are local."""
|
||||
monkeypatch.setenv(
|
||||
"HERMES_LOCAL_STT_COMMAND",
|
||||
"whisper {input_path} --output_dir {output_dir} --language {language}",
|
||||
)
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False):
|
||||
from tools.transcription_tools import _get_provider
|
||||
result = _get_provider({"provider": "local"})
|
||||
assert result == "local_command"
|
||||
|
||||
def test_explicit_groq_no_fallback_to_openai(self, monkeypatch):
|
||||
monkeypatch.delenv("GROQ_API_KEY", raising=False)
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "sk-real-key")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
assert _get_provider({"provider": "openai"}) == "local_command"
|
||||
result = _get_provider({"provider": "groq"})
|
||||
assert result == "none"
|
||||
|
||||
def test_explicit_openai_no_fallback_to_groq(self, monkeypatch):
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
|
||||
monkeypatch.setenv("GROQ_API_KEY", "gsk-test")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
result = _get_provider({"provider": "openai"})
|
||||
assert result == "none"
|
||||
|
||||
def test_auto_detect_still_falls_back_to_cloud(self, monkeypatch):
|
||||
"""When no provider is explicitly set, auto-detect cloud fallback works."""
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "sk-real-key")
|
||||
monkeypatch.delenv("GROQ_API_KEY", raising=False)
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
# Empty dict = no explicit provider, uses DEFAULT_PROVIDER auto-detect
|
||||
result = _get_provider({})
|
||||
assert result == "openai"
|
||||
|
||||
def test_auto_detect_prefers_groq_over_openai(self, monkeypatch):
|
||||
monkeypatch.setenv("GROQ_API_KEY", "gsk-test")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "sk-real-key")
|
||||
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import _get_provider
|
||||
result = _get_provider({})
|
||||
assert result == "groq"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
|
@ -686,28 +733,19 @@ class TestTranscribeAudioDispatch:
|
|||
assert "faster-whisper" in result["error"]
|
||||
assert "GROQ_API_KEY" in result["error"]
|
||||
|
||||
def test_openai_provider_falls_back_to_local_command(self, monkeypatch, sample_ogg):
|
||||
def test_explicit_openai_no_key_returns_error(self, monkeypatch, sample_ogg):
|
||||
"""Explicit provider=openai with no key returns an error, not a fallback."""
|
||||
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.setenv(
|
||||
"HERMES_LOCAL_STT_COMMAND",
|
||||
"whisper {input_path} --model {model} --output_dir {output_dir} --language {language}",
|
||||
)
|
||||
|
||||
with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "openai"}), \
|
||||
patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True), \
|
||||
patch("tools.transcription_tools._transcribe_local_command", return_value={
|
||||
"success": True,
|
||||
"transcript": "hello from fallback",
|
||||
"provider": "local_command",
|
||||
}) as mock_local_command:
|
||||
patch("tools.transcription_tools._HAS_OPENAI", True):
|
||||
from tools.transcription_tools import transcribe_audio
|
||||
result = transcribe_audio(sample_ogg)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["transcript"] == "hello from fallback"
|
||||
mock_local_command.assert_called_once_with(sample_ogg, "base")
|
||||
assert result["success"] is False
|
||||
assert "No STT provider" in result["error"]
|
||||
|
||||
def test_invalid_file_short_circuits(self):
|
||||
from tools.transcription_tools import transcribe_audio
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue