From 2046a4c08cb24323444c4f161371a8e24b5df8b3 Mon Sep 17 00:00:00 2001 From: "ac (sourcetree)" Date: Wed, 11 Mar 2026 17:44:37 -0700 Subject: [PATCH 1/3] fix: backfill model on gateway sessions after agent runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gateway sessions end up with model=NULL because the session row is created before AIAgent is constructed. After the agent responds, update_session() writes token counts but never fills in the model. Thread agent.model through _run_agent()'s return dict into update_session() → update_token_counts(). The SQL uses COALESCE(model, ?) so it only fills NULL rows — never overwrites a model already set at creation time (e.g. CLI sessions). If the agent falls back to a different provider, agent.model is updated in-place by _try_activate_fallback(), so the recorded value reflects whichever model actually produced the response. Fixes #987 --- gateway/run.py | 5 ++++- gateway/session.py | 4 +++- hermes_state.py | 10 ++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 221f8f91..bc16b224 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1578,10 +1578,11 @@ class GatewayRunner: skip_db=agent_persisted, ) - # Update session with actual prompt token count from the agent + # Update session with actual prompt token count and model from the agent self.session_store.update_session( session_entry.session_key, last_prompt_tokens=agent_result.get("last_prompt_tokens", 0), + model=agent_result.get("model"), ) return response @@ -3586,6 +3587,7 @@ class GatewayRunner: "tools": tools_holder[0] or [], "history_offset": len(agent_history), "last_prompt_tokens": _last_prompt_toks, + "model": agent_holder[0].model if agent_holder[0] else None, } # Scan tool results for MEDIA: tags that need to be delivered @@ -3648,6 +3650,7 @@ class GatewayRunner: "tools": tools_holder[0] or [], "history_offset": len(agent_history), "last_prompt_tokens": _last_prompt_toks, + "model": agent_holder[0].model if agent_holder[0] else None, "session_id": effective_session_id, } diff --git a/gateway/session.py b/gateway/session.py index 3e42db4f..965f6079 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -590,6 +590,7 @@ class SessionStore: input_tokens: int = 0, output_tokens: int = 0, last_prompt_tokens: int = None, + model: str = None, ) -> None: """Update a session's metadata after an interaction.""" self._ensure_loaded() @@ -607,7 +608,8 @@ class SessionStore: if self._db: try: self._db.update_token_counts( - entry.session_id, input_tokens, output_tokens + entry.session_id, input_tokens, output_tokens, + model=model, ) except Exception as e: logger.debug("Session DB operation failed: %s", e) diff --git a/hermes_state.py b/hermes_state.py index 5e29321e..8945e195 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -227,15 +227,17 @@ class SessionDB: self._conn.commit() def update_token_counts( - self, session_id: str, input_tokens: int = 0, output_tokens: int = 0 + self, session_id: str, input_tokens: int = 0, output_tokens: int = 0, + model: str = None, ) -> None: - """Increment token counters on a session.""" + """Increment token counters and backfill model if not already set.""" self._conn.execute( """UPDATE sessions SET input_tokens = input_tokens + ?, - output_tokens = output_tokens + ? + output_tokens = output_tokens + ?, + model = COALESCE(model, ?) WHERE id = ?""", - (input_tokens, output_tokens, session_id), + (input_tokens, output_tokens, model, session_id), ) self._conn.commit() From 8602e61fca868c5437552c0920ac26f1c0fc7bd3 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 14 Mar 2026 06:44:14 -0700 Subject: [PATCH 2/3] test: cover gateway session model backfill Add regression coverage for backfilling NULL gateway session models in SQLite, preserving existing models, and forwarding the resolved agent model through SessionStore updates. --- tests/gateway/test_session.py | 25 +++++++++++++++++++++++++ tests/test_hermes_state.py | 16 +++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index b5808a99..0737f18d 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -577,3 +577,28 @@ class TestLastPromptTokens: store.update_session("k1", last_prompt_tokens=0) assert entry.last_prompt_tokens == 0 + + def test_update_session_passes_model_to_db(self, tmp_path): + """Gateway session updates should forward the resolved model to SQLite.""" + config = GatewayConfig() + with patch("gateway.session.SessionStore._ensure_loaded"): + store = SessionStore(sessions_dir=tmp_path, config=config) + store._loaded = True + store._save = MagicMock() + store._db = MagicMock() + + from gateway.session import SessionEntry + from datetime import datetime + entry = SessionEntry( + session_key="k1", + session_id="s1", + created_at=datetime.now(), + updated_at=datetime.now(), + ) + store._entries = {"k1": entry} + + store.update_session("k1", model="openai/gpt-5.4") + + store._db.update_token_counts.assert_called_once_with( + "s1", 0, 0, model="openai/gpt-5.4" + ) diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 329ae6f4..81e922c7 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -55,13 +55,27 @@ class TestSessionLifecycle: def test_update_token_counts(self, db): db.create_session(session_id="s1", source="cli") - db.update_token_counts("s1", input_tokens=100, output_tokens=50) db.update_token_counts("s1", input_tokens=200, output_tokens=100) + db.update_token_counts("s1", input_tokens=100, output_tokens=50) session = db.get_session("s1") assert session["input_tokens"] == 300 assert session["output_tokens"] == 150 + def test_update_token_counts_backfills_model_when_null(self, db): + db.create_session(session_id="s1", source="telegram") + db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4") + + session = db.get_session("s1") + assert session["model"] == "openai/gpt-5.4" + + def test_update_token_counts_preserves_existing_model(self, db): + db.create_session(session_id="s1", source="cli", model="anthropic/claude-opus-4.6") + db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4") + + session = db.get_session("s1") + assert session["model"] == "anthropic/claude-opus-4.6" + def test_parent_session(self, db): db.create_session(session_id="parent", source="cli") db.create_session(session_id="child", source="cli", parent_session_id="parent") From 19f4f8970af01a3ea78b98d680854b1b488862ef Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 14 Mar 2026 06:47:39 -0700 Subject: [PATCH 3/3] fix: tolerate test doubles without model attr Use getattr() when returning model metadata from GatewayRunner._run_agent so fake agents and minimal stubs without a model attribute do not break unrelated gateway flows while preserving the session-model backfill behavior. --- gateway/run.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index bc16b224..235ca336 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3577,6 +3577,7 @@ class GatewayRunner: _agent = agent_holder[0] if _agent and hasattr(_agent, "context_compressor"): _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0) + _resolved_model = getattr(_agent, "model", None) if _agent else None if not final_response: error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)" @@ -3587,7 +3588,7 @@ class GatewayRunner: "tools": tools_holder[0] or [], "history_offset": len(agent_history), "last_prompt_tokens": _last_prompt_toks, - "model": agent_holder[0].model if agent_holder[0] else None, + "model": _resolved_model, } # Scan tool results for MEDIA: tags that need to be delivered @@ -3650,7 +3651,7 @@ class GatewayRunner: "tools": tools_holder[0] or [], "history_offset": len(agent_history), "last_prompt_tokens": _last_prompt_toks, - "model": agent_holder[0].model if agent_holder[0] else None, + "model": _resolved_model, "session_id": effective_session_id, }