From 8f8dd834432c054841e7a12bdee45739bc8118d3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 13 Mar 2026 04:14:35 -0700 Subject: [PATCH] fix: sync session_id after mid-run context compression Critical bug: when the agent's context compressor fires during a tool loop (_compress_context), it creates a new session_id and writes the compressed messages there. But the gateway's session_entry still pointed to the old session_id. On the next message, load_transcript() loaded the stale pre-compression transcript, causing: - Context bloat returning every turn - Repeated compression cycles - Loss of carefully compressed context Fix: after run_conversation() returns, check if the agent's session_id changed (compression split) and sync it back to the session store entry. Also pass the effective session_id in the result dict so _handle_message writes transcript entries to the correct session. This affects ALL gateway adapters, not just webhook. --- gateway/run.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index 166bc6f9..103f8813 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1446,6 +1446,11 @@ class GatewayRunner: response = agent_result.get("final_response", "") agent_messages = agent_result.get("messages", []) + # If the agent's session_id changed during compression, update + # session_entry so transcript writes below go to the right session. + if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id: + session_entry.session_id = agent_result["session_id"] + # Prepend reasoning/thinking if display is enabled if getattr(self, "_show_reasoning", False) and response: last_reasoning = agent_result.get("last_reasoning") @@ -3495,6 +3500,23 @@ class GatewayRunner: unique_tags.insert(0, "[[audio_as_voice]]") final_response = final_response + "\n" + "\n".join(unique_tags) + # Sync session_id: the agent may have created a new session during + # mid-run context compression (_compress_context splits sessions). + # If so, update the session store entry so the NEXT message loads + # the compressed transcript, not the stale pre-compression one. + agent = agent_holder[0] + if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id: + logger.info( + "Session split detected: %s → %s (compression)", + session_id, agent.session_id, + ) + entry = self.session_store._entries.get(session_key) + if entry: + entry.session_id = agent.session_id + self.session_store._save() + + effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id + return { "final_response": final_response, "last_reasoning": result.get("last_reasoning"), @@ -3503,6 +3525,7 @@ class GatewayRunner: "tools": tools_holder[0] or [], "history_offset": len(agent_history), "last_prompt_tokens": _last_prompt_toks, + "session_id": effective_session_id, } # Start progress message sender if enabled