From 8f8dd834432c054841e7a12bdee45739bc8118d3 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 13 Mar 2026 04:14:35 -0700
Subject: [PATCH] fix: sync session_id after mid-run context compression

Critical bug: when the agent's context compressor fires during a tool
loop (_compress_context), it creates a new session_id and writes the
compressed messages there. But the gateway's session_entry still pointed
to the old session_id. On the next message, load_transcript() loaded
the stale pre-compression transcript, causing:

- Context bloat returning every turn
- Repeated compression cycles
- Loss of carefully compressed context

Fix: after run_conversation() returns, check if the agent's session_id
changed (compression split) and sync it back to the session store entry.
Also pass the effective session_id in the result dict so _handle_message
writes transcript entries to the correct session.

This affects ALL gateway adapters, not just webhook.
---
 gateway/run.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/gateway/run.py b/gateway/run.py
index 166bc6f9..103f8813 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1446,6 +1446,11 @@ class GatewayRunner:
             response = agent_result.get("final_response", "")
             agent_messages = agent_result.get("messages", [])
 
+            # If the agent's session_id changed during compression, update
+            # session_entry so transcript writes below go to the right session.
+            if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
+                session_entry.session_id = agent_result["session_id"]
+
             # Prepend reasoning/thinking if display is enabled
             if getattr(self, "_show_reasoning", False) and response:
                 last_reasoning = agent_result.get("last_reasoning")
@@ -3495,6 +3500,23 @@ class GatewayRunner:
                         unique_tags.insert(0, "[[audio_as_voice]]")
                     final_response = final_response + "\n" + "\n".join(unique_tags)
             
+            # Sync session_id: the agent may have created a new session during
+            # mid-run context compression (_compress_context splits sessions).
+            # If so, update the session store entry so the NEXT message loads
+            # the compressed transcript, not the stale pre-compression one.
+            agent = agent_holder[0]
+            if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
+                logger.info(
+                    "Session split detected: %s → %s (compression)",
+                    session_id, agent.session_id,
+                )
+                entry = self.session_store._entries.get(session_key)
+                if entry:
+                    entry.session_id = agent.session_id
+                    self.session_store._save()
+
+            effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
+
             return {
                 "final_response": final_response,
                 "last_reasoning": result.get("last_reasoning"),
@@ -3503,6 +3525,7 @@ class GatewayRunner:
                 "tools": tools_holder[0] or [],
                 "history_offset": len(agent_history),
                 "last_prompt_tokens": _last_prompt_toks,
+                "session_id": effective_session_id,
             }
         
         # Start progress message sender if enabled