fix(honcho): isolate session routing for multi-user gateway (#1500)
Salvaged from PR #1470 by adavyas. Core fix: Honcho tool calls in a multi-session gateway could route to the wrong session because honcho_tools.py relied on process-global state. Now threads session context through the call chain: AIAgent._invoke_tool() → handle_function_call() → registry.dispatch() → handler **kw → _resolve_session_context() Changes: - Add _resolve_session_context() to prefer per-call context over globals - Plumb honcho_manager + honcho_session_key through handle_function_call - Add sync_honcho=False to run_conversation() for synthetic flush turns - Pass honcho_session_key through gateway memory flush lifecycle - Harden gateway PID detection when /proc cmdline is unreadable - Make interrupt test scripts import-safe for pytest-xdist - Wrap BibTeX examples in Jekyll raw blocks for docs build - Fix thread-order-dependent assertion in client lifecycle test - Expand Honcho docs: session isolation, lifecycle, routing internals Dropped from original PR: - Indentation change in _create_request_openai_client that would move client creation inside the lock (causes unnecessary contention) Co-authored-by: adavyas <adavyas@users.noreply.github.com>
This commit is contained in:
parent
eb4f0348e1
commit
dd7921d514
17 changed files with 522 additions and 252 deletions
|
|
@ -90,6 +90,7 @@ class TestGatewayHonchoLifecycle:
|
|||
runner = _make_runner()
|
||||
event = _make_event()
|
||||
runner._shutdown_gateway_honcho = MagicMock()
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
runner.session_store = MagicMock()
|
||||
runner.session_store._generate_session_key.return_value = "gateway-key"
|
||||
runner.session_store._entries = {
|
||||
|
|
@ -100,4 +101,31 @@ class TestGatewayHonchoLifecycle:
|
|||
result = await runner._handle_reset_command(event)
|
||||
|
||||
runner._shutdown_gateway_honcho.assert_called_once_with("gateway-key")
|
||||
runner._async_flush_memories.assert_called_once_with("old-session", "gateway-key")
|
||||
assert "Session reset" in result
|
||||
|
||||
def test_flush_memories_reuses_gateway_session_key_and_skips_honcho_sync(self):
|
||||
runner = _make_runner()
|
||||
runner.session_store = MagicMock()
|
||||
runner.session_store.load_transcript.return_value = [
|
||||
{"role": "user", "content": "a"},
|
||||
{"role": "assistant", "content": "b"},
|
||||
{"role": "user", "content": "c"},
|
||||
{"role": "assistant", "content": "d"},
|
||||
]
|
||||
tmp_agent = MagicMock()
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="model-name"),
|
||||
patch("run_agent.AIAgent", return_value=tmp_agent) as mock_agent_cls,
|
||||
):
|
||||
runner._flush_memories_for_session("old-session", "gateway-key")
|
||||
|
||||
mock_agent_cls.assert_called_once()
|
||||
_, kwargs = mock_agent_cls.call_args
|
||||
assert kwargs["session_id"] == "old-session"
|
||||
assert kwargs["honcho_session_key"] == "gateway-key"
|
||||
tmp_agent.run_conversation.assert_called_once()
|
||||
_, run_kwargs = tmp_agent.run_conversation.call_args
|
||||
assert run_kwargs["sync_honcho"] is False
|
||||
|
|
|
|||
|
|
@ -199,3 +199,28 @@ class TestHandleResumeCommand:
|
|||
|
||||
assert real_key not in runner._running_agents
|
||||
db.close()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_flushes_memories_with_gateway_session_key(self, tmp_path):
|
||||
"""Resume should preserve the gateway session key for Honcho flushes."""
|
||||
from hermes_state import SessionDB
|
||||
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.create_session("old_session", "telegram")
|
||||
db.set_session_title("old_session", "Old Work")
|
||||
db.create_session("current_session_001", "telegram")
|
||||
|
||||
event = _make_event(text="/resume Old Work")
|
||||
runner = _make_runner(
|
||||
session_db=db,
|
||||
current_session_id="current_session_001",
|
||||
event=event,
|
||||
)
|
||||
|
||||
await runner._handle_resume_command(event)
|
||||
|
||||
runner._async_flush_memories.assert_called_once_with(
|
||||
"current_session_001",
|
||||
_session_key_for_event(event),
|
||||
)
|
||||
db.close()
|
||||
|
|
|
|||
|
|
@ -26,6 +26,22 @@ class TestGatewayPidState:
|
|||
assert status.get_running_pid() is None
|
||||
assert not pid_path.exists()
|
||||
|
||||
def test_get_running_pid_accepts_gateway_metadata_when_cmdline_unavailable(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
pid_path = tmp_path / "gateway.pid"
|
||||
pid_path.write_text(json.dumps({
|
||||
"pid": os.getpid(),
|
||||
"kind": "hermes-gateway",
|
||||
"argv": ["python", "-m", "hermes_cli.main", "gateway"],
|
||||
"start_time": 123,
|
||||
}))
|
||||
|
||||
monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
|
||||
monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
|
||||
monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: None)
|
||||
|
||||
assert status.get_running_pid() == os.getpid()
|
||||
|
||||
|
||||
class TestGatewayRuntimeStatus:
|
||||
def test_write_runtime_status_records_platform_failure(self, tmp_path, monkeypatch):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue