fix(gateway): persist watcher metadata in checkpoint for crash recovery (#1706)
Salvaged from PR #1573 by @eren-karakus0. Cherry-picked with authorship preserved. Fixes #1143 — background process notifications resume after gateway restart. Co-authored-by: Muhammet Eren Karakuş <erenkar950@gmail.com>
This commit is contained in:
parent
ce7418e274
commit
d87655afff
5 changed files with 151 additions and 5 deletions
|
|
@ -50,13 +50,16 @@ def _build_runner(monkeypatch, tmp_path, mode: str) -> GatewayRunner:
|
|||
return runner
|
||||
|
||||
|
||||
def _watcher_dict(session_id="proc_test"):
|
||||
return {
|
||||
def _watcher_dict(session_id="proc_test", thread_id=""):
|
||||
d = {
|
||||
"session_id": session_id,
|
||||
"check_interval": 0,
|
||||
"platform": "telegram",
|
||||
"chat_id": "123",
|
||||
}
|
||||
if thread_id:
|
||||
d["thread_id"] = thread_id
|
||||
return d
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -196,3 +199,47 @@ async def test_run_process_watcher_respects_notification_mode(
|
|||
if expected_fragment is not None:
|
||||
sent_message = adapter.send.await_args.args[1]
|
||||
assert expected_fragment in sent_message
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_thread_id_passed_to_send(monkeypatch, tmp_path):
|
||||
"""thread_id from watcher dict is forwarded as metadata to adapter.send()."""
|
||||
import tools.process_registry as pr_module
|
||||
|
||||
sessions = [SimpleNamespace(output_buffer="done\n", exited=True, exit_code=0)]
|
||||
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
||||
|
||||
async def _instant_sleep(*_a, **_kw):
|
||||
pass
|
||||
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
||||
|
||||
runner = _build_runner(monkeypatch, tmp_path, "all")
|
||||
adapter = runner.adapters[Platform.TELEGRAM]
|
||||
|
||||
await runner._run_process_watcher(_watcher_dict(thread_id="42"))
|
||||
|
||||
assert adapter.send.await_count == 1
|
||||
_, kwargs = adapter.send.call_args
|
||||
assert kwargs["metadata"] == {"thread_id": "42"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_thread_id_sends_no_metadata(monkeypatch, tmp_path):
|
||||
"""When thread_id is empty, metadata should be None (general topic)."""
|
||||
import tools.process_registry as pr_module
|
||||
|
||||
sessions = [SimpleNamespace(output_buffer="done\n", exited=True, exit_code=0)]
|
||||
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
||||
|
||||
async def _instant_sleep(*_a, **_kw):
|
||||
pass
|
||||
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
||||
|
||||
runner = _build_runner(monkeypatch, tmp_path, "all")
|
||||
adapter = runner.adapters[Platform.TELEGRAM]
|
||||
|
||||
await runner._run_process_watcher(_watcher_dict())
|
||||
|
||||
assert adapter.send.await_count == 1
|
||||
_, kwargs = adapter.send.call_args
|
||||
assert kwargs["metadata"] is None
|
||||
|
|
|
|||
|
|
@ -294,6 +294,61 @@ class TestCheckpoint:
|
|||
recovered = registry.recover_from_checkpoint()
|
||||
assert recovered == 0
|
||||
|
||||
def test_write_checkpoint_includes_watcher_metadata(self, registry, tmp_path):
|
||||
with patch("tools.process_registry.CHECKPOINT_PATH", tmp_path / "procs.json"):
|
||||
s = _make_session()
|
||||
s.watcher_platform = "telegram"
|
||||
s.watcher_chat_id = "999"
|
||||
s.watcher_thread_id = "42"
|
||||
s.watcher_interval = 60
|
||||
registry._running[s.id] = s
|
||||
registry._write_checkpoint()
|
||||
|
||||
data = json.loads((tmp_path / "procs.json").read_text())
|
||||
assert len(data) == 1
|
||||
assert data[0]["watcher_platform"] == "telegram"
|
||||
assert data[0]["watcher_chat_id"] == "999"
|
||||
assert data[0]["watcher_thread_id"] == "42"
|
||||
assert data[0]["watcher_interval"] == 60
|
||||
|
||||
def test_recover_enqueues_watchers(self, registry, tmp_path):
|
||||
checkpoint = tmp_path / "procs.json"
|
||||
checkpoint.write_text(json.dumps([{
|
||||
"session_id": "proc_live",
|
||||
"command": "sleep 999",
|
||||
"pid": os.getpid(), # current process — guaranteed alive
|
||||
"task_id": "t1",
|
||||
"session_key": "sk1",
|
||||
"watcher_platform": "telegram",
|
||||
"watcher_chat_id": "123",
|
||||
"watcher_thread_id": "42",
|
||||
"watcher_interval": 60,
|
||||
}]))
|
||||
with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
|
||||
recovered = registry.recover_from_checkpoint()
|
||||
assert recovered == 1
|
||||
assert len(registry.pending_watchers) == 1
|
||||
w = registry.pending_watchers[0]
|
||||
assert w["session_id"] == "proc_live"
|
||||
assert w["platform"] == "telegram"
|
||||
assert w["chat_id"] == "123"
|
||||
assert w["thread_id"] == "42"
|
||||
assert w["check_interval"] == 60
|
||||
|
||||
def test_recover_skips_watcher_when_no_interval(self, registry, tmp_path):
|
||||
checkpoint = tmp_path / "procs.json"
|
||||
checkpoint.write_text(json.dumps([{
|
||||
"session_id": "proc_live",
|
||||
"command": "sleep 999",
|
||||
"pid": os.getpid(),
|
||||
"task_id": "t1",
|
||||
"watcher_interval": 0,
|
||||
}]))
|
||||
with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
|
||||
recovered = registry.recover_from_checkpoint()
|
||||
assert recovered == 1
|
||||
assert len(registry.pending_watchers) == 0
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Kill process
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue