fix(gateway): persist watcher metadata in checkpoint for crash recovery (#1706)

Salvaged from PR #1573 by @eren-karakus0. Cherry-picked with authorship preserved.

Fixes #1143 — background process notifications resume after gateway restart.

Co-authored-by: Muhammet Eren Karakuş <erenkar950@gmail.com>
This commit is contained in:
Teknium 2026-03-17 03:52:15 -07:00 committed by GitHub
parent ce7418e274
commit d87655afff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 151 additions and 5 deletions

View file

@ -984,6 +984,16 @@ class GatewayRunner:
):
self._schedule_update_notification_watch()
# Drain any recovered process watchers (from crash recovery checkpoint)
try:
from tools.process_registry import process_registry
while process_registry.pending_watchers:
watcher = process_registry.pending_watchers.pop(0)
asyncio.create_task(self._run_process_watcher(watcher))
logger.info("Resumed watcher for recovered process %s", watcher.get("session_id"))
except Exception as e:
logger.error("Recovered watcher setup error: %s", e)
# Start background session expiry watcher for proactive memory flushing
asyncio.create_task(self._session_expiry_watcher())