fix: harden gateway restart recovery

- store gateway PID metadata and validate the live process before trusting gateway.pid
- auto-refresh outdated systemd user units before start/restart so installs pick up --replace fixes
- sweep stray manual gateway processes after service stops
- add regression tests for PID validation and service drift recovery
This commit is contained in:
teknium1 2026-03-14 07:42:31 -07:00
parent 917adcbaf4
commit eb8316ea69
4 changed files with 251 additions and 15 deletions

View file

@ -0,0 +1,27 @@
"""Tests for gateway runtime status tracking."""
import json
import os
from gateway import status
class TestGatewayPidState:
def test_write_pid_file_records_gateway_metadata(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
status.write_pid_file()
payload = json.loads((tmp_path / "gateway.pid").read_text())
assert payload["pid"] == os.getpid()
assert payload["kind"] == "hermes-gateway"
assert isinstance(payload["argv"], list)
assert payload["argv"]
def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
pid_path = tmp_path / "gateway.pid"
pid_path.write_text(str(os.getpid()))
assert status.get_running_pid() is None
assert not pid_path.exists()