fix(gateway): surface missing linger in status and doctor (#1296)
* fix(gateway): surface missing linger in status and doctor Warn when a systemd user gateway service has linger disabled so users can spot the common 'gateway sleeps after logout' deployment issue from both hermes doctor and hermes gateway status. * fix(gateway): check linger status after install After installing the systemd user service, report whether linger is already enabled instead of always printing the generic hint. This makes post-install guidance match the user's actual deployment state.
This commit is contained in:
parent
6fa197f973
commit
fb3c163612
4 changed files with 231 additions and 7 deletions
|
|
@ -94,6 +94,39 @@ def check_info(text: str):
|
|||
print(f" {color('→', Colors.CYAN)} {text}")
|
||||
|
||||
|
||||
def _check_gateway_service_linger(issues: list[str]) -> None:
|
||||
"""Warn when a systemd user gateway service will stop after logout."""
|
||||
try:
|
||||
from hermes_cli.gateway import (
|
||||
get_systemd_linger_status,
|
||||
get_systemd_unit_path,
|
||||
is_linux,
|
||||
)
|
||||
except Exception as e:
|
||||
check_warn("Gateway service linger", f"(could not import gateway helpers: {e})")
|
||||
return
|
||||
|
||||
if not is_linux():
|
||||
return
|
||||
|
||||
unit_path = get_systemd_unit_path()
|
||||
if not unit_path.exists():
|
||||
return
|
||||
|
||||
print()
|
||||
print(color("◆ Gateway Service", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
linger_enabled, linger_detail = get_systemd_linger_status()
|
||||
if linger_enabled is True:
|
||||
check_ok("Systemd linger enabled", "(gateway service survives logout)")
|
||||
elif linger_enabled is False:
|
||||
check_warn("Systemd linger disabled", "(gateway may stop after logout)")
|
||||
check_info("Run: sudo loginctl enable-linger $USER")
|
||||
issues.append("Enable linger for the gateway user service: sudo loginctl enable-linger $USER")
|
||||
else:
|
||||
check_warn("Could not verify systemd linger", f"({linger_detail})")
|
||||
|
||||
|
||||
def run_doctor(args):
|
||||
"""Run diagnostic checks."""
|
||||
should_fix = getattr(args, 'fix', False)
|
||||
|
|
@ -348,6 +381,8 @@ def run_doctor(args):
|
|||
check_warn(f"~/.hermes/state.db exists but has issues: {e}")
|
||||
else:
|
||||
check_info("~/.hermes/state.db not created yet (will be created on first session)")
|
||||
|
||||
_check_gateway_service_linger(issues)
|
||||
|
||||
# =========================================================================
|
||||
# Check: External tools
|
||||
|
|
|
|||
|
|
@ -122,9 +122,72 @@ def is_windows() -> bool:
|
|||
SERVICE_NAME = "hermes-gateway"
|
||||
SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"
|
||||
|
||||
|
||||
def get_systemd_unit_path() -> Path:
|
||||
return Path.home() / ".config" / "systemd" / "user" / f"{SERVICE_NAME}.service"
|
||||
|
||||
|
||||
def get_systemd_linger_status() -> tuple[bool | None, str]:
|
||||
"""Return whether systemd user lingering is enabled for the current user.
|
||||
|
||||
Returns:
|
||||
(True, "") when linger is enabled.
|
||||
(False, "") when linger is disabled.
|
||||
(None, detail) when the status could not be determined.
|
||||
"""
|
||||
if not is_linux():
|
||||
return None, "not supported on this platform"
|
||||
|
||||
import shutil
|
||||
|
||||
if not shutil.which("loginctl"):
|
||||
return None, "loginctl not found"
|
||||
|
||||
username = os.getenv("USER") or os.getenv("LOGNAME")
|
||||
if not username:
|
||||
try:
|
||||
import pwd
|
||||
username = pwd.getpwuid(os.getuid()).pw_name
|
||||
except Exception:
|
||||
return None, "could not determine current user"
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["loginctl", "show-user", username, "--property=Linger", "--value"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
except Exception as e:
|
||||
return None, str(e)
|
||||
|
||||
if result.returncode != 0:
|
||||
detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
|
||||
return None, detail or "loginctl query failed"
|
||||
|
||||
value = (result.stdout or "").strip().lower()
|
||||
if value in {"yes", "true", "1"}:
|
||||
return True, ""
|
||||
if value in {"no", "false", "0"}:
|
||||
return False, ""
|
||||
|
||||
rendered = value or "<empty>"
|
||||
return None, f"unexpected loginctl output: {rendered}"
|
||||
|
||||
|
||||
def print_systemd_linger_guidance() -> None:
|
||||
"""Print the current linger status and the fix when it is disabled."""
|
||||
linger_enabled, linger_detail = get_systemd_linger_status()
|
||||
if linger_enabled is True:
|
||||
print("✓ Systemd linger is enabled (service survives logout)")
|
||||
elif linger_enabled is False:
|
||||
print("⚠ Systemd linger is disabled (gateway may stop when you log out)")
|
||||
print(" Run: sudo loginctl enable-linger $USER")
|
||||
else:
|
||||
print(f"⚠ Could not verify systemd linger ({linger_detail})")
|
||||
print(" If you want the gateway user service to survive logout, run:")
|
||||
print(" sudo loginctl enable-linger $USER")
|
||||
|
||||
def get_launchd_plist_path() -> Path:
|
||||
return Path.home() / "Library" / "LaunchAgents" / "ai.hermes.gateway.plist"
|
||||
|
||||
|
|
@ -211,8 +274,7 @@ def systemd_install(force: bool = False):
|
|||
print(f" hermes gateway status # Check status")
|
||||
print(f" journalctl --user -u {SERVICE_NAME} -f # View logs")
|
||||
print()
|
||||
print("To enable lingering (keeps running after logout):")
|
||||
print(" sudo loginctl enable-linger $USER")
|
||||
print_systemd_linger_guidance()
|
||||
|
||||
def systemd_uninstall():
|
||||
subprocess.run(["systemctl", "--user", "stop", SERVICE_NAME], check=False)
|
||||
|
|
@ -245,28 +307,38 @@ def systemd_status(deep: bool = False):
|
|||
print("✗ Gateway service is not installed")
|
||||
print(" Run: hermes gateway install")
|
||||
return
|
||||
|
||||
|
||||
# Show detailed status first
|
||||
subprocess.run(
|
||||
["systemctl", "--user", "status", SERVICE_NAME, "--no-pager"],
|
||||
capture_output=False
|
||||
)
|
||||
|
||||
|
||||
# Check if service is active
|
||||
result = subprocess.run(
|
||||
["systemctl", "--user", "is-active", SERVICE_NAME],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
|
||||
status = result.stdout.strip()
|
||||
|
||||
|
||||
if status == "active":
|
||||
print("✓ Gateway service is running")
|
||||
else:
|
||||
print("✗ Gateway service is stopped")
|
||||
print(" Run: hermes gateway start")
|
||||
|
||||
|
||||
if deep:
|
||||
print_systemd_linger_guidance()
|
||||
else:
|
||||
linger_enabled, _ = get_systemd_linger_status()
|
||||
if linger_enabled is True:
|
||||
print("✓ Systemd linger is enabled (service survives logout)")
|
||||
elif linger_enabled is False:
|
||||
print("⚠ Systemd linger is disabled (gateway may stop when you log out)")
|
||||
print(" Run: sudo loginctl enable-linger $USER")
|
||||
|
||||
if deep:
|
||||
print()
|
||||
print("Recent logs:")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue