fix(setup+gateway): defer config write, PID-based gateway kill, scoped systemd service names (#1499)
fix(setup+gateway): defer config write, PID-based gateway kill, scoped systemd service names
This commit is contained in:
commit
caa944e752
10 changed files with 137 additions and 56 deletions
|
|
@ -119,14 +119,35 @@ def is_windows() -> bool:
|
|||
# Service Configuration
|
||||
# =============================================================================
|
||||
|
||||
SERVICE_NAME = "hermes-gateway"
|
||||
_SERVICE_BASE = "hermes-gateway"
|
||||
SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"
|
||||
|
||||
|
||||
def get_service_name() -> str:
|
||||
"""Derive a systemd service name scoped to this HERMES_HOME.
|
||||
|
||||
Default ``~/.hermes`` returns ``hermes-gateway`` (backward compatible).
|
||||
Any other HERMES_HOME appends a short hash so multiple installations
|
||||
can each have their own systemd service without conflicting.
|
||||
"""
|
||||
import hashlib
|
||||
from pathlib import Path as _Path # local import to avoid monkeypatch interference
|
||||
home = _Path(os.getenv("HERMES_HOME", _Path.home() / ".hermes")).resolve()
|
||||
default = (_Path.home() / ".hermes").resolve()
|
||||
if home == default:
|
||||
return _SERVICE_BASE
|
||||
suffix = hashlib.sha256(str(home).encode()).hexdigest()[:8]
|
||||
return f"{_SERVICE_BASE}-{suffix}"
|
||||
|
||||
|
||||
SERVICE_NAME = _SERVICE_BASE # backward-compat for external importers; prefer get_service_name()
|
||||
|
||||
|
||||
def get_systemd_unit_path(system: bool = False) -> Path:
|
||||
name = get_service_name()
|
||||
if system:
|
||||
return Path("/etc/systemd/system") / f"{SERVICE_NAME}.service"
|
||||
return Path.home() / ".config" / "systemd" / "user" / f"{SERVICE_NAME}.service"
|
||||
return Path("/etc/systemd/system") / f"{name}.service"
|
||||
return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"
|
||||
|
||||
|
||||
def _systemctl_cmd(system: bool = False) -> list[str]:
|
||||
|
|
@ -362,6 +383,8 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
|||
sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||
hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main"
|
||||
|
||||
hermes_home = str(Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")).resolve())
|
||||
|
||||
if system:
|
||||
username, group_name, home_dir = _system_service_identity(run_as_user)
|
||||
return f"""[Unit]
|
||||
|
|
@ -380,6 +403,7 @@ Environment="USER={username}"
|
|||
Environment="LOGNAME={username}"
|
||||
Environment="PATH={sane_path}"
|
||||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
KillMode=mixed
|
||||
|
|
@ -403,6 +427,7 @@ ExecStop={hermes_cli} gateway stop
|
|||
WorkingDirectory={working_dir}
|
||||
Environment="PATH={sane_path}"
|
||||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
KillMode=mixed
|
||||
|
|
@ -455,7 +480,7 @@ def _print_linger_enable_warning(username: str, detail: str | None = None) -> No
|
|||
print(f" sudo loginctl enable-linger {username}")
|
||||
print()
|
||||
print(" Then restart the gateway:")
|
||||
print(f" systemctl --user restart {SERVICE_NAME}.service")
|
||||
print(f" systemctl --user restart {get_service_name()}.service")
|
||||
print()
|
||||
|
||||
|
||||
|
|
@ -526,7 +551,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
|
|||
unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8")
|
||||
|
||||
subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True)
|
||||
subprocess.run(_systemctl_cmd(system) + ["enable", SERVICE_NAME], check=True)
|
||||
subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True)
|
||||
|
||||
print()
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!")
|
||||
|
|
@ -534,7 +559,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
|
|||
print("Next steps:")
|
||||
print(f" {'sudo ' if system else ''}hermes gateway start{scope_flag} # Start the service")
|
||||
print(f" {'sudo ' if system else ''}hermes gateway status{scope_flag} # Check status")
|
||||
print(f" {'journalctl' if system else 'journalctl --user'} -u {SERVICE_NAME} -f # View logs")
|
||||
print(f" {'journalctl' if system else 'journalctl --user'} -u {get_service_name()} -f # View logs")
|
||||
print()
|
||||
|
||||
if system:
|
||||
|
|
@ -552,8 +577,8 @@ def systemd_uninstall(system: bool = False):
|
|||
if system:
|
||||
_require_root_for_system_service("uninstall")
|
||||
|
||||
subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=False)
|
||||
subprocess.run(_systemctl_cmd(system) + ["disable", SERVICE_NAME], check=False)
|
||||
subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=False)
|
||||
subprocess.run(_systemctl_cmd(system) + ["disable", get_service_name()], check=False)
|
||||
|
||||
unit_path = get_systemd_unit_path(system=system)
|
||||
if unit_path.exists():
|
||||
|
|
@ -569,7 +594,7 @@ def systemd_start(system: bool = False):
|
|||
if system:
|
||||
_require_root_for_system_service("start")
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
subprocess.run(_systemctl_cmd(system) + ["start", SERVICE_NAME], check=True)
|
||||
subprocess.run(_systemctl_cmd(system) + ["start", get_service_name()], check=True)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service started")
|
||||
|
||||
|
||||
|
|
@ -578,7 +603,7 @@ def systemd_stop(system: bool = False):
|
|||
system = _select_systemd_scope(system)
|
||||
if system:
|
||||
_require_root_for_system_service("stop")
|
||||
subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=True)
|
||||
subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=True)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service stopped")
|
||||
|
||||
|
||||
|
|
@ -588,7 +613,7 @@ def systemd_restart(system: bool = False):
|
|||
if system:
|
||||
_require_root_for_system_service("restart")
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
subprocess.run(_systemctl_cmd(system) + ["restart", SERVICE_NAME], check=True)
|
||||
subprocess.run(_systemctl_cmd(system) + ["restart", get_service_name()], check=True)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service restarted")
|
||||
|
||||
|
||||
|
|
@ -613,12 +638,12 @@ def systemd_status(deep: bool = False, system: bool = False):
|
|||
print()
|
||||
|
||||
subprocess.run(
|
||||
_systemctl_cmd(system) + ["status", SERVICE_NAME, "--no-pager"],
|
||||
_systemctl_cmd(system) + ["status", get_service_name(), "--no-pager"],
|
||||
capture_output=False,
|
||||
)
|
||||
|
||||
result = subprocess.run(
|
||||
_systemctl_cmd(system) + ["is-active", SERVICE_NAME],
|
||||
_systemctl_cmd(system) + ["is-active", get_service_name()],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
|
@ -657,7 +682,7 @@ def systemd_status(deep: bool = False, system: bool = False):
|
|||
if deep:
|
||||
print()
|
||||
print("Recent logs:")
|
||||
subprocess.run(_journalctl_cmd(system) + ["-u", SERVICE_NAME, "-n", "20", "--no-pager"])
|
||||
subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"])
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -1118,7 +1143,7 @@ def _is_service_running() -> bool:
|
|||
|
||||
if user_unit_exists:
|
||||
result = subprocess.run(
|
||||
_systemctl_cmd(False) + ["is-active", SERVICE_NAME],
|
||||
_systemctl_cmd(False) + ["is-active", get_service_name()],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
if result.stdout.strip() == "active":
|
||||
|
|
@ -1126,7 +1151,7 @@ def _is_service_running() -> bool:
|
|||
|
||||
if system_unit_exists:
|
||||
result = subprocess.run(
|
||||
_systemctl_cmd(True) + ["is-active", SERVICE_NAME],
|
||||
_systemctl_cmd(True) + ["is-active", get_service_name()],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
if result.stdout.strip() == "active":
|
||||
|
|
|
|||
|
|
@ -2301,26 +2301,60 @@ def cmd_update(args):
|
|||
print()
|
||||
print("✓ Update complete!")
|
||||
|
||||
# Auto-restart gateway if it's running as a systemd service
|
||||
# Auto-restart gateway if it's running.
|
||||
# Uses the PID file (scoped to HERMES_HOME) to find this
|
||||
# installation's gateway — safe with multiple installations.
|
||||
try:
|
||||
check = subprocess.run(
|
||||
["systemctl", "--user", "is-active", "hermes-gateway"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if check.stdout.strip() == "active":
|
||||
print()
|
||||
print("→ Gateway service is running — restarting to pick up changes...")
|
||||
restart = subprocess.run(
|
||||
["systemctl", "--user", "restart", "hermes-gateway"],
|
||||
capture_output=True, text=True, timeout=15,
|
||||
from gateway.status import get_running_pid, remove_pid_file
|
||||
from hermes_cli.gateway import get_service_name
|
||||
import signal as _signal
|
||||
|
||||
_gw_service_name = get_service_name()
|
||||
existing_pid = get_running_pid()
|
||||
has_systemd_service = False
|
||||
|
||||
try:
|
||||
check = subprocess.run(
|
||||
["systemctl", "--user", "is-active", _gw_service_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
print("✓ Gateway restarted.")
|
||||
else:
|
||||
print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
|
||||
print(" Try manually: hermes gateway restart")
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass # No systemd (macOS, WSL1, etc.) — skip silently
|
||||
has_systemd_service = check.stdout.strip() == "active"
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
if existing_pid or has_systemd_service:
|
||||
print()
|
||||
|
||||
# Kill the PID-file-tracked process (may be manual or systemd)
|
||||
if existing_pid:
|
||||
try:
|
||||
os.kill(existing_pid, _signal.SIGTERM)
|
||||
print(f"→ Stopped gateway process (PID {existing_pid})")
|
||||
except ProcessLookupError:
|
||||
pass # Already gone
|
||||
except PermissionError:
|
||||
print(f"⚠ Permission denied killing gateway PID {existing_pid}")
|
||||
remove_pid_file()
|
||||
|
||||
# Restart the systemd service (starts a fresh process)
|
||||
if has_systemd_service:
|
||||
import time as _time
|
||||
_time.sleep(1) # Brief pause for port/socket release
|
||||
print("→ Restarting gateway service...")
|
||||
restart = subprocess.run(
|
||||
["systemctl", "--user", "restart", _gw_service_name],
|
||||
capture_output=True, text=True, timeout=15,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
print("✓ Gateway restarted.")
|
||||
else:
|
||||
print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
|
||||
print(" Try manually: hermes gateway restart")
|
||||
elif existing_pid:
|
||||
print(" ℹ️ Gateway was running manually (not as a service).")
|
||||
print(" Restart it with: hermes gateway run")
|
||||
except Exception as e:
|
||||
logger.debug("Gateway restart during update failed: %s", e)
|
||||
|
||||
print()
|
||||
print("Tip: You can now select a provider and model:")
|
||||
|
|
|
|||
|
|
@ -743,6 +743,7 @@ def setup_model_provider(config: dict):
|
|||
selected_provider = (
|
||||
None # "nous", "openai-codex", "openrouter", "custom", or None (keep)
|
||||
)
|
||||
selected_base_url = None # deferred until after model selection
|
||||
nous_models = [] # populated if Nous login succeeds
|
||||
|
||||
if provider_idx == 0: # Nous Portal (OAuth)
|
||||
|
|
@ -1025,8 +1026,8 @@ def setup_model_provider(config: dict):
|
|||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("zai", zai_base_url, default_model="glm-5")
|
||||
_set_model_provider(config, "zai", zai_base_url)
|
||||
selected_base_url = zai_base_url
|
||||
|
||||
elif provider_idx == 5: # Kimi / Moonshot
|
||||
selected_provider = "kimi-coding"
|
||||
|
|
@ -1058,8 +1059,8 @@ def setup_model_provider(config: dict):
|
|||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("kimi-coding", pconfig.inference_base_url, default_model="kimi-k2.5")
|
||||
_set_model_provider(config, "kimi-coding", pconfig.inference_base_url)
|
||||
selected_base_url = pconfig.inference_base_url
|
||||
|
||||
elif provider_idx == 6: # MiniMax
|
||||
selected_provider = "minimax"
|
||||
|
|
@ -1091,8 +1092,8 @@ def setup_model_provider(config: dict):
|
|||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("minimax", pconfig.inference_base_url, default_model="MiniMax-M2.5")
|
||||
_set_model_provider(config, "minimax", pconfig.inference_base_url)
|
||||
selected_base_url = pconfig.inference_base_url
|
||||
|
||||
elif provider_idx == 7: # MiniMax China
|
||||
selected_provider = "minimax-cn"
|
||||
|
|
@ -1124,8 +1125,8 @@ def setup_model_provider(config: dict):
|
|||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("minimax-cn", pconfig.inference_base_url, default_model="MiniMax-M2.5")
|
||||
_set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
|
||||
selected_base_url = pconfig.inference_base_url
|
||||
|
||||
elif provider_idx == 8: # Anthropic
|
||||
selected_provider = "anthropic"
|
||||
|
|
@ -1228,8 +1229,8 @@ def setup_model_provider(config: dict):
|
|||
save_env_value("OPENAI_API_KEY", "")
|
||||
# Don't save base_url for Anthropic — resolve_runtime_provider()
|
||||
# always hardcodes it. Stale base_urls contaminate other providers.
|
||||
_update_config_for_provider("anthropic", "", default_model="claude-opus-4-6")
|
||||
_set_model_provider(config, "anthropic")
|
||||
selected_base_url = ""
|
||||
|
||||
# else: provider_idx == 9 (Keep current) — only shown when a provider already exists
|
||||
# Normalize "keep current" to an explicit provider so downstream logic
|
||||
|
|
@ -1459,6 +1460,12 @@ def setup_model_provider(config: dict):
|
|||
)
|
||||
print_success(f"Model set to: {_display}")
|
||||
|
||||
# Write provider+base_url to config.yaml only after model selection is complete.
|
||||
# This prevents a race condition where the gateway picks up a new provider
|
||||
# before the model name has been updated to match.
|
||||
if selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "anthropic") and selected_base_url is not None:
|
||||
_update_config_for_provider(selected_provider, selected_base_url)
|
||||
|
||||
save_config(config)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -275,8 +275,13 @@ def show_status(args):
|
|||
print(color("◆ Gateway Service", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
if sys.platform.startswith('linux'):
|
||||
try:
|
||||
from hermes_cli.gateway import get_service_name
|
||||
_gw_svc = get_service_name()
|
||||
except Exception:
|
||||
_gw_svc = "hermes-gateway"
|
||||
result = subprocess.run(
|
||||
["systemctl", "--user", "is-active", "hermes-gateway"],
|
||||
["systemctl", "--user", "is-active", _gw_svc],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -133,7 +133,13 @@ def uninstall_gateway_service():
|
|||
if platform.system() != "Linux":
|
||||
return False
|
||||
|
||||
service_file = Path.home() / ".config" / "systemd" / "user" / "hermes-gateway.service"
|
||||
try:
|
||||
from hermes_cli.gateway import get_service_name
|
||||
svc_name = get_service_name()
|
||||
except Exception:
|
||||
svc_name = "hermes-gateway"
|
||||
|
||||
service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service"
|
||||
|
||||
if not service_file.exists():
|
||||
return False
|
||||
|
|
@ -141,14 +147,14 @@ def uninstall_gateway_service():
|
|||
try:
|
||||
# Stop the service
|
||||
subprocess.run(
|
||||
["systemctl", "--user", "stop", "hermes-gateway"],
|
||||
["systemctl", "--user", "stop", svc_name],
|
||||
capture_output=True,
|
||||
check=False
|
||||
)
|
||||
|
||||
# Disable the service
|
||||
subprocess.run(
|
||||
["systemctl", "--user", "disable", "hermes-gateway"],
|
||||
["systemctl", "--user", "disable", svc_name],
|
||||
capture_output=True,
|
||||
check=False
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue