The architecture has been updated

This commit is contained in:
Skyber_2 2026-03-31 23:31:36 +03:00
parent 805f7a017e
commit a01257ead9
1119 changed files with 226 additions and 352 deletions

View file

@ -0,0 +1,42 @@
"""
Cron job scheduling system for Hermes Agent.
This module provides scheduled task execution, allowing the agent to:
- Run automated tasks on schedules (cron expressions, intervals, one-shot)
- Self-schedule reminders and follow-up tasks
- Execute tasks in isolated sessions (no prior context)
Cron jobs are executed automatically by the gateway daemon:
hermes gateway install # Install as a user service
sudo hermes gateway install --system # Linux servers: boot-time system service
hermes gateway # Or run in foreground
The gateway ticks the scheduler every 60 seconds. A file lock prevents
duplicate execution if multiple processes overlap.
"""
from cron.jobs import (
create_job,
get_job,
list_jobs,
remove_job,
update_job,
pause_job,
resume_job,
trigger_job,
JOBS_FILE,
)
from cron.scheduler import tick
__all__ = [
"create_job",
"get_job",
"list_jobs",
"remove_job",
"update_job",
"pause_job",
"resume_job",
"trigger_job",
"tick",
"JOBS_FILE",
]

704
hermes_code/cron/jobs.py Normal file
View file

@ -0,0 +1,704 @@
"""
Cron job storage and management.
Jobs are stored in ~/.hermes/cron/jobs.json
Output is saved to ~/.hermes/cron/output/{job_id}/{timestamp}.md
"""
import copy
import json
import logging
import tempfile
import os
import re
import uuid
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, Dict, List, Any
logger = logging.getLogger(__name__)
from hermes_time import now as _hermes_now
try:
from croniter import croniter
HAS_CRONITER = True
except ImportError:
HAS_CRONITER = False
# =============================================================================
# Configuration
# =============================================================================
HERMES_DIR = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
CRON_DIR = HERMES_DIR / "cron"
JOBS_FILE = CRON_DIR / "jobs.json"
OUTPUT_DIR = CRON_DIR / "output"
ONESHOT_GRACE_SECONDS = 120
def _normalize_skill_list(skill: Optional[str] = None, skills: Optional[Any] = None) -> List[str]:
"""Normalize legacy/single-skill and multi-skill inputs into a unique ordered list."""
if skills is None:
raw_items = [skill] if skill else []
elif isinstance(skills, str):
raw_items = [skills]
else:
raw_items = list(skills)
normalized: List[str] = []
for item in raw_items:
text = str(item or "").strip()
if text and text not in normalized:
normalized.append(text)
return normalized
def _apply_skill_fields(job: Dict[str, Any]) -> Dict[str, Any]:
"""Return a job dict with canonical `skills` and legacy `skill` fields aligned."""
normalized = dict(job)
skills = _normalize_skill_list(normalized.get("skill"), normalized.get("skills"))
normalized["skills"] = skills
normalized["skill"] = skills[0] if skills else None
return normalized
def _secure_dir(path: Path):
"""Set directory to owner-only access (0700). No-op on Windows."""
try:
os.chmod(path, 0o700)
except (OSError, NotImplementedError):
pass # Windows or other platforms where chmod is not supported
def _secure_file(path: Path):
"""Set file to owner-only read/write (0600). No-op on Windows."""
try:
if path.exists():
os.chmod(path, 0o600)
except (OSError, NotImplementedError):
pass
def ensure_dirs():
"""Ensure cron directories exist with secure permissions."""
CRON_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
_secure_dir(CRON_DIR)
_secure_dir(OUTPUT_DIR)
# =============================================================================
# Schedule Parsing
# =============================================================================
def parse_duration(s: str) -> int:
"""
Parse duration string into minutes.
Examples:
"30m" 30
"2h" 120
"1d" 1440
"""
s = s.strip().lower()
match = re.match(r'^(\d+)\s*(m|min|mins|minute|minutes|h|hr|hrs|hour|hours|d|day|days)$', s)
if not match:
raise ValueError(f"Invalid duration: '{s}'. Use format like '30m', '2h', or '1d'")
value = int(match.group(1))
unit = match.group(2)[0] # First char: m, h, or d
multipliers = {'m': 1, 'h': 60, 'd': 1440}
return value * multipliers[unit]
def parse_schedule(schedule: str) -> Dict[str, Any]:
"""
Parse schedule string into structured format.
Returns dict with:
- kind: "once" | "interval" | "cron"
- For "once": "run_at" (ISO timestamp)
- For "interval": "minutes" (int)
- For "cron": "expr" (cron expression)
Examples:
"30m" once in 30 minutes
"2h" once in 2 hours
"every 30m" recurring every 30 minutes
"every 2h" recurring every 2 hours
"0 9 * * *" cron expression
"2026-02-03T14:00" once at timestamp
"""
schedule = schedule.strip()
original = schedule
schedule_lower = schedule.lower()
# "every X" pattern → recurring interval
if schedule_lower.startswith("every "):
duration_str = schedule[6:].strip()
minutes = parse_duration(duration_str)
return {
"kind": "interval",
"minutes": minutes,
"display": f"every {minutes}m"
}
# Check for cron expression (5 or 6 space-separated fields)
# Cron fields: minute hour day month weekday [year]
parts = schedule.split()
if len(parts) >= 5 and all(
re.match(r'^[\d\*\-,/]+$', p) for p in parts[:5]
):
if not HAS_CRONITER:
raise ValueError("Cron expressions require 'croniter' package. Install with: pip install croniter")
# Validate cron expression
try:
croniter(schedule)
except Exception as e:
raise ValueError(f"Invalid cron expression '{schedule}': {e}")
return {
"kind": "cron",
"expr": schedule,
"display": schedule
}
# ISO timestamp (contains T or looks like date)
if 'T' in schedule or re.match(r'^\d{4}-\d{2}-\d{2}', schedule):
try:
# Parse and validate
dt = datetime.fromisoformat(schedule.replace('Z', '+00:00'))
# Make naive timestamps timezone-aware at parse time so the stored
# value doesn't depend on the system timezone matching at check time.
if dt.tzinfo is None:
dt = dt.astimezone() # Interpret as local timezone
return {
"kind": "once",
"run_at": dt.isoformat(),
"display": f"once at {dt.strftime('%Y-%m-%d %H:%M')}"
}
except ValueError as e:
raise ValueError(f"Invalid timestamp '{schedule}': {e}")
# Duration like "30m", "2h", "1d" → one-shot from now
try:
minutes = parse_duration(schedule)
run_at = _hermes_now() + timedelta(minutes=minutes)
return {
"kind": "once",
"run_at": run_at.isoformat(),
"display": f"once in {original}"
}
except ValueError:
pass
raise ValueError(
f"Invalid schedule '{original}'. Use:\n"
f" - Duration: '30m', '2h', '1d' (one-shot)\n"
f" - Interval: 'every 30m', 'every 2h' (recurring)\n"
f" - Cron: '0 9 * * *' (cron expression)\n"
f" - Timestamp: '2026-02-03T14:00:00' (one-shot at time)"
)
def _ensure_aware(dt: datetime) -> datetime:
"""Return a timezone-aware datetime in Hermes configured timezone.
Backward compatibility:
- Older stored timestamps may be naive.
- Naive values are interpreted as *system-local wall time* (the timezone
`datetime.now()` used when they were created), then converted to the
configured Hermes timezone.
This preserves relative ordering for legacy naive timestamps across
timezone changes and avoids false not-due results.
"""
target_tz = _hermes_now().tzinfo
if dt.tzinfo is None:
local_tz = datetime.now().astimezone().tzinfo
return dt.replace(tzinfo=local_tz).astimezone(target_tz)
return dt.astimezone(target_tz)
def _recoverable_oneshot_run_at(
schedule: Dict[str, Any],
now: datetime,
*,
last_run_at: Optional[str] = None,
) -> Optional[str]:
"""Return a one-shot run time if it is still eligible to fire.
One-shot jobs get a small grace window so jobs created a few seconds after
their requested minute still run on the next tick. Once a one-shot has
already run, it is never eligible again.
"""
if schedule.get("kind") != "once":
return None
if last_run_at:
return None
run_at = schedule.get("run_at")
if not run_at:
return None
run_at_dt = _ensure_aware(datetime.fromisoformat(run_at))
if run_at_dt >= now - timedelta(seconds=ONESHOT_GRACE_SECONDS):
return run_at
return None
def _compute_grace_seconds(schedule: dict) -> int:
"""Compute how late a job can be and still catch up instead of fast-forwarding.
Uses half the schedule period, clamped between 120 seconds and 2 hours.
This ensures daily jobs can catch up if missed by up to 2 hours,
while frequent jobs (every 5-10 min) still fast-forward quickly.
"""
MIN_GRACE = 120
MAX_GRACE = 7200 # 2 hours
kind = schedule.get("kind")
if kind == "interval":
period_seconds = schedule.get("minutes", 1) * 60
grace = period_seconds // 2
return max(MIN_GRACE, min(grace, MAX_GRACE))
if kind == "cron" and HAS_CRONITER:
try:
now = _hermes_now()
cron = croniter(schedule["expr"], now)
first = cron.get_next(datetime)
second = cron.get_next(datetime)
period_seconds = int((second - first).total_seconds())
grace = period_seconds // 2
return max(MIN_GRACE, min(grace, MAX_GRACE))
except Exception:
pass
return MIN_GRACE
def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]:
"""
Compute the next run time for a schedule.
Returns ISO timestamp string, or None if no more runs.
"""
now = _hermes_now()
if schedule["kind"] == "once":
return _recoverable_oneshot_run_at(schedule, now, last_run_at=last_run_at)
elif schedule["kind"] == "interval":
minutes = schedule["minutes"]
if last_run_at:
# Next run is last_run + interval
last = _ensure_aware(datetime.fromisoformat(last_run_at))
next_run = last + timedelta(minutes=minutes)
else:
# First run is now + interval
next_run = now + timedelta(minutes=minutes)
return next_run.isoformat()
elif schedule["kind"] == "cron":
if not HAS_CRONITER:
return None
cron = croniter(schedule["expr"], now)
next_run = cron.get_next(datetime)
return next_run.isoformat()
return None
# =============================================================================
# Job CRUD Operations
# =============================================================================
def load_jobs() -> List[Dict[str, Any]]:
"""Load all jobs from storage."""
ensure_dirs()
if not JOBS_FILE.exists():
return []
try:
with open(JOBS_FILE, 'r', encoding='utf-8') as f:
data = json.load(f)
return data.get("jobs", [])
except (json.JSONDecodeError, IOError):
return []
def save_jobs(jobs: List[Dict[str, Any]]):
"""Save all jobs to storage."""
ensure_dirs()
fd, tmp_path = tempfile.mkstemp(dir=str(JOBS_FILE.parent), suffix='.tmp', prefix='.jobs_')
try:
with os.fdopen(fd, 'w', encoding='utf-8') as f:
json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, JOBS_FILE)
_secure_file(JOBS_FILE)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
def create_job(
prompt: str,
schedule: str,
name: Optional[str] = None,
repeat: Optional[int] = None,
deliver: Optional[str] = None,
origin: Optional[Dict[str, Any]] = None,
skill: Optional[str] = None,
skills: Optional[List[str]] = None,
model: Optional[str] = None,
provider: Optional[str] = None,
base_url: Optional[str] = None,
) -> Dict[str, Any]:
"""
Create a new cron job.
Args:
prompt: The prompt to run (must be self-contained, or a task instruction when skill is set)
schedule: Schedule string (see parse_schedule)
name: Optional friendly name
repeat: How many times to run (None = forever, 1 = once)
deliver: Where to deliver output ("origin", "local", "telegram", etc.)
origin: Source info where job was created (for "origin" delivery)
skill: Optional legacy single skill name to load before running the prompt
skills: Optional ordered list of skills to load before running the prompt
model: Optional per-job model override
provider: Optional per-job provider override
base_url: Optional per-job base URL override
Returns:
The created job dict
"""
parsed_schedule = parse_schedule(schedule)
# Normalize repeat: treat 0 or negative values as None (infinite)
if repeat is not None and repeat <= 0:
repeat = None
# Auto-set repeat=1 for one-shot schedules if not specified
if parsed_schedule["kind"] == "once" and repeat is None:
repeat = 1
# Default delivery to origin if available, otherwise local
if deliver is None:
deliver = "origin" if origin else "local"
job_id = uuid.uuid4().hex[:12]
now = _hermes_now().isoformat()
normalized_skills = _normalize_skill_list(skill, skills)
normalized_model = str(model).strip() if isinstance(model, str) else None
normalized_provider = str(provider).strip() if isinstance(provider, str) else None
normalized_base_url = str(base_url).strip().rstrip("/") if isinstance(base_url, str) else None
normalized_model = normalized_model or None
normalized_provider = normalized_provider or None
normalized_base_url = normalized_base_url or None
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
job = {
"id": job_id,
"name": name or label_source[:50].strip(),
"prompt": prompt,
"skills": normalized_skills,
"skill": normalized_skills[0] if normalized_skills else None,
"model": normalized_model,
"provider": normalized_provider,
"base_url": normalized_base_url,
"schedule": parsed_schedule,
"schedule_display": parsed_schedule.get("display", schedule),
"repeat": {
"times": repeat, # None = forever
"completed": 0
},
"enabled": True,
"state": "scheduled",
"paused_at": None,
"paused_reason": None,
"created_at": now,
"next_run_at": compute_next_run(parsed_schedule),
"last_run_at": None,
"last_status": None,
"last_error": None,
# Delivery configuration
"deliver": deliver,
"origin": origin, # Tracks where job was created for "origin" delivery
}
jobs = load_jobs()
jobs.append(job)
save_jobs(jobs)
return job
def get_job(job_id: str) -> Optional[Dict[str, Any]]:
"""Get a job by ID."""
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
return _apply_skill_fields(job)
return None
def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]:
"""List all jobs, optionally including disabled ones."""
jobs = [_apply_skill_fields(j) for j in load_jobs()]
if not include_disabled:
jobs = [j for j in jobs if j.get("enabled", True)]
return jobs
def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Update a job by ID, refreshing derived schedule fields when needed."""
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] != job_id:
continue
updated = _apply_skill_fields({**job, **updates})
schedule_changed = "schedule" in updates
if "skills" in updates or "skill" in updates:
normalized_skills = _normalize_skill_list(updated.get("skill"), updated.get("skills"))
updated["skills"] = normalized_skills
updated["skill"] = normalized_skills[0] if normalized_skills else None
if schedule_changed:
updated_schedule = updated["schedule"]
updated["schedule_display"] = updates.get(
"schedule_display",
updated_schedule.get("display", updated.get("schedule_display")),
)
if updated.get("state") != "paused":
updated["next_run_at"] = compute_next_run(updated_schedule)
if updated.get("enabled", True) and updated.get("state") != "paused" and not updated.get("next_run_at"):
updated["next_run_at"] = compute_next_run(updated["schedule"])
jobs[i] = updated
save_jobs(jobs)
return _apply_skill_fields(jobs[i])
return None
def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""Pause a job without deleting it."""
return update_job(
job_id,
{
"enabled": False,
"state": "paused",
"paused_at": _hermes_now().isoformat(),
"paused_reason": reason,
},
)
def resume_job(job_id: str) -> Optional[Dict[str, Any]]:
"""Resume a paused job and compute the next future run from now."""
job = get_job(job_id)
if not job:
return None
next_run_at = compute_next_run(job["schedule"])
return update_job(
job_id,
{
"enabled": True,
"state": "scheduled",
"paused_at": None,
"paused_reason": None,
"next_run_at": next_run_at,
},
)
def trigger_job(job_id: str) -> Optional[Dict[str, Any]]:
"""Schedule a job to run on the next scheduler tick."""
job = get_job(job_id)
if not job:
return None
return update_job(
job_id,
{
"enabled": True,
"state": "scheduled",
"paused_at": None,
"paused_reason": None,
"next_run_at": _hermes_now().isoformat(),
},
)
def remove_job(job_id: str) -> bool:
"""Remove a job by ID."""
jobs = load_jobs()
original_len = len(jobs)
jobs = [j for j in jobs if j["id"] != job_id]
if len(jobs) < original_len:
save_jobs(jobs)
return True
return False
def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
"""
Mark a job as having been run.
Updates last_run_at, last_status, increments completed count,
computes next_run_at, and auto-deletes if repeat limit reached.
"""
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
save_jobs(jobs)
return
save_jobs(jobs)
def get_due_jobs() -> List[Dict[str, Any]]:
"""Get all jobs that are due to run now.
For recurring jobs (cron/interval), if the scheduled time is stale
(more than one period in the past, e.g. because the gateway was down),
the job is fast-forwarded to the next future run instead of firing
immediately. This prevents a burst of missed jobs on gateway restart.
"""
now = _hermes_now()
raw_jobs = load_jobs()
jobs = [_apply_skill_fields(j) for j in copy.deepcopy(raw_jobs)]
due = []
needs_save = False
for job in jobs:
if not job.get("enabled", True):
continue
next_run = job.get("next_run_at")
if not next_run:
recovered_next = _recoverable_oneshot_run_at(
job.get("schedule", {}),
now,
last_run_at=job.get("last_run_at"),
)
if not recovered_next:
continue
job["next_run_at"] = recovered_next
next_run = recovered_next
logger.info(
"Job '%s' had no next_run_at; recovering one-shot run at %s",
job.get("name", job["id"]),
recovered_next,
)
for rj in raw_jobs:
if rj["id"] == job["id"]:
rj["next_run_at"] = recovered_next
needs_save = True
break
next_run_dt = _ensure_aware(datetime.fromisoformat(next_run))
if next_run_dt <= now:
schedule = job.get("schedule", {})
kind = schedule.get("kind")
# For recurring jobs, check if the scheduled time is stale
# (gateway was down and missed the window). Fast-forward to
# the next future occurrence instead of firing a stale run.
grace = _compute_grace_seconds(schedule)
if kind in ("cron", "interval") and (now - next_run_dt).total_seconds() > grace:
# Job is past its catch-up grace window — this is a stale missed run.
# Grace scales with schedule period: daily=2h, hourly=30m, 10min=5m.
new_next = compute_next_run(schedule, now.isoformat())
if new_next:
logger.info(
"Job '%s' missed its scheduled time (%s, grace=%ds). "
"Fast-forwarding to next run: %s",
job.get("name", job["id"]),
next_run,
grace,
new_next,
)
# Update the job in storage
for rj in raw_jobs:
if rj["id"] == job["id"]:
rj["next_run_at"] = new_next
needs_save = True
break
continue # Skip this run
due.append(job)
if needs_save:
save_jobs(raw_jobs)
return due
def save_job_output(job_id: str, output: str):
"""Save job output to file."""
ensure_dirs()
job_output_dir = OUTPUT_DIR / job_id
job_output_dir.mkdir(parents=True, exist_ok=True)
_secure_dir(job_output_dir)
timestamp = _hermes_now().strftime("%Y-%m-%d_%H-%M-%S")
output_file = job_output_dir / f"{timestamp}.md"
fd, tmp_path = tempfile.mkstemp(dir=str(job_output_dir), suffix='.tmp', prefix='.output_')
try:
with os.fdopen(fd, 'w', encoding='utf-8') as f:
f.write(output)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, output_file)
_secure_file(output_file)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
return output_file

View file

@ -0,0 +1,568 @@
"""
Cron job scheduler - executes due jobs.
Provides tick() which checks for due jobs and runs them. The gateway
calls this every 60 seconds from a background thread.
Uses a file-based lock (~/.hermes/cron/.tick.lock) so only one tick
runs at a time if multiple processes overlap.
"""
import asyncio
import json
import logging
import os
import sys
import traceback
# fcntl is Unix-only; on Windows use msvcrt for file locking
try:
import fcntl
except ImportError:
fcntl = None
try:
import msvcrt
except ImportError:
msvcrt = None
from datetime import datetime
from pathlib import Path
from typing import Optional
from hermes_time import now as _hermes_now
logger = logging.getLogger(__name__)
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from cron.jobs import get_due_jobs, mark_job_run, save_job_output
# Sentinel: when a cron agent has nothing new to report, it can start its
# response with this marker to suppress delivery. Output is still saved
# locally for audit.
SILENT_MARKER = "[SILENT]"
# Resolve Hermes home directory (respects HERMES_HOME override)
_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
# File-based lock prevents concurrent ticks from gateway + daemon + systemd timer
_LOCK_DIR = _hermes_home / "cron"
_LOCK_FILE = _LOCK_DIR / ".tick.lock"
def _resolve_origin(job: dict) -> Optional[dict]:
"""Extract origin info from a job, preserving any extra routing metadata."""
origin = job.get("origin")
if not origin:
return None
platform = origin.get("platform")
chat_id = origin.get("chat_id")
if platform and chat_id:
return origin
return None
def _resolve_delivery_target(job: dict) -> Optional[dict]:
"""Resolve the concrete auto-delivery target for a cron job, if any."""
deliver = job.get("deliver", "local")
origin = _resolve_origin(job)
if deliver == "local":
return None
if deliver == "origin":
if not origin:
return None
return {
"platform": origin["platform"],
"chat_id": str(origin["chat_id"]),
"thread_id": origin.get("thread_id"),
}
if ":" in deliver:
platform_name, rest = deliver.split(":", 1)
# Check for thread_id suffix (e.g. "telegram:-1003724596514:17")
if ":" in rest:
chat_id, thread_id = rest.split(":", 1)
else:
chat_id, thread_id = rest, None
return {
"platform": platform_name,
"chat_id": chat_id,
"thread_id": thread_id,
}
platform_name = deliver
if origin and origin.get("platform") == platform_name:
return {
"platform": platform_name,
"chat_id": str(origin["chat_id"]),
"thread_id": origin.get("thread_id"),
}
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
if not chat_id:
return None
return {
"platform": platform_name,
"chat_id": chat_id,
"thread_id": None,
}
def _deliver_result(job: dict, content: str) -> None:
"""
Deliver job output to the configured target (origin chat, specific platform, etc.).
Uses the standalone platform send functions from send_message_tool so delivery
works whether or not the gateway is running.
"""
target = _resolve_delivery_target(job)
if not target:
if job.get("deliver", "local") != "local":
logger.warning(
"Job '%s' deliver=%s but no concrete delivery target could be resolved",
job["id"],
job.get("deliver", "local"),
)
return
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
from tools.send_message_tool import _send_to_platform
from gateway.config import load_gateway_config, Platform
platform_map = {
"telegram": Platform.TELEGRAM,
"discord": Platform.DISCORD,
"slack": Platform.SLACK,
"whatsapp": Platform.WHATSAPP,
"signal": Platform.SIGNAL,
"matrix": Platform.MATRIX,
"mattermost": Platform.MATTERMOST,
"homeassistant": Platform.HOMEASSISTANT,
"dingtalk": Platform.DINGTALK,
"email": Platform.EMAIL,
"sms": Platform.SMS,
}
platform = platform_map.get(platform_name.lower())
if not platform:
logger.warning("Job '%s': unknown platform '%s' for delivery", job["id"], platform_name)
return
try:
config = load_gateway_config()
except Exception as e:
logger.error("Job '%s': failed to load gateway config for delivery: %s", job["id"], e)
return
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
logger.warning("Job '%s': platform '%s' not configured/enabled", job["id"], platform_name)
return
# Wrap the content so the user knows this is a cron delivery and that
# the interactive agent has no visibility into it.
task_name = job.get("name", job["id"])
wrapped = (
f"Cronjob Response: {task_name}\n"
f"-------------\n\n"
f"{content}\n\n"
f"Note: The agent cannot see this message, and therefore cannot respond to it."
)
# Run the async send in a fresh event loop (safe from any thread)
coro = _send_to_platform(platform, pconfig, chat_id, wrapped, thread_id=thread_id)
try:
result = asyncio.run(coro)
except RuntimeError:
# asyncio.run() checks for a running loop before awaiting the coroutine;
# when it raises, the original coro was never started — close it to
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, wrapped, thread_id=thread_id))
result = future.result(timeout=30)
except Exception as e:
logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
return
if result and result.get("error"):
logger.error("Job '%s': delivery error: %s", job["id"], result["error"])
else:
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
def _build_job_prompt(job: dict) -> str:
"""Build the effective prompt for a cron job, optionally loading one or more skills first."""
prompt = job.get("prompt", "")
skills = job.get("skills")
# Always prepend [SILENT] guidance so the cron agent can suppress
# delivery when it has nothing new or noteworthy to report.
silent_hint = (
"[SYSTEM: If you have nothing new or noteworthy to report, respond "
"with exactly \"[SILENT]\" (optionally followed by a brief internal "
"note). This suppresses delivery to the user while still saving "
"output locally. Only use [SILENT] when there are genuinely no "
"changes worth reporting.]\n\n"
)
prompt = silent_hint + prompt
if skills is None:
legacy = job.get("skill")
skills = [legacy] if legacy else []
skill_names = [str(name).strip() for name in skills if str(name).strip()]
if not skill_names:
return prompt
from tools.skills_tool import skill_view
parts = []
skipped: list[str] = []
for skill_name in skill_names:
loaded = json.loads(skill_view(skill_name))
if not loaded.get("success"):
error = loaded.get("error") or f"Failed to load skill '{skill_name}'"
logger.warning("Cron job '%s': skill not found, skipping — %s", job.get("name", job.get("id")), error)
skipped.append(skill_name)
continue
content = str(loaded.get("content") or "").strip()
if parts:
parts.append("")
parts.extend(
[
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
"",
content,
]
)
if skipped:
notice = (
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
f"and were skipped: {', '.join(skipped)}. "
f"Start your response with a brief notice so the user is aware, e.g.: "
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
)
parts.insert(0, notice)
if prompt:
parts.extend(["", f"The user has provided the following instruction alongside the skill invocation: {prompt}"])
return "\n".join(parts)
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
"""
Execute a single cron job.
Returns:
Tuple of (success, full_output_doc, final_response, error_message)
"""
from run_agent import AIAgent
# Initialize SQLite session store so cron job messages are persisted
# and discoverable via session_search (same pattern as gateway/run.py).
_session_db = None
try:
from hermes_state import SessionDB
_session_db = SessionDB()
except Exception as e:
logger.debug("Job '%s': SQLite session store not available: %s", job.get("id", "?"), e)
job_id = job["id"]
job_name = job["name"]
prompt = _build_job_prompt(job)
origin = _resolve_origin(job)
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
# Inject origin context so the agent's send_message tool knows the chat
if origin:
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
if origin.get("chat_name"):
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
try:
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
from dotenv import load_dotenv
try:
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="utf-8")
except UnicodeDecodeError:
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
delivery_target = _resolve_delivery_target(job)
if delivery_target:
os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
if delivery_target.get("thread_id") is not None:
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing
_cfg = {}
try:
import yaml
_cfg_path = str(_hermes_home / "config.yaml")
if os.path.exists(_cfg_path):
with open(_cfg_path) as _f:
_cfg = yaml.safe_load(_f) or {}
_model_cfg = _cfg.get("model", {})
if not job.get("model"):
if isinstance(_model_cfg, str):
model = _model_cfg
elif isinstance(_model_cfg, dict):
model = _model_cfg.get("default", model)
except Exception as e:
logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e)
# Reasoning config from env or config.yaml
reasoning_config = None
effort = os.getenv("HERMES_REASONING_EFFORT", "")
if not effort:
effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
if effort and effort.lower() != "none":
valid = ("xhigh", "high", "medium", "low", "minimal")
if effort.lower() in valid:
reasoning_config = {"enabled": True, "effort": effort.lower()}
elif effort.lower() == "none":
reasoning_config = {"enabled": False}
# Prefill messages from env or config.yaml
prefill_messages = None
prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
if prefill_file:
import json as _json
pfpath = Path(prefill_file).expanduser()
if not pfpath.is_absolute():
pfpath = _hermes_home / pfpath
if pfpath.exists():
try:
with open(pfpath, "r", encoding="utf-8") as _pf:
prefill_messages = _json.load(_pf)
if not isinstance(prefill_messages, list):
prefill_messages = None
except Exception as e:
logger.warning("Job '%s': failed to parse prefill messages file '%s': %s", job_id, pfpath, e)
prefill_messages = None
# Max iterations
max_iterations = _cfg.get("agent", {}).get("max_turns") or _cfg.get("max_turns") or 90
# Provider routing
pr = _cfg.get("provider_routing", {})
smart_routing = _cfg.get("smart_model_routing", {}) or {}
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
format_runtime_provider_error,
)
try:
runtime_kwargs = {
"requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
}
if job.get("base_url"):
runtime_kwargs["explicit_base_url"] = job.get("base_url")
runtime = resolve_runtime_provider(**runtime_kwargs)
except Exception as exc:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
from agent.smart_model_routing import resolve_turn_route
turn_route = resolve_turn_route(
prompt,
smart_routing,
{
"model": model,
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
"provider": runtime.get("provider"),
"api_mode": runtime.get("api_mode"),
"command": runtime.get("command"),
"args": list(runtime.get("args") or []),
},
)
agent = AIAgent(
model=turn_route["model"],
api_key=turn_route["runtime"].get("api_key"),
base_url=turn_route["runtime"].get("base_url"),
provider=turn_route["runtime"].get("provider"),
api_mode=turn_route["runtime"].get("api_mode"),
acp_command=turn_route["runtime"].get("command"),
acp_args=turn_route["runtime"].get("args"),
max_iterations=max_iterations,
reasoning_config=reasoning_config,
prefill_messages=prefill_messages,
providers_allowed=pr.get("only"),
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
disabled_toolsets=["cronjob", "messaging", "clarify"],
quiet_mode=True,
platform="cron",
session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",
session_db=_session_db,
)
result = agent.run_conversation(prompt)
final_response = result.get("final_response", "") or ""
# Use a separate variable for log display; keep final_response clean
# for delivery logic (empty response = no delivery).
logged_response = final_response if final_response else "(No response generated)"
output = f"""# Cron Job: {job_name}
**Job ID:** {job_id}
**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
**Schedule:** {job.get('schedule_display', 'N/A')}
## Prompt
{prompt}
## Response
{logged_response}
"""
logger.info("Job '%s' completed successfully", job_name)
return True, output, final_response, None
except Exception as e:
error_msg = f"{type(e).__name__}: {str(e)}"
logger.error("Job '%s' failed: %s", job_name, error_msg)
output = f"""# Cron Job: {job_name} (FAILED)
**Job ID:** {job_id}
**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
**Schedule:** {job.get('schedule_display', 'N/A')}
## Prompt
{prompt}
## Error
```
{error_msg}
{traceback.format_exc()}
```
"""
return False, output, "", error_msg
finally:
# Clean up injected env vars so they don't leak to other jobs
for key in (
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_CHAT_ID",
"HERMES_SESSION_CHAT_NAME",
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
):
os.environ.pop(key, None)
if _session_db:
try:
_session_db.close()
except Exception as e:
logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
def tick(verbose: bool = True) -> int:
"""
Check and run all due jobs.
Uses a file lock so only one tick runs at a time, even if the gateway's
in-process ticker and a standalone daemon or manual tick overlap.
Args:
verbose: Whether to print status messages
Returns:
Number of jobs executed (0 if another tick is already running)
"""
_LOCK_DIR.mkdir(parents=True, exist_ok=True)
# Cross-platform file locking: fcntl on Unix, msvcrt on Windows
lock_fd = None
try:
lock_fd = open(_LOCK_FILE, "w")
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
elif msvcrt:
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_NBLCK, 1)
except (OSError, IOError):
logger.debug("Tick skipped — another instance holds the lock")
if lock_fd is not None:
lock_fd.close()
return 0
try:
due_jobs = get_due_jobs()
if verbose and not due_jobs:
logger.info("%s - No jobs due", _hermes_now().strftime('%H:%M:%S'))
return 0
if verbose:
logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
executed = 0
for job in due_jobs:
try:
success, output, final_response, error = run_job(job)
output_file = save_job_output(job["id"], output)
if verbose:
logger.info("Output saved to: %s", output_file)
# Deliver the final response to the origin/target chat.
# If the agent responded with [SILENT], skip delivery (but
# output is already saved above). Failed jobs always deliver.
deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
should_deliver = bool(deliver_content)
if should_deliver and success and deliver_content.strip().upper().startswith(SILENT_MARKER):
logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
should_deliver = False
if should_deliver:
try:
_deliver_result(job, deliver_content)
except Exception as de:
logger.error("Delivery failed for job %s: %s", job["id"], de)
mark_job_run(job["id"], success, error)
executed += 1
except Exception as e:
logger.error("Error processing job %s: %s", job['id'], e)
mark_job_run(job["id"], False, str(e))
return executed
finally:
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
elif msvcrt:
try:
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_UNLCK, 1)
except (OSError, IOError):
pass
lock_fd.close()
if __name__ == "__main__":
tick(verbose=True)