The architecture has been updated

2026-03-31 23:31:36 +03:00 · 2026-03-31 23:31:36 +03:00 · a01257ead9
commit a01257ead9
parent 805f7a017e
1119 changed files with 226 additions and 352 deletions
--- a/hermes_code/cron/init.py
+++ b/hermes_code/cron/init.py
@ -0,0 +1,42 @@
+"""
+Cron job scheduling system for Hermes Agent.
+
+This module provides scheduled task execution, allowing the agent to:
+- Run automated tasks on schedules (cron expressions, intervals, one-shot)
+- Self-schedule reminders and follow-up tasks
+- Execute tasks in isolated sessions (no prior context)
+
+Cron jobs are executed automatically by the gateway daemon:
+    hermes gateway install    # Install as a user service
+    sudo hermes gateway install --system  # Linux servers: boot-time system service
+    hermes gateway            # Or run in foreground
+
+The gateway ticks the scheduler every 60 seconds. A file lock prevents
+duplicate execution if multiple processes overlap.
+"""
+
+from cron.jobs import (
+    create_job,
+    get_job,
+    list_jobs,
+    remove_job,
+    update_job,
+    pause_job,
+    resume_job,
+    trigger_job,
+    JOBS_FILE,
+)
+from cron.scheduler import tick
+
+# Names re-exported as the package's public API: the job-management helpers
+# and JOBS_FILE come from cron.jobs, tick comes from cron.scheduler.
+__all__ = [
+    "create_job",
+    "get_job", 
+    "list_jobs",
+    "remove_job",
+    "update_job",
+    "pause_job",
+    "resume_job",
+    "trigger_job",
+    "tick",
+    "JOBS_FILE",
+]
--- a/hermes_code/cron/jobs.py
+++ b/hermes_code/cron/jobs.py
@ -0,0 +1,704 @@
+"""
+Cron job storage and management.
+
+Jobs are stored in ~/.hermes/cron/jobs.json
+Output is saved to ~/.hermes/cron/output/{job_id}/{timestamp}.md
+"""
+
+import copy
+import json
+import logging
+import tempfile
+import os
+import re
+import uuid
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional, Dict, List, Any
+
+logger = logging.getLogger(__name__)
+
+from hermes_time import now as _hermes_now
+
+try:
+    from croniter import croniter
+    HAS_CRONITER = True
+except ImportError:
+    HAS_CRONITER = False
+
+# =============================================================================
+# Configuration
+# =============================================================================
+
+# Root directory for Hermes state: $HERMES_HOME when set, otherwise ~/.hermes.
+HERMES_DIR = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+# All cron state lives under <HERMES_DIR>/cron.
+CRON_DIR = HERMES_DIR / "cron"
+# JSON document holding the job list (read by load_jobs, written by save_jobs).
+JOBS_FILE = CRON_DIR / "jobs.json"
+# Per-job output directories are created below this path by save_job_output.
+OUTPUT_DIR = CRON_DIR / "output"
+# How far in the past (seconds) a one-shot run_at may be and still be
+# considered eligible by _recoverable_oneshot_run_at.
+ONESHOT_GRACE_SECONDS = 120
+
+
+def _normalize_skill_list(skill: Optional[str] = None, skills: Optional[Any] = None) -> List[str]:
+    """Collapse the legacy single-skill and multi-skill inputs into one list.
+
+    ``skills`` wins whenever it is not None (a bare string counts as a
+    one-element list); ``skill`` is only consulted as a fallback. Entries are
+    stringified and stripped, blanks are dropped, and duplicates are removed
+    while keeping first-seen order.
+    """
+    if skills is not None:
+        candidates = [skills] if isinstance(skills, str) else list(skills)
+    else:
+        candidates = [skill] if skill else []
+
+    cleaned = (str(entry or "").strip() for entry in candidates)
+    # dict keys keep insertion order, which gives an ordered de-duplication.
+    return list(dict.fromkeys(name for name in cleaned if name))
+
+
+def _apply_skill_fields(job: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a shallow copy of *job* whose `skills` / `skill` fields agree.
+
+    `skills` becomes the normalized list and the legacy `skill` field mirrors
+    its first entry (or None when the list is empty). The input dict is not
+    modified.
+    """
+    skill_names = _normalize_skill_list(job.get("skill"), job.get("skills"))
+    return {
+        **job,
+        "skills": skill_names,
+        "skill": skill_names[0] if skill_names else None,
+    }
+
+
+def _secure_dir(path: Path):
+    """Restrict a directory to its owner (mode 0700), best effort.
+
+    Failures from chmod (OSError / NotImplementedError, e.g. on Windows or
+    other platforms without chmod support) are deliberately ignored.
+    """
+    try:
+        os.chmod(path, 0o700)
+    except (OSError, NotImplementedError):
+        return
+
+
+def _secure_file(path: Path):
+    """Restrict a file to owner read/write (mode 0600), best effort.
+
+    A missing file is left alone, and chmod failures (OSError /
+    NotImplementedError, e.g. on Windows) are deliberately ignored.
+    """
+    try:
+        if not path.exists():
+            return
+        os.chmod(path, 0o600)
+    except (OSError, NotImplementedError):
+        return
+
+
+def ensure_dirs():
+    """Create CRON_DIR and OUTPUT_DIR if needed, then lock both down to the owner."""
+    managed_dirs = (CRON_DIR, OUTPUT_DIR)
+    for directory in managed_dirs:
+        directory.mkdir(parents=True, exist_ok=True)
+    for directory in managed_dirs:
+        _secure_dir(directory)
+
+
+# =============================================================================
+# Schedule Parsing
+# =============================================================================
+
+def parse_duration(s: str) -> int:
+    """
+    Convert a human duration such as "30m", "2h" or "1d" into minutes.
+
+    The input is stripped and lower-cased first. Accepted unit spellings are
+    m/min/mins/minute/minutes, h/hr/hrs/hour/hours and d/day/days, optionally
+    separated from the number by whitespace.
+
+    Examples:
+        "30m" → 30
+        "2h" → 120
+        "1d" → 1440
+
+    Raises:
+        ValueError: if the text is not a whole number followed by a known unit.
+    """
+    text = s.strip().lower()
+    parsed = re.match(r'^(\d+)\s*(m|min|mins|minute|minutes|h|hr|hrs|hour|hours|d|day|days)$', text)
+    if parsed is None:
+        raise ValueError(f"Invalid duration: '{text}'. Use format like '30m', '2h', or '1d'")
+
+    amount, unit_word = parsed.groups()
+    # Every accepted spelling starts with m, h or d, so the first letter
+    # is enough to pick the multiplier.
+    minutes_per_unit = {'m': 1, 'h': 60, 'd': 1440}
+    return int(amount) * minutes_per_unit[unit_word[0]]
+
+
+def parse_schedule(schedule: str) -> Dict[str, Any]:
+    """
+    Parse schedule string into structured format.
+
+    The forms are tried in this order: "every <duration>" interval, cron
+    expression, ISO timestamp, bare duration (one-shot from now).
+
+    Returns dict with:
+        - kind: "once" | "interval" | "cron"
+        - display: human-readable summary of the schedule
+        - For "once": "run_at" (ISO timestamp)
+        - For "interval": "minutes" (int)
+        - For "cron": "expr" (cron expression)
+
+    Raises:
+        ValueError: if the string matches none of the supported forms, if an
+            "every ..." duration / cron expression / timestamp is malformed,
+            or if a cron expression is given but croniter is not installed.
+            When a lower-level parser failed, its exception is attached as
+            ``__cause__``.
+
+    Examples:
+        "30m"              → once in 30 minutes
+        "2h"               → once in 2 hours
+        "every 30m"        → recurring every 30 minutes
+        "every 2h"         → recurring every 2 hours
+        "0 9 * * *"        → cron expression
+        "2026-02-03T14:00" → once at timestamp
+    """
+    schedule = schedule.strip()
+    original = schedule
+    schedule_lower = schedule.lower()
+
+    # "every X" pattern → recurring interval
+    if schedule_lower.startswith("every "):
+        duration_str = schedule[6:].strip()
+        minutes = parse_duration(duration_str)
+        return {
+            "kind": "interval",
+            "minutes": minutes,
+            "display": f"every {minutes}m"
+        }
+
+    # Check for cron expression (5 or 6 space-separated fields)
+    # Cron fields: minute hour day month weekday [year]
+    parts = schedule.split()
+    if len(parts) >= 5 and all(
+        re.match(r'^[\d\*\-,/]+$', p) for p in parts[:5]
+    ):
+        if not HAS_CRONITER:
+            raise ValueError("Cron expressions require 'croniter' package. Install with: pip install croniter")
+        # Validate cron expression
+        try:
+            croniter(schedule)
+        except Exception as e:
+            # Chain explicitly so the croniter error stays visible as the cause.
+            raise ValueError(f"Invalid cron expression '{schedule}': {e}") from e
+        return {
+            "kind": "cron",
+            "expr": schedule,
+            "display": schedule
+        }
+
+    # ISO timestamp (contains T or looks like date)
+    if 'T' in schedule or re.match(r'^\d{4}-\d{2}-\d{2}', schedule):
+        try:
+            # Parse and validate
+            dt = datetime.fromisoformat(schedule.replace('Z', '+00:00'))
+            # Make naive timestamps timezone-aware at parse time so the stored
+            # value doesn't depend on the system timezone matching at check time.
+            if dt.tzinfo is None:
+                dt = dt.astimezone()  # Interpret as local timezone
+            return {
+                "kind": "once",
+                "run_at": dt.isoformat(),
+                "display": f"once at {dt.strftime('%Y-%m-%d %H:%M')}"
+            }
+        except ValueError as e:
+            raise ValueError(f"Invalid timestamp '{schedule}': {e}") from e
+
+    # Duration like "30m", "2h", "1d" → one-shot from now
+    try:
+        minutes = parse_duration(schedule)
+        run_at = _hermes_now() + timedelta(minutes=minutes)
+        return {
+            "kind": "once",
+            "run_at": run_at.isoformat(),
+            "display": f"once in {original}"
+        }
+    except ValueError:
+        # Not a duration either; fall through to the combined help message.
+        pass
+
+    raise ValueError(
+        f"Invalid schedule '{original}'. Use:\n"
+        f"  - Duration: '30m', '2h', '1d' (one-shot)\n"
+        f"  - Interval: 'every 30m', 'every 2h' (recurring)\n"
+        f"  - Cron: '0 9 * * *' (cron expression)\n"
+        f"  - Timestamp: '2026-02-03T14:00:00' (one-shot at time)"
+    )
+
+
+def _ensure_aware(dt: datetime) -> datetime:
+    """Express *dt* as an aware datetime in the timezone of `_hermes_now()`.
+
+    Aware inputs are simply converted. Naive inputs (e.g. timestamps stored
+    by older versions) are first read as system-local wall time, i.e. in the
+    timezone `datetime.now().astimezone()` reports, and then converted.
+    Reading them this way keeps legacy naive timestamps in the right relative
+    order across timezone changes and avoids false "not due" results.
+    """
+    hermes_tz = _hermes_now().tzinfo
+    if dt.tzinfo is not None:
+        return dt.astimezone(hermes_tz)
+
+    system_tz = datetime.now().astimezone().tzinfo
+    localized = dt.replace(tzinfo=system_tz)
+    return localized.astimezone(hermes_tz)
+
+
+def _recoverable_oneshot_run_at(
+    schedule: Dict[str, Any],
+    now: datetime,
+    *,
+    last_run_at: Optional[str] = None,
+) -> Optional[str]:
+    """Return the stored `run_at` of a one-shot schedule if it may still fire.
+
+    None is returned for non-"once" schedules, for one-shots that already
+    have a `last_run_at`, and for schedules without `run_at`. Otherwise the
+    run time is accepted as long as it is no more than ONESHOT_GRACE_SECONDS
+    before *now*, so a job created slightly after its requested minute still
+    runs on the next tick.
+    """
+    if schedule.get("kind") != "once" or last_run_at:
+        return None
+
+    run_at = schedule.get("run_at")
+    if not run_at:
+        return None
+
+    scheduled_for = _ensure_aware(datetime.fromisoformat(run_at))
+    earliest_allowed = now - timedelta(seconds=ONESHOT_GRACE_SECONDS)
+    return run_at if scheduled_for >= earliest_allowed else None
+
+
+def _compute_grace_seconds(schedule: dict) -> int:
+    """Return how many seconds late a job may be and still catch up.
+
+    The grace window is half of the schedule period, clamped to the range
+    120 seconds .. 2 hours. For intervals the period is `minutes * 60`; for
+    cron expressions it is the gap between the next two occurrences after
+    now. Anything else (including a cron expression that cannot be
+    evaluated, or croniter being unavailable) gets the 120-second minimum.
+    """
+    MIN_GRACE = 120
+    MAX_GRACE = 7200  # 2 hours
+
+    def _half_period_clamped(period_seconds):
+        return max(MIN_GRACE, min(period_seconds // 2, MAX_GRACE))
+
+    kind = schedule.get("kind")
+
+    if kind == "interval":
+        return _half_period_clamped(schedule.get("minutes", 1) * 60)
+
+    if kind == "cron" and HAS_CRONITER:
+        try:
+            occurrences = croniter(schedule["expr"], _hermes_now())
+            first_fire = occurrences.get_next(datetime)
+            second_fire = occurrences.get_next(datetime)
+            return _half_period_clamped(int((second_fire - first_fire).total_seconds()))
+        except Exception:
+            # Best effort: fall back to the minimum grace below.
+            pass
+
+    return MIN_GRACE
+
+
+def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]:
+    """
+    Work out when a schedule should fire next.
+
+    - "once": the stored run time while it is still eligible (see
+      _recoverable_oneshot_run_at), otherwise None.
+    - "interval": `last_run_at` plus the interval, or now plus the interval
+      when the job has never run.
+    - "cron": the next occurrence after now; None when croniter is missing.
+
+    Returns ISO timestamp string, or None if no more runs.
+    """
+    now = _hermes_now()
+    kind = schedule["kind"]
+
+    if kind == "once":
+        return _recoverable_oneshot_run_at(schedule, now, last_run_at=last_run_at)
+
+    if kind == "interval":
+        minutes = schedule["minutes"]
+        # Anchor on the previous run when there is one, otherwise on "now".
+        anchor = _ensure_aware(datetime.fromisoformat(last_run_at)) if last_run_at else now
+        return (anchor + timedelta(minutes=minutes)).isoformat()
+
+    if kind == "cron" and HAS_CRONITER:
+        return croniter(schedule["expr"], now).get_next(datetime).isoformat()
+
+    return None
+
+
+# =============================================================================
+# Job CRUD Operations
+# =============================================================================
+
+def load_jobs() -> List[Dict[str, Any]]:
+    """Load all jobs from storage.
+
+    Returns:
+        The list stored under the "jobs" key of JOBS_FILE. An empty list is
+        returned when the file does not exist, cannot be read or decoded, is
+        not valid JSON, or does not have the ``{"jobs": [...]}`` shape. Every
+        case other than a missing file is logged, because a later save_jobs()
+        call would overwrite the unreadable file.
+    """
+    ensure_dirs()
+    if not JOBS_FILE.exists():
+        return []
+
+    try:
+        with open(JOBS_FILE, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except (ValueError, OSError) as e:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        logger.error("Could not read cron jobs from %s: %s", JOBS_FILE, e)
+        return []
+
+    if not isinstance(data, dict):
+        logger.error(
+            "Cron jobs file %s has unexpected top-level type %s; ignoring it",
+            JOBS_FILE,
+            type(data).__name__,
+        )
+        return []
+
+    jobs = data.get("jobs", [])
+    if not isinstance(jobs, list):
+        logger.error("Cron jobs file %s has a non-list 'jobs' entry; ignoring it", JOBS_FILE)
+        return []
+    return jobs
+
+
+def save_jobs(jobs: List[Dict[str, Any]]):
+    """Save all jobs to storage by writing a temp file and swapping it in.
+
+    The payload ``{"jobs": jobs, "updated_at": <now>}`` is written to a
+    temporary file created in JOBS_FILE's directory, flushed and fsynced,
+    and then moved over JOBS_FILE with os.replace(). Afterwards the file is
+    restricted to owner read/write via _secure_file().
+
+    Args:
+        jobs: Complete list of job dicts to persist.
+
+    Raises:
+        Whatever the write/replace raised; the temporary file is removed
+        (best effort) before the exception is re-raised.
+    """
+    ensure_dirs()
+    # The temp file is created next to JOBS_FILE (same directory).
+    fd, tmp_path = tempfile.mkstemp(dir=str(JOBS_FILE.parent), suffix='.tmp', prefix='.jobs_')
+    try:
+        with os.fdopen(fd, 'w', encoding='utf-8') as f:
+            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
+            # Push the data to disk before the rename makes it visible.
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, JOBS_FILE)
+        _secure_file(JOBS_FILE)
+    except BaseException:
+        # BaseException (not Exception) so the temp file is also cleaned up
+        # on KeyboardInterrupt / SystemExit; the error is always re-raised.
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
+
+
+def create_job(
+    prompt: str,
+    schedule: str,
+    name: Optional[str] = None,
+    repeat: Optional[int] = None,
+    deliver: Optional[str] = None,
+    origin: Optional[Dict[str, Any]] = None,
+    skill: Optional[str] = None,
+    skills: Optional[List[str]] = None,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Build a new cron job record, append it to the stored jobs and return it.
+
+    Args:
+        prompt: Prompt to run (self-contained, or a task instruction when a skill is set)
+        schedule: Schedule string, parsed with parse_schedule
+        name: Friendly name; defaults to the first 50 characters of the
+            prompt, else the first skill, else "cron job"
+        repeat: Number of runs (None = forever; 0 or negative is treated as
+            None; one-shot schedules default to 1)
+        deliver: Delivery target ("origin", "local", "telegram", etc.);
+            defaults to "origin" when an origin is given, otherwise "local"
+        origin: Source info where the job was created (used for "origin" delivery)
+        skill: Legacy single skill name to load before running the prompt
+        skills: Ordered list of skills to load before running the prompt
+        model: Per-job model override
+        provider: Per-job provider override
+        base_url: Per-job base URL override (trailing "/" removed)
+
+    Returns:
+        The created job dict
+
+    Raises:
+        ValueError: if the schedule string cannot be parsed.
+    """
+    parsed = parse_schedule(schedule)
+
+    # Non-positive repeat counts mean "no limit"; an unlimited one-shot is
+    # pinned to a single run.
+    if repeat is not None and repeat <= 0:
+        repeat = None
+    if repeat is None and parsed["kind"] == "once":
+        repeat = 1
+
+    if deliver is None:
+        deliver = "origin" if origin else "local"
+
+    job_id = uuid.uuid4().hex[:12]
+    created_at = _hermes_now().isoformat()
+
+    skill_names = _normalize_skill_list(skill, skills)
+    primary_skill = skill_names[0] if skill_names else None
+
+    def _optional_text(value, strip_trailing_slash=False):
+        # Non-strings and blank strings both collapse to None.
+        if not isinstance(value, str):
+            return None
+        text = str(value).strip()
+        if strip_trailing_slash:
+            text = text.rstrip("/")
+        return text or None
+
+    model_override = _optional_text(model)
+    provider_override = _optional_text(provider)
+    base_url_override = _optional_text(base_url, strip_trailing_slash=True)
+
+    label_source = prompt or primary_skill or "cron job"
+
+    job = {
+        "id": job_id,
+        "name": name or label_source[:50].strip(),
+        "prompt": prompt,
+        "skills": skill_names,
+        "skill": primary_skill,
+        "model": model_override,
+        "provider": provider_override,
+        "base_url": base_url_override,
+        "schedule": parsed,
+        "schedule_display": parsed.get("display", schedule),
+        "repeat": {
+            "times": repeat,  # None = forever
+            "completed": 0
+        },
+        "enabled": True,
+        "state": "scheduled",
+        "paused_at": None,
+        "paused_reason": None,
+        "created_at": created_at,
+        "next_run_at": compute_next_run(parsed),
+        "last_run_at": None,
+        "last_status": None,
+        "last_error": None,
+        # Delivery configuration
+        "deliver": deliver,
+        "origin": origin,  # Kept so "origin" delivery can find its way back
+    }
+
+    stored = load_jobs()
+    stored.append(job)
+    save_jobs(stored)
+
+    return job
+
+
+def get_job(job_id: str) -> Optional[Dict[str, Any]]:
+    """Return the stored job with this ID (skill fields normalized), or None."""
+    found = next((entry for entry in load_jobs() if entry["id"] == job_id), None)
+    return None if found is None else _apply_skill_fields(found)
+
+
+def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]:
+    """Return stored jobs with normalized skill fields.
+
+    Jobs whose "enabled" flag is false are left out unless
+    *include_disabled* is true; a missing flag counts as enabled.
+    """
+    normalized = (_apply_skill_fields(raw) for raw in load_jobs())
+    if include_disabled:
+        return list(normalized)
+    return [job for job in normalized if job.get("enabled", True)]
+
+
+def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Update a job by ID, refreshing derived fields when needed.
+
+    Args:
+        job_id: ID of the job to modify.
+        updates: Fields to overlay on the stored job. When "schedule" is
+            present it must already be a parsed schedule dict; the display
+            string and (unless the job is paused) `next_run_at` are
+            recomputed from it. When only the legacy "skill" key is given,
+            it replaces the stored `skills` list; when "skills" is given it
+            takes precedence.
+
+    Returns:
+        The updated job with normalized skill fields, or None if no job has
+        this ID.
+    """
+    jobs = load_jobs()
+    for i, job in enumerate(jobs):
+        if job["id"] != job_id:
+            continue
+
+        merged = {**job, **updates}
+        if "skill" in updates and "skills" not in updates:
+            # Normalization prefers `skills` over `skill`; without this the
+            # stale stored list would win and a legacy single-skill update
+            # would be silently dropped.
+            merged["skills"] = None
+        updated = _apply_skill_fields(merged)
+
+        if "schedule" in updates:
+            updated_schedule = updated["schedule"]
+            updated["schedule_display"] = updates.get(
+                "schedule_display",
+                updated_schedule.get("display", updated.get("schedule_display")),
+            )
+            if updated.get("state") != "paused":
+                updated["next_run_at"] = compute_next_run(updated_schedule)
+
+        # An active job must always have a next run; fill it in if missing.
+        if updated.get("enabled", True) and updated.get("state") != "paused" and not updated.get("next_run_at"):
+            updated["next_run_at"] = compute_next_run(updated["schedule"])
+
+        jobs[i] = updated
+        save_jobs(jobs)
+        return _apply_skill_fields(jobs[i])
+    return None
+
+
+def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, Any]]:
+    """Disable a job and mark it paused (with timestamp and optional reason).
+
+    The job stays in storage. Returns the updated job, or None if the ID is
+    unknown.
+    """
+    pause_fields = {
+        "enabled": False,
+        "state": "paused",
+        "paused_at": _hermes_now().isoformat(),
+        "paused_reason": reason,
+    }
+    return update_job(job_id, pause_fields)
+
+
+def resume_job(job_id: str) -> Optional[Dict[str, Any]]:
+    """Re-enable a paused job, scheduling its next run relative to now.
+
+    Returns the updated job, or None if the ID is unknown.
+    """
+    existing = get_job(job_id)
+    if not existing:
+        return None
+
+    resume_fields = {
+        "enabled": True,
+        "state": "scheduled",
+        "paused_at": None,
+        "paused_reason": None,
+        "next_run_at": compute_next_run(existing["schedule"]),
+    }
+    return update_job(job_id, resume_fields)
+
+
+def trigger_job(job_id: str) -> Optional[Dict[str, Any]]:
+    """Make a job due immediately by setting its `next_run_at` to now.
+
+    The job is also enabled and any paused state is cleared. Returns the
+    updated job, or None if the ID is unknown.
+    """
+    if not get_job(job_id):
+        return None
+
+    trigger_fields = {
+        "enabled": True,
+        "state": "scheduled",
+        "paused_at": None,
+        "paused_reason": None,
+        "next_run_at": _hermes_now().isoformat(),
+    }
+    return update_job(job_id, trigger_fields)
+
+
+def remove_job(job_id: str) -> bool:
+    """Delete the job with this ID from storage.
+
+    Returns True if a job was removed, False (without writing) otherwise.
+    """
+    stored = load_jobs()
+    remaining = [entry for entry in stored if entry["id"] != job_id]
+    if len(remaining) == len(stored):
+        return False
+    save_jobs(remaining)
+    return True
+
+
+def mark_job_run(job_id: str, success: bool, error: Optional[str] = None) -> None:
+    """
+    Mark a job as having been run.
+    
+    Updates last_run_at, last_status, increments completed count,
+    computes next_run_at, and auto-deletes if repeat limit reached.
+
+    Args:
+        job_id: ID of the job that just ran.
+        success: True records last_status "ok"; False records "error".
+        error: Error text stored in last_error; only kept when success is False.
+    """
+    jobs = load_jobs()
+    for i, job in enumerate(jobs):
+        if job["id"] == job_id:
+            now = _hermes_now().isoformat()
+            job["last_run_at"] = now
+            job["last_status"] = "ok" if success else "error"
+            job["last_error"] = error if not success else None
+            
+            # Increment completed count
+            if job.get("repeat"):
+                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+                
+                # Check if we've hit the repeat limit
+                times = job["repeat"].get("times")
+                completed = job["repeat"]["completed"]
+                if times is not None and times > 0 and completed >= times:
+                    # Remove the job (limit reached)
+                    # Popping while enumerating is safe here because we
+                    # return immediately afterwards.
+                    jobs.pop(i)
+                    save_jobs(jobs)
+                    return
+            
+            # Compute next run
+            job["next_run_at"] = compute_next_run(job["schedule"], now)
+
+            # If no next run (one-shot completed), disable
+            if job["next_run_at"] is None:
+                job["enabled"] = False
+                job["state"] = "completed"
+            elif job.get("state") != "paused":
+                job["state"] = "scheduled"
+
+            save_jobs(jobs)
+            return
+    
+    # Reached only when no job matched job_id; the list is written back unchanged.
+    save_jobs(jobs)
+
+
+def get_due_jobs() -> List[Dict[str, Any]]:
+    """Get all jobs that are due to run now.
+
+    For recurring jobs (cron/interval), if the scheduled time is stale
+    (more than one period in the past, e.g. because the gateway was down),
+    the job is fast-forwarded to the next future run instead of firing
+    immediately.  This prevents a burst of missed jobs on gateway restart.
+
+    Enabled jobs without a next_run_at are skipped unless they are one-shots
+    that _recoverable_oneshot_run_at() still considers eligible, in which case
+    the recovered time is written back to storage.
+
+    Returns:
+        Copies of the due jobs with normalized skill fields. Storage is only
+        rewritten when a next_run_at was recovered or fast-forwarded.
+    """
+    now = _hermes_now()
+    raw_jobs = load_jobs()
+    # Work on normalized deep copies; raw_jobs stays as loaded so that only
+    # the next_run_at adjustments below are persisted.
+    jobs = [_apply_skill_fields(j) for j in copy.deepcopy(raw_jobs)]
+    due = []
+    needs_save = False
+
+    for job in jobs:
+        if not job.get("enabled", True):
+            continue
+
+        next_run = job.get("next_run_at")
+        if not next_run:
+            recovered_next = _recoverable_oneshot_run_at(
+                job.get("schedule", {}),
+                now,
+                last_run_at=job.get("last_run_at"),
+            )
+            if not recovered_next:
+                continue
+
+            job["next_run_at"] = recovered_next
+            next_run = recovered_next
+            logger.info(
+                "Job '%s' had no next_run_at; recovering one-shot run at %s",
+                job.get("name", job["id"]),
+                recovered_next,
+            )
+            # Mirror the recovered time onto the stored record.
+            for rj in raw_jobs:
+                if rj["id"] == job["id"]:
+                    rj["next_run_at"] = recovered_next
+                    needs_save = True
+                    break
+
+        next_run_dt = _ensure_aware(datetime.fromisoformat(next_run))
+        if next_run_dt <= now:
+            schedule = job.get("schedule", {})
+            kind = schedule.get("kind")
+
+            # For recurring jobs, check if the scheduled time is stale
+            # (gateway was down and missed the window). Fast-forward to
+            # the next future occurrence instead of firing a stale run.
+            grace = _compute_grace_seconds(schedule)
+            if kind in ("cron", "interval") and (now - next_run_dt).total_seconds() > grace:
+                # Job is past its catch-up grace window — this is a stale missed run.
+                # Grace scales with schedule period: daily=2h, hourly=30m, 10min=5m.
+                # Passing "now" as last_run_at anchors interval jobs on the current time.
+                new_next = compute_next_run(schedule, now.isoformat())
+                if new_next:
+                    logger.info(
+                        "Job '%s' missed its scheduled time (%s, grace=%ds). "
+                        "Fast-forwarding to next run: %s",
+                        job.get("name", job["id"]),
+                        next_run,
+                        grace,
+                        new_next,
+                    )
+                    # Update the job in storage
+                    for rj in raw_jobs:
+                        if rj["id"] == job["id"]:
+                            rj["next_run_at"] = new_next
+                            needs_save = True
+                            break
+                    continue  # Skip this run
+                # No replacement time could be computed: fall through and
+                # treat the job as due.
+
+            due.append(job)
+
+    if needs_save:
+        save_jobs(raw_jobs)
+
+    return due
+
+
+def save_job_output(job_id: str, output: str) -> Path:
+    """Save job output to a timestamped Markdown file.
+
+    The file is written to OUTPUT_DIR/<job_id>/<YYYY-MM-DD_HH-MM-SS>.md via a
+    temporary file in the same directory that is flushed, fsynced and then
+    moved into place with os.replace(). The job directory and the file are
+    restricted to the owner.
+
+    Args:
+        job_id: ID of the job; used as the output sub-directory name.
+        output: Text to store.
+
+    Returns:
+        Path of the written output file.
+
+    Raises:
+        Whatever the write/replace raised; the temporary file is removed
+        (best effort) before the exception is re-raised.
+    """
+    ensure_dirs()
+    job_output_dir = OUTPUT_DIR / job_id
+    job_output_dir.mkdir(parents=True, exist_ok=True)
+    _secure_dir(job_output_dir)
+    
+    # NOTE(review): the name has one-second resolution, so two outputs for the
+    # same job within the same second would target the same file.
+    timestamp = _hermes_now().strftime("%Y-%m-%d_%H-%M-%S")
+    output_file = job_output_dir / f"{timestamp}.md"
+    
+    fd, tmp_path = tempfile.mkstemp(dir=str(job_output_dir), suffix='.tmp', prefix='.output_')
+    try:
+        with os.fdopen(fd, 'w', encoding='utf-8') as f:
+            f.write(output)
+            # Push the data to disk before the rename makes it visible.
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, output_file)
+        _secure_file(output_file)
+    except BaseException:
+        # BaseException so the temp file is also removed on KeyboardInterrupt /
+        # SystemExit; the error is always re-raised.
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
+    
+    return output_file
--- a/hermes_code/cron/scheduler.py
+++ b/hermes_code/cron/scheduler.py
@ -0,0 +1,568 @@
+"""
+Cron job scheduler - executes due jobs.
+
+Provides tick() which checks for due jobs and runs them. The gateway
+calls this every 60 seconds from a background thread.
+
+Uses a file-based lock (~/.hermes/cron/.tick.lock) so only one tick
+runs at a time if multiple processes overlap.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import sys
+import traceback
+
+# fcntl is Unix-only; on Windows use msvcrt for file locking
+try:
+    import fcntl
+except ImportError:
+    fcntl = None
+    try:
+        import msvcrt
+    except ImportError:
+        msvcrt = None
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+from hermes_time import now as _hermes_now
+
+logger = logging.getLogger(__name__)
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from cron.jobs import get_due_jobs, mark_job_run, save_job_output
+
+# Sentinel: when a cron agent has nothing new to report, it can start its
+# response with this marker to suppress delivery.  Output is still saved
+# locally for audit.
+SILENT_MARKER = "[SILENT]"
+
+# Resolve Hermes home directory: $HERMES_HOME when set, otherwise ~/.hermes
+_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+
+# File-based lock prevents concurrent ticks from gateway + daemon + systemd timer.
+# The lock file lives at <hermes home>/cron/.tick.lock.
+_LOCK_DIR = _hermes_home / "cron"
+_LOCK_FILE = _LOCK_DIR / ".tick.lock"
+
+
+def _resolve_origin(job: dict) -> Optional[dict]:
+    """Return the job's origin dict when it names both a platform and a chat.
+
+    The dict is returned as stored (extra routing metadata included). None is
+    returned when the origin is missing or lacks "platform" or "chat_id".
+    """
+    origin = job.get("origin")
+    if origin and origin.get("platform") and origin.get("chat_id"):
+        return origin
+    return None
+
+
+def _resolve_delivery_target(job: dict) -> Optional[dict]:
+    """Turn a job's `deliver` setting into a concrete target, if there is one.
+
+    Resolution rules, in order:
+    - "local": no delivery (None).
+    - "origin": the job's origin chat, or None without a usable origin.
+    - "platform:chat_id[:thread_id]": exactly what the string says.
+    - bare platform name: the origin chat when the origin is on that
+      platform, otherwise the chat from the <PLATFORM>_HOME_CHANNEL
+      environment variable, otherwise None.
+
+    Returns a dict with "platform", "chat_id" and "thread_id", or None.
+    """
+    deliver = job.get("deliver", "local")
+    origin = _resolve_origin(job)
+
+    def _origin_target(platform):
+        return {
+            "platform": platform,
+            "chat_id": str(origin["chat_id"]),
+            "thread_id": origin.get("thread_id"),
+        }
+
+    if deliver == "local":
+        return None
+
+    if deliver == "origin":
+        return _origin_target(origin["platform"]) if origin else None
+
+    if ":" in deliver:
+        platform_name, _, remainder = deliver.partition(":")
+        # Optional thread_id suffix (e.g. "telegram:-1003724596514:17")
+        chat_id, separator, thread_part = remainder.partition(":")
+        return {
+            "platform": platform_name,
+            "chat_id": chat_id,
+            "thread_id": thread_part if separator else None,
+        }
+
+    platform_name = deliver
+    if origin and origin.get("platform") == platform_name:
+        return _origin_target(platform_name)
+
+    home_channel = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
+    if not home_channel:
+        return None
+
+    return {
+        "platform": platform_name,
+        "chat_id": home_channel,
+        "thread_id": None,
+    }
+
+
+def _deliver_result(job: dict, content: str) -> None:
+    """
+    Deliver job output to the configured target (origin chat, specific platform, etc.).
+
+    Uses the standalone platform send functions from send_message_tool so delivery
+    works whether or not the gateway is running.
+
+    Delivery is best effort: an unresolved target, unknown or disabled
+    platform, config load failure, or send failure (including a failure in
+    the worker-thread fallback) is logged and the function returns normally.
+
+    Args:
+        job: Job dict; "deliver", "origin", "id" and "name" are read.
+        content: Text to send; it is wrapped with a cron header and footer.
+    """
+    target = _resolve_delivery_target(job)
+    if not target:
+        if job.get("deliver", "local") != "local":
+            logger.warning(
+                "Job '%s' deliver=%s but no concrete delivery target could be resolved",
+                job["id"],
+                job.get("deliver", "local"),
+            )
+        return
+
+    platform_name = target["platform"]
+    chat_id = target["chat_id"]
+    thread_id = target.get("thread_id")
+
+    from tools.send_message_tool import _send_to_platform
+    from gateway.config import load_gateway_config, Platform
+
+    platform_map = {
+        "telegram": Platform.TELEGRAM,
+        "discord": Platform.DISCORD,
+        "slack": Platform.SLACK,
+        "whatsapp": Platform.WHATSAPP,
+        "signal": Platform.SIGNAL,
+        "matrix": Platform.MATRIX,
+        "mattermost": Platform.MATTERMOST,
+        "homeassistant": Platform.HOMEASSISTANT,
+        "dingtalk": Platform.DINGTALK,
+        "email": Platform.EMAIL,
+        "sms": Platform.SMS,
+    }
+    platform = platform_map.get(platform_name.lower())
+    if not platform:
+        logger.warning("Job '%s': unknown platform '%s' for delivery", job["id"], platform_name)
+        return
+
+    try:
+        config = load_gateway_config()
+    except Exception as e:
+        logger.error("Job '%s': failed to load gateway config for delivery: %s", job["id"], e)
+        return
+
+    pconfig = config.platforms.get(platform)
+    if not pconfig or not pconfig.enabled:
+        logger.warning("Job '%s': platform '%s' not configured/enabled", job["id"], platform_name)
+        return
+
+    # Wrap the content so the user knows this is a cron delivery and that
+    # the interactive agent has no visibility into it.
+    task_name = job.get("name", job["id"])
+    wrapped = (
+        f"Cronjob Response: {task_name}\n"
+        f"-------------\n\n"
+        f"{content}\n\n"
+        f"Note: The agent cannot see this message, and therefore cannot respond to it."
+    )
+
+    # Run the async send in a fresh event loop (safe from any thread)
+    coro = _send_to_platform(platform, pconfig, chat_id, wrapped, thread_id=thread_id)
+    try:
+        try:
+            result = asyncio.run(coro)
+        except RuntimeError:
+            # asyncio.run() checks for a running loop before awaiting the coroutine;
+            # when it raises, the original coro was never started — close it to
+            # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
+            # fresh thread that has no running loop.
+            coro.close()
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, wrapped, thread_id=thread_id))
+                result = future.result(timeout=30)
+    except Exception as e:
+        # The outer try also covers the fallback above: a timeout or send
+        # error raised while handling RuntimeError would otherwise escape
+        # this function instead of being logged as a failed delivery.
+        logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
+        return
+
+    if result and result.get("error"):
+        logger.error("Job '%s': delivery error: %s", job["id"], result["error"])
+    else:
+        logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
+
+
+def _build_job_prompt(job: dict) -> str:
+    """Build the effective prompt for a cron job, optionally loading one or more skills first.
+
+    Args:
+        job: Job record. Reads ``prompt``, ``skills`` (a list of skill names,
+            or a single name given as a string) and, only when ``skills`` is
+            absent, the legacy single-value ``skill`` key. ``name``/``id``
+            are used for log messages only.
+
+    Returns:
+        When no skills are requested: the [SILENT] guidance followed by the
+        user's prompt. Otherwise: one block per successfully loaded skill
+        (preceded by a notice listing skills that could not be loaded),
+        followed by the user's instruction together with the [SILENT]
+        guidance, or by the guidance alone when the job has no prompt text.
+    """
+    # ``prompt`` may be missing or explicitly null in the stored job.
+    user_prompt = job.get("prompt") or ""
+    skills = job.get("skills")
+
+    # Always prepend [SILENT] guidance so the cron agent can suppress
+    # delivery when it has nothing new or noteworthy to report.
+    silent_hint = (
+        "[SYSTEM: If you have nothing new or noteworthy to report, respond "
+        "with exactly \"[SILENT]\" (optionally followed by a brief internal "
+        "note). This suppresses delivery to the user while still saving "
+        "output locally. Only use [SILENT] when there are genuinely no "
+        "changes worth reporting.]\n\n"
+    )
+    prompt = silent_hint + user_prompt
+
+    if skills is None:
+        legacy = job.get("skill")
+        skills = [legacy] if legacy else []
+    elif isinstance(skills, str):
+        # A bare string is one skill name, not a sequence of characters.
+        skills = [skills]
+
+    # Drop null and blank entries; str(None) would otherwise yield "None".
+    skill_names = [
+        str(name).strip()
+        for name in skills
+        if name is not None and str(name).strip()
+    ]
+    if not skill_names:
+        return prompt
+
+    # Imported lazily: only needed when the job actually references skills.
+    from tools.skills_tool import skill_view
+
+    parts: list[str] = []
+    skipped: list[str] = []
+    for skill_name in skill_names:
+        loaded = json.loads(skill_view(skill_name))
+        if not loaded.get("success"):
+            error = loaded.get("error") or f"Failed to load skill '{skill_name}'"
+            logger.warning("Cron job '%s': skill not found, skipping — %s", job.get("name", job.get("id")), error)
+            skipped.append(skill_name)
+            continue
+
+        content = str(loaded.get("content") or "").strip()
+        if parts:
+            # Blank separator line between consecutive skill blocks.
+            parts.append("")
+        parts.extend(
+            [
+                f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
+                "",
+                content,
+            ]
+        )
+
+    if skipped:
+        notice = (
+            f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
+            f"and were skipped: {', '.join(skipped)}. "
+            f"Start your response with a brief notice so the user is aware, e.g.: "
+            f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
+        )
+        parts.insert(0, notice)
+
+    if user_prompt.strip():
+        parts.extend(["", f"The user has provided the following instruction alongside the skill invocation: {prompt}"])
+    else:
+        # No user instruction: still pass the [SILENT] guidance along, but do
+        # not present system text as something the user wrote.
+        parts.extend(["", silent_hint.rstrip()])
+    return "\n".join(parts)
+
+
+def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
+    """
+    Execute a single cron job.
+
+    Builds the job prompt, resolves model/provider settings from the job,
+    the environment, ``.env`` and ``config.yaml`` (re-read on every run),
+    runs one agent conversation and renders a Markdown report of the run.
+
+    While the job runs, origin and delivery-target details are exported as
+    ``HERMES_SESSION_*`` / ``HERMES_CRON_AUTO_DELIVER_*`` environment
+    variables; they are removed again before returning.
+
+    Args:
+        job: Job record. ``id`` is required; ``name`` falls back to the id.
+            Optional keys read here: ``prompt``, ``model``, ``provider``,
+            ``base_url``, ``schedule_display``.
+
+    Returns:
+        Tuple of (success, full_output_doc, final_response, error_message).
+        On failure ``final_response`` is "" and ``error_message`` holds
+        "<ExceptionType>: <message>"; on success ``error_message`` is None.
+    """
+    from run_agent import AIAgent
+
+    job_id = job["id"]
+    # Same fallback the delivery code uses, so a nameless job does not crash.
+    job_name = job.get("name") or job_id
+    # Raw user prompt for now; replaced by the fully built prompt inside the
+    # try block so a failure while building it is still reported properly.
+    prompt = job.get("prompt") or ""
+
+    # Initialize SQLite session store so cron job messages are persisted
+    # and discoverable via session_search (same pattern as gateway/run.py).
+    _session_db = None
+    try:
+        from hermes_state import SessionDB
+        _session_db = SessionDB()
+    except Exception as e:
+        logger.debug("Job '%s': SQLite session store not available: %s", job_id, e)
+
+    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
+    # Log the user's own prompt: the built prompt starts with system
+    # guidance longer than 100 characters, which would hide the actual task.
+    logger.info("Prompt: %s", prompt[:100])
+
+    try:
+        prompt = _build_job_prompt(job)
+        origin = _resolve_origin(job)
+
+        # Inject origin context so the agent's send_message tool knows the chat.
+        # Done inside the try so the finally block always undoes it.
+        if origin:
+            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
+            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
+            if origin.get("chat_name"):
+                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
+
+        # Re-read .env and config.yaml fresh every run so provider/key
+        # changes take effect without a gateway restart.
+        from dotenv import load_dotenv
+        try:
+            load_dotenv(str(_hermes_home / ".env"), override=True, encoding="utf-8")
+        except UnicodeDecodeError:
+            load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
+
+        delivery_target = _resolve_delivery_target(job)
+        if delivery_target:
+            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
+            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
+            if delivery_target.get("thread_id") is not None:
+                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
+
+        model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
+
+        # Load config.yaml for model, reasoning, prefill, toolsets, provider routing
+        _cfg = {}
+        try:
+            import yaml
+            _cfg_path = str(_hermes_home / "config.yaml")
+            if os.path.exists(_cfg_path):
+                with open(_cfg_path, encoding="utf-8") as _f:
+                    _loaded = yaml.safe_load(_f)
+                if isinstance(_loaded, dict):
+                    _cfg = _loaded
+                elif _loaded:
+                    # A non-mapping top level cannot hold any of the keys below.
+                    logger.warning("Job '%s': config.yaml is not a mapping, using defaults", job_id)
+                _model_cfg = _cfg.get("model", {})
+                # A model set on the job itself wins over config.yaml.
+                if not job.get("model"):
+                    if isinstance(_model_cfg, str):
+                        model = _model_cfg or model
+                    elif isinstance(_model_cfg, dict):
+                        model = _model_cfg.get("default") or model
+        except Exception as e:
+            logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e)
+
+        # An empty YAML section parses as None, so normalise before .get().
+        _agent_cfg = _cfg.get("agent")
+        if not isinstance(_agent_cfg, dict):
+            _agent_cfg = {}
+
+        # Reasoning config from env or config.yaml
+        reasoning_config = None
+        effort = os.getenv("HERMES_REASONING_EFFORT", "").strip()
+        if not effort:
+            # "or ''" keeps a null value from turning into the string "None",
+            # which would otherwise be read as an explicit "none" (disabled).
+            effort = str(_agent_cfg.get("reasoning_effort") or "").strip()
+        effort = effort.lower()
+        if effort == "none":
+            reasoning_config = {"enabled": False}
+        elif effort in ("xhigh", "high", "medium", "low", "minimal"):
+            reasoning_config = {"enabled": True, "effort": effort}
+
+        # Prefill messages from env or config.yaml
+        prefill_messages = None
+        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
+        if prefill_file:
+            import json as _json
+            pfpath = Path(prefill_file).expanduser()
+            # Relative paths are resolved against the Hermes home directory.
+            if not pfpath.is_absolute():
+                pfpath = _hermes_home / pfpath
+            if pfpath.exists():
+                try:
+                    with open(pfpath, "r", encoding="utf-8") as _pf:
+                        prefill_messages = _json.load(_pf)
+                    # Only a JSON list is accepted as prefill messages.
+                    if not isinstance(prefill_messages, list):
+                        prefill_messages = None
+                except Exception as e:
+                    logger.warning("Job '%s': failed to parse prefill messages file '%s': %s", job_id, pfpath, e)
+                    prefill_messages = None
+
+        # Max iterations
+        max_iterations = _agent_cfg.get("max_turns") or _cfg.get("max_turns") or 90
+
+        # Provider routing
+        pr = _cfg.get("provider_routing")
+        if not isinstance(pr, dict):
+            pr = {}
+        smart_routing = _cfg.get("smart_model_routing", {}) or {}
+
+        from hermes_cli.runtime_provider import (
+            resolve_runtime_provider,
+            format_runtime_provider_error,
+        )
+        try:
+            runtime_kwargs = {
+                "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
+            }
+            if job.get("base_url"):
+                runtime_kwargs["explicit_base_url"] = job.get("base_url")
+            runtime = resolve_runtime_provider(**runtime_kwargs)
+        except Exception as exc:
+            # Re-raise with the formatted message; the outer handler below
+            # turns it into the failure report.
+            message = format_runtime_provider_error(exc)
+            raise RuntimeError(message) from exc
+
+        from agent.smart_model_routing import resolve_turn_route
+        turn_route = resolve_turn_route(
+            prompt,
+            smart_routing,
+            {
+                "model": model,
+                "api_key": runtime.get("api_key"),
+                "base_url": runtime.get("base_url"),
+                "provider": runtime.get("provider"),
+                "api_mode": runtime.get("api_mode"),
+                "command": runtime.get("command"),
+                "args": list(runtime.get("args") or []),
+            },
+        )
+
+        agent = AIAgent(
+            model=turn_route["model"],
+            api_key=turn_route["runtime"].get("api_key"),
+            base_url=turn_route["runtime"].get("base_url"),
+            provider=turn_route["runtime"].get("provider"),
+            api_mode=turn_route["runtime"].get("api_mode"),
+            acp_command=turn_route["runtime"].get("command"),
+            acp_args=turn_route["runtime"].get("args"),
+            max_iterations=max_iterations,
+            reasoning_config=reasoning_config,
+            prefill_messages=prefill_messages,
+            providers_allowed=pr.get("only"),
+            providers_ignored=pr.get("ignore"),
+            providers_order=pr.get("order"),
+            provider_sort=pr.get("sort"),
+            disabled_toolsets=["cronjob", "messaging", "clarify"],
+            quiet_mode=True,
+            platform="cron",
+            session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",
+            session_db=_session_db,
+        )
+
+        result = agent.run_conversation(prompt)
+
+        final_response = result.get("final_response", "") or ""
+        # Use a separate variable for log display; keep final_response clean
+        # for delivery logic (empty response = no delivery).
+        logged_response = final_response if final_response else "(No response generated)"
+
+        output = f"""# Cron Job: {job_name}
+
+**Job ID:** {job_id}
+**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
+**Schedule:** {job.get('schedule_display', 'N/A')}
+
+## Prompt
+
+{prompt}
+
+## Response
+
+{logged_response}
+"""
+
+        logger.info("Job '%s' completed successfully", job_name)
+        return True, output, final_response, None
+
+    except Exception as e:
+        error_msg = f"{type(e).__name__}: {str(e)}"
+        logger.error("Job '%s' failed: %s", job_name, error_msg)
+
+        output = f"""# Cron Job: {job_name} (FAILED)
+
+**Job ID:** {job_id}
+**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}
+**Schedule:** {job.get('schedule_display', 'N/A')}
+
+## Prompt
+
+{prompt}
+
+## Error
+
+```
+{error_msg}
+
+{traceback.format_exc()}
+```
+"""
+        return False, output, "", error_msg
+
+    finally:
+        # Clean up injected env vars so they don't leak to other jobs
+        for key in (
+            "HERMES_SESSION_PLATFORM",
+            "HERMES_SESSION_CHAT_ID",
+            "HERMES_SESSION_CHAT_NAME",
+            "HERMES_CRON_AUTO_DELIVER_PLATFORM",
+            "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
+            "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
+        ):
+            os.environ.pop(key, None)
+        if _session_db:
+            try:
+                _session_db.close()
+            except Exception as e:
+                logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
+
+
+def tick(verbose: bool = True) -> int:
+    """
+    Check and run all due jobs.
+
+    Uses a file lock so only one tick runs at a time, even if the gateway's
+    in-process ticker and a standalone daemon or manual tick overlap.
+
+    For each due job: run it, save its output, deliver the final response
+    (or a failure notice) unless the agent answered with the silent marker,
+    then record the run. An error in one job never prevents the remaining
+    due jobs from being processed.
+
+    Args:
+        verbose: Whether to log status messages
+
+    Returns:
+        Number of jobs executed (0 if another tick is already running).
+        Jobs whose processing raised are not counted.
+    """
+    _LOCK_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Cross-platform file locking: fcntl on Unix, msvcrt on Windows
+    lock_fd = None
+    try:
+        lock_fd = open(_LOCK_FILE, "w")
+        if fcntl:
+            # Non-blocking: fail immediately instead of waiting for the holder.
+            fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        elif msvcrt:
+            msvcrt.locking(lock_fd.fileno(), msvcrt.LK_NBLCK, 1)
+    except (OSError, IOError):
+        logger.debug("Tick skipped — another instance holds the lock")
+        if lock_fd is not None:
+            lock_fd.close()
+        return 0
+
+    try:
+        due_jobs = get_due_jobs()
+
+        if not due_jobs:
+            if verbose:
+                logger.info("%s - No jobs due", _hermes_now().strftime('%H:%M:%S'))
+            return 0
+
+        if verbose:
+            logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
+
+        executed = 0
+        for job in due_jobs:
+            try:
+                success, output, final_response, error = run_job(job)
+
+                output_file = save_job_output(job["id"], output)
+                if verbose:
+                    logger.info("Output saved to: %s", output_file)
+
+                # Deliver the final response to the origin/target chat.
+                # If the agent responded with [SILENT], skip delivery (but
+                # output is already saved above).  Failed jobs always deliver.
+                deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
+                should_deliver = bool(deliver_content)
+                if should_deliver and success and deliver_content.strip().upper().startswith(SILENT_MARKER):
+                    logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER)
+                    should_deliver = False
+
+                if should_deliver:
+                    # Delivery is best-effort: a failure is logged but the
+                    # run is still recorded below.
+                    try:
+                        _deliver_result(job, deliver_content)
+                    except Exception as de:
+                        logger.error("Delivery failed for job %s: %s", job["id"], de)
+
+                mark_job_run(job["id"], success, error)
+                executed += 1
+
+            except Exception as e:
+                logger.error("Error processing job %s: %s", job['id'], e)
+                # Recording the failure may itself fail (e.g. the same storage
+                # problem that broke the job); keep going with the other jobs.
+                try:
+                    mark_job_run(job["id"], False, str(e))
+                except Exception as me:
+                    logger.error("Failed to record failure for job %s: %s", job["id"], me)
+
+        return executed
+    finally:
+        # Always close the lock file, even if releasing the lock raises.
+        try:
+            if fcntl:
+                fcntl.flock(lock_fd, fcntl.LOCK_UN)
+            elif msvcrt:
+                try:
+                    msvcrt.locking(lock_fd.fileno(), msvcrt.LK_UNLCK, 1)
+                except (OSError, IOError):
+                    pass
+        finally:
+            lock_fd.close()
+
+
+# Script entry point: run a single verbose scheduler tick when this module
+# is executed directly.
+if __name__ == "__main__":
+    tick(verbose=True)