From 7b140b31e679cfd4e9cdf419814a4e344ed66c01 Mon Sep 17 00:00:00 2001
From: teknium1
Date: Sat, 14 Mar 2026 19:07:50 -0700
Subject: [PATCH 1/2] fix: suppress duplicate cron sends to auto-delivery targets

Allow cron runs to keep using send_message for additional destinations,
but skip same-target sends when the scheduler will already auto-deliver
the final response there. Add prompt/tool guidance, docs, and regression
coverage for origin/home-channel resolution and thread-aware comparisons.
---
 agent/prompt_builder.py                  |   7 ++
 cron/scheduler.py                        |  98 ++++++++++++++------
 tests/agent/test_prompt_builder.py       |   1 +
 tests/cron/test_scheduler.py             |  52 ++++++++++-
 tests/tools/test_send_message_tool.py    | 113 +++++++++++++++++++++
 tools/cronjob_tools.py                   |   5 +-
 tools/send_message_tool.py               |  49 ++++++++++
 website/docs/user-guide/features/cron.md |   2 +-
 8 files changed, 295 insertions(+), 32 deletions(-)

diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index f1dbcf75..06d63632 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -141,6 +141,13 @@ PLATFORM_HINTS = {
         "is preserved for threading. Do not include greetings or sign-offs unless "
         "contextually appropriate."
     ),
+    "cron": (
+        "You are running as a scheduled cron job. Your final response is automatically "
+        "delivered to the job's configured destination, so do not use send_message to "
+        "send to that same target again. If you want the user to receive something in "
+        "the scheduled destination, put it directly in your final response. Use "
+        "send_message only for additional or different targets."
+    ),
     "cli": (
         "You are a CLI AI Agent. Try not to use markdown but simple text "
         "renderable inside a terminal."
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 12d355cd..4f85677d 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -56,6 +56,50 @@ def _resolve_origin(job: dict) -> Optional[dict]:
     return None


+def _resolve_delivery_target(job: dict) -> Optional[dict]:
+    """Resolve the concrete auto-delivery target for a cron job, if any."""
+    deliver = job.get("deliver", "local")
+    origin = _resolve_origin(job)
+
+    if deliver == "local":
+        return None
+
+    if deliver == "origin":
+        if not origin:
+            return None
+        return {
+            "platform": origin["platform"],
+            "chat_id": str(origin["chat_id"]),
+            "thread_id": origin.get("thread_id"),
+        }
+
+    if ":" in deliver:
+        platform_name, chat_id = deliver.split(":", 1)
+        return {
+            "platform": platform_name,
+            "chat_id": chat_id,
+            "thread_id": None,
+        }
+
+    platform_name = deliver
+    if origin and origin.get("platform") == platform_name:
+        return {
+            "platform": platform_name,
+            "chat_id": str(origin["chat_id"]),
+            "thread_id": origin.get("thread_id"),
+        }
+
+    chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
+    if not chat_id:
+        return None
+
+    return {
+        "platform": platform_name,
+        "chat_id": chat_id,
+        "thread_id": None,
+    }
+
+
 def _deliver_result(job: dict, content: str) -> None:
     """
     Deliver job output to the configured target (origin chat, specific platform, etc.).

     Uses the standalone platform send functions from send_message_tool so delivery
     works whether or not the gateway is running.
""" - deliver = job.get("deliver", "local") - origin = _resolve_origin(job) - - if deliver == "local": + target = _resolve_delivery_target(job) + if not target: + if job.get("deliver", "local") != "local": + logger.warning( + "Job '%s' deliver=%s but no concrete delivery target could be resolved", + job["id"], + job.get("deliver", "local"), + ) return - thread_id = None - - # Resolve target platform + chat_id - if deliver == "origin": - if not origin: - logger.warning("Job '%s' deliver=origin but no origin stored, skipping delivery", job["id"]) - return - platform_name = origin["platform"] - chat_id = origin["chat_id"] - thread_id = origin.get("thread_id") - elif ":" in deliver: - platform_name, chat_id = deliver.split(":", 1) - else: - # Bare platform name like "telegram" — need to resolve to origin or home channel - platform_name = deliver - if origin and origin.get("platform") == platform_name: - chat_id = origin["chat_id"] - thread_id = origin.get("thread_id") - else: - # Fall back to home channel - chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "") - if not chat_id: - logger.warning("Job '%s' deliver=%s but no chat_id or home channel. Set via: hermes config set %s_HOME_CHANNEL ", job["id"], deliver, platform_name.upper()) - return + platform_name = target["platform"] + chat_id = target["chat_id"] + thread_id = target.get("thread_id") from tools.send_message_tool import _send_to_platform from gateway.config import load_gateway_config, Platform @@ -169,6 +196,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: job_name = job["name"] prompt = job["prompt"] origin = _resolve_origin(job) + delivery_target = _resolve_delivery_target(job) logger.info("Running job '%s' (ID: %s)", job_name, job_id) logger.info("Prompt: %s", prompt[:100]) @@ -179,6 +207,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"]) if origin.get("chat_name"): os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"] + if delivery_target: + os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"] + os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"]) + if delivery_target.get("thread_id") is not None: + os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"]) try: # Re-read .env and config.yaml fresh every run so provider/key @@ -324,7 +357,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: finally: # Clean up injected env vars so they don't leak to other jobs - for key in ("HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"): + for key in ( + "HERMES_SESSION_PLATFORM", + "HERMES_SESSION_CHAT_ID", + "HERMES_SESSION_CHAT_NAME", + "HERMES_CRON_AUTO_DELIVER_PLATFORM", + "HERMES_CRON_AUTO_DELIVER_CHAT_ID", + "HERMES_CRON_AUTO_DELIVER_THREAD_ID", + ): os.environ.pop(key, None) if _session_db: try: diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index b5c10bee..cfcc4017 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -455,6 +455,7 @@ class TestPromptBuilderConstants: assert "whatsapp" in PLATFORM_HINTS assert "telegram" in PLATFORM_HINTS assert "discord" in PLATFORM_HINTS + assert "cron" in PLATFORM_HINTS assert "cli" in PLATFORM_HINTS diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 4314b5ac..6af83f1e 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -6,7 

 import pytest

-from cron.scheduler import _resolve_origin, _deliver_result, run_job
+from cron.scheduler import _resolve_origin, _resolve_delivery_target, _deliver_result, run_job


 class TestResolveOrigin:
@@ -44,6 +44,56 @@ class TestResolveOrigin:
         assert _resolve_origin(job) is None


+class TestResolveDeliveryTarget:
+    def test_origin_delivery_preserves_thread_id(self):
+        job = {
+            "deliver": "origin",
+            "origin": {
+                "platform": "telegram",
+                "chat_id": "-1001",
+                "thread_id": "17585",
+            },
+        }
+
+        assert _resolve_delivery_target(job) == {
+            "platform": "telegram",
+            "chat_id": "-1001",
+            "thread_id": "17585",
+        }
+
+    def test_bare_platform_uses_matching_origin_chat(self):
+        job = {
+            "deliver": "telegram",
+            "origin": {
+                "platform": "telegram",
+                "chat_id": "-1001",
+                "thread_id": "17585",
+            },
+        }
+
+        assert _resolve_delivery_target(job) == {
+            "platform": "telegram",
+            "chat_id": "-1001",
+            "thread_id": "17585",
+        }
+
+    def test_bare_platform_falls_back_to_home_channel(self, monkeypatch):
+        monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "-2002")
+        job = {
+            "deliver": "telegram",
+            "origin": {
+                "platform": "discord",
+                "chat_id": "abc",
+            },
+        }
+
+        assert _resolve_delivery_target(job) == {
+            "platform": "telegram",
+            "chat_id": "-2002",
+            "thread_id": None,
+        }
+
+
 class TestDeliverResultMirrorLogging:
     """Verify that mirror_to_session failures are logged, not silently swallowed."""
diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
index 3ad44f0f..d5599894 100644
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -2,6 +2,7 @@

 import asyncio
 import json
+import os
 import sys
 from pathlib import Path
 from types import SimpleNamespace
@@ -29,6 +30,118 @@ def _install_telegram_mock(monkeypatch, bot):


 class TestSendMessageTool:
+    def test_cron_duplicate_target_is_skipped_and_explained(self):
+        home = SimpleNamespace(chat_id="-1001")
+        config, _telegram_cfg = _make_config()
+        config.get_home_channel = lambda _platform: home
+
+        with patch.dict(
+            os.environ,
+            {
+                "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
+                "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
+            },
+            clear=False,
+        ), \
+            patch("gateway.config.load_gateway_config", return_value=config), \
+            patch("tools.interrupt.is_interrupted", return_value=False), \
+            patch("model_tools._run_async", side_effect=_run_async_immediately), \
+            patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
+            patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "telegram",
+                        "message": "hello",
+                    }
+                )
+            )
+
+        assert result["success"] is True
+        assert result["skipped"] is True
+        assert result["reason"] == "cron_auto_delivery_duplicate_target"
+        assert "final response" in result["note"]
+        send_mock.assert_not_awaited()
+        mirror_mock.assert_not_called()
+
+    def test_cron_different_target_still_sends(self):
+        config, telegram_cfg = _make_config()
+
+        with patch.dict(
+            os.environ,
+            {
+                "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
+                "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
+            },
+            clear=False,
+        ), \
+            patch("gateway.config.load_gateway_config", return_value=config), \
+            patch("tools.interrupt.is_interrupted", return_value=False), \
+            patch("model_tools._run_async", side_effect=_run_async_immediately), \
+            patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
+            patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "telegram:-1002",
+                        "message": "hello",
+                    }
+                )
+            )
+
+        assert result["success"] is True
+        assert result.get("skipped") is not True
+        send_mock.assert_awaited_once_with(
+            Platform.TELEGRAM,
+            telegram_cfg,
+            "-1002",
+            "hello",
+            thread_id=None,
+            media_files=[],
+        )
+        mirror_mock.assert_called_once_with("telegram", "-1002", "hello", source_label="cli", thread_id=None)
+
+    def test_cron_same_chat_different_thread_still_sends(self):
+        config, telegram_cfg = _make_config()
+
+        with patch.dict(
+            os.environ,
+            {
+                "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram",
+                "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001",
+                "HERMES_CRON_AUTO_DELIVER_THREAD_ID": "17585",
+            },
+            clear=False,
+        ), \
+            patch("gateway.config.load_gateway_config", return_value=config), \
+            patch("tools.interrupt.is_interrupted", return_value=False), \
+            patch("model_tools._run_async", side_effect=_run_async_immediately), \
+            patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
+            patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "telegram:-1001:99999",
+                        "message": "hello",
+                    }
+                )
+            )
+
+        assert result["success"] is True
+        assert result.get("skipped") is not True
+        send_mock.assert_awaited_once_with(
+            Platform.TELEGRAM,
+            telegram_cfg,
+            "-1001",
+            "hello",
+            thread_id="99999",
+            media_files=[],
+        )
+        mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="99999")
+
     def test_sends_to_explicit_telegram_topic_target(self):
         config, telegram_cfg = _make_config()
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index bdfa58d6..bad2e22a 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -194,7 +194,10 @@ DELIVERY OPTIONS (where output goes):
 - "telegram:123456": Send to specific chat (if user provides ID)

 NOTE: The agent's final response is auto-delivered to the target — do NOT use
-send_message in the prompt. Just have the agent compose its response normally.
+send_message in the prompt for that same destination. Same-target send_message
+calls are skipped so the cron doesn't double-message the user. Put the main
+user-facing content in the final response, and use send_message only for
+additional or different targets.

 Use for: reminders, periodic checks, scheduled reports, automated maintenance.""",
     "parameters": {
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 537f6335..6a7260fd 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -153,6 +153,10 @@ def _handle_send(args):
             f"or set a home channel via: hermes config set {platform_name.upper()}_HOME_CHANNEL <chat_id>"
         })

+    duplicate_skip = _maybe_skip_cron_duplicate_send(platform_name, chat_id, thread_id)
+    if duplicate_skip:
+        return json.dumps(duplicate_skip)
+
     try:
         from model_tools import _run_async
         result = _run_async(
@@ -213,6 +217,51 @@ def _describe_media_for_mirror(media_files):
     return f"[Sent {len(media_files)} media attachments]"


+def _get_cron_auto_delivery_target():
+    """Return the cron scheduler's auto-delivery target for the current run, if any."""
+    platform = os.getenv("HERMES_CRON_AUTO_DELIVER_PLATFORM", "").strip().lower()
+    chat_id = os.getenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID", "").strip()
+    if not platform or not chat_id:
+        return None
+    thread_id = os.getenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID", "").strip() or None
+    return {
+        "platform": platform,
+        "chat_id": chat_id,
+        "thread_id": thread_id,
+    }
+
+
+def _maybe_skip_cron_duplicate_send(platform_name: str, chat_id: str, thread_id: str | None):
+    """Skip redundant cron send_message calls when the scheduler will auto-deliver there."""
+    auto_target = _get_cron_auto_delivery_target()
+    if not auto_target:
+        return None
+
+    same_target = (
+        auto_target["platform"] == platform_name
+        and str(auto_target["chat_id"]) == str(chat_id)
+        and auto_target.get("thread_id") == thread_id
+    )
+    if not same_target:
+        return None
+
+    target_label = f"{platform_name}:{chat_id}"
+    if thread_id is not None:
+        target_label += f":{thread_id}"
+
+    return {
+        "success": True,
+        "skipped": True,
+        "reason": "cron_auto_delivery_duplicate_target",
+        "target": target_label,
+        "note": (
+            f"Skipped send_message to {target_label}. This cron job will already auto-deliver "
+            "its final response to that same target. Put the intended user-facing content in "
+            "your final response instead, or use a different target if you want an additional message."
+        ),
+    }
+
+
 async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None):
     """Route a message to the appropriate platform sender."""
     from gateway.config import Platform
diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md
index b044eb0d..03bf4bfb 100644
--- a/website/docs/user-guide/features/cron.md
+++ b/website/docs/user-guide/features/cron.md
@@ -79,7 +79,7 @@ When scheduling jobs, you specify where the output goes:

 **How platform names work:** When you specify a bare platform name like `"telegram"`, Hermes first checks if the job's origin matches that platform and uses the origin chat ID. Otherwise, it falls back to the platform's home channel configured via environment variable (e.g., `TELEGRAM_HOME_CHANNEL`).

-The agent's final response is automatically delivered — you do **not** need to include `send_message` in the cron prompt.
+The agent's final response is automatically delivered — you do **not** need to include `send_message` in the cron prompt for that same destination. If a cron run calls `send_message` to the exact target the scheduler will already deliver to, Hermes skips that duplicate send and tells the model to put the user-facing content in the final response instead.
 Use `send_message` only for additional or different targets.
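+
+For example, a duplicate `send_message` call during a cron run returns a skip result along these lines (chat ID illustrative, note abridged):
+
+```json
+{
+  "success": true,
+  "skipped": true,
+  "reason": "cron_auto_delivery_duplicate_target",
+  "target": "telegram:-1001",
+  "note": "Skipped send_message to telegram:-1001. This cron job will already auto-deliver its final response to that same target."
+}
+```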

 The agent knows your connected platforms and home channels — it'll choose sensible defaults.

From ea053e8afd8daa73acd3b55fa55b1364c00c3392 Mon Sep 17 00:00:00 2001
From: teknium1
Date: Sat, 14 Mar 2026 19:22:47 -0700
Subject: [PATCH 2/2] docs: add provider contribution guide

---
 .../docs/developer-guide/adding-providers.md  | 424 ++++++++++++++++++
 website/docs/developer-guide/architecture.md  |  13 +-
 website/docs/developer-guide/contributing.md  |   6 +
 .../docs/developer-guide/provider-runtime.md  |   2 +
 website/sidebars.ts                           |   1 +
 5 files changed, 440 insertions(+), 6 deletions(-)
 create mode 100644 website/docs/developer-guide/adding-providers.md

diff --git a/website/docs/developer-guide/adding-providers.md b/website/docs/developer-guide/adding-providers.md
new file mode 100644
index 00000000..7b4695dc
--- /dev/null
+++ b/website/docs/developer-guide/adding-providers.md
@@ -0,0 +1,424 @@
+---
+sidebar_position: 5
+title: "Adding Providers"
+description: "How to add a new inference provider to Hermes Agent — auth, runtime resolution, CLI flows, adapters, tests, and docs"
+---
+
+# Adding Providers
+
+Hermes can already talk to any OpenAI-compatible endpoint through the custom provider path. Do not add a built-in provider unless you want first-class UX for that service:
+
+- provider-specific auth or token refresh
+- a curated model catalog
+- setup / `hermes model` menu entries
+- provider aliases for `provider:model` syntax
+- a non-OpenAI API shape that needs an adapter
+
+If the provider is just "another OpenAI-compatible base URL and API key", a named custom provider may be enough.
+
+## The mental model
+
+A built-in provider has to line up across a few layers:
+
+1. `hermes_cli/auth.py` decides how credentials are found.
+2. `hermes_cli/runtime_provider.py` turns that into runtime data:
+   - `provider`
+   - `api_mode`
+   - `base_url`
+   - `api_key`
+   - `source`
+3. `run_agent.py` uses `api_mode` to decide how requests are built and sent.
+4. `hermes_cli/models.py`, `hermes_cli/main.py`, and `hermes_cli/setup.py` make the provider show up in the CLI.
+5. `agent/auxiliary_client.py` and `agent/model_metadata.py` keep side tasks and token budgeting working.
+
+The important abstraction is `api_mode`.
+
+- Most providers use `chat_completions`.
+- Codex uses `codex_responses`.
+- Anthropic uses `anthropic_messages`.
+- A new non-OpenAI protocol usually means adding a new adapter and a new `api_mode` branch.
+
+## Choose the implementation path first
+
+### Path A — OpenAI-compatible provider
+
+Use this when the provider accepts standard chat-completions style requests.
+
+Typical work:
+
+- add auth metadata
+- add model catalog / aliases
+- add runtime resolution
+- add CLI menu wiring
+- add aux-model defaults
+- add tests and user docs
+
+You usually do not need a new adapter or a new `api_mode`.
+
+### Path B — Native provider
+
+Use this when the provider does not behave like OpenAI chat completions.
+
+Examples in-tree today:
+
+- `codex_responses`
+- `anthropic_messages`
+
+This path includes everything from Path A plus:
+
+- a provider adapter in `agent/<provider>_adapter.py`
+- `run_agent.py` branches for request building, dispatch, usage extraction, interrupt handling, and response normalization
+- adapter tests
+
+## File checklist
+
+### Required for every built-in provider
+
+1. `hermes_cli/auth.py`
+2. `hermes_cli/models.py`
+3. `hermes_cli/runtime_provider.py`
+4. `hermes_cli/main.py`
+5. `hermes_cli/setup.py`
+6. `agent/auxiliary_client.py`
+7. `agent/model_metadata.py`
+8. tests
+9. user-facing docs under `website/docs/`
+
+### Additional for native / non-OpenAI providers
+
+10. `agent/<provider>_adapter.py`
+11. `run_agent.py`
+12. `pyproject.toml` if a provider SDK is required
+
+## Step 1: Pick one canonical provider id
+
+Choose a single provider id and use it everywhere.
+
+Examples from the repo:
+
+- `openai-codex`
+- `kimi-coding`
+- `minimax-cn`
+
+That same id should appear in:
+
+- `PROVIDER_REGISTRY` in `hermes_cli/auth.py`
+- `_PROVIDER_LABELS` in `hermes_cli/models.py`
+- `_PROVIDER_ALIASES` in both `hermes_cli/auth.py` and `hermes_cli/models.py`
+- CLI `--provider` choices in `hermes_cli/main.py`
+- setup / model selection branches
+- auxiliary-model defaults
+- tests
+
+If the id differs between those files, the provider will feel half-wired: auth may work while `/model`, setup, or runtime resolution silently misses it.
+
+## Step 2: Add auth metadata in `hermes_cli/auth.py`
+
+For API-key providers, add a `ProviderConfig` entry to `PROVIDER_REGISTRY` with:
+
+- `id`
+- `name`
+- `auth_type="api_key"`
+- `inference_base_url`
+- `api_key_env_vars`
+- optional `base_url_env_var`
+
+Also add aliases to `_PROVIDER_ALIASES`.
+
+Use the existing providers as templates:
+
+- simple API-key path: Z.AI, MiniMax
+- API-key path with endpoint detection: Kimi, Z.AI
+- native token resolution: Anthropic
+- OAuth / auth-store path: Nous, OpenAI Codex
+
+Questions to answer here:
+
+- What env vars should Hermes check, and in what priority order?
+- Does the provider need base-URL overrides?
+- Does it need endpoint probing or token refresh?
+- What should the auth error say when credentials are missing?
+
+If the provider needs something more than "look up an API key", add a dedicated credential resolver instead of shoving logic into unrelated branches.
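+
+As a shape reference, a minimal entry might look like the sketch below; the field names come from the list above, while the provider id, label, URL, and env-var names are placeholder assumptions:
+
+```python
+# Hypothetical ProviderConfig entry for PROVIDER_REGISTRY in hermes_cli/auth.py.
+# Every value here is a placeholder; swap in the real service's details.
+ProviderConfig(
+    id="acme",
+    name="Acme AI",
+    auth_type="api_key",
+    inference_base_url="https://api.acme.example/v1",
+    api_key_env_vars=["ACME_API_KEY"],
+    base_url_env_var="ACME_BASE_URL",  # optional override
+)
+```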
+
+## Step 3: Add model catalog and aliases in `hermes_cli/models.py`
+
+Update the provider catalog so the provider works in menus and in `provider:model` syntax.
+
+Typical edits:
+
+- `_PROVIDER_MODELS`
+- `_PROVIDER_LABELS`
+- `_PROVIDER_ALIASES`
+- provider display order inside `list_available_providers()`
+- `provider_model_ids()` if the provider supports a live `/models` fetch
+
+If the provider exposes a live model list, prefer that first and keep `_PROVIDER_MODELS` as the static fallback.
+
+This file is also what makes inputs like these work:
+
+```text
+anthropic:claude-sonnet-4-6
+kimi:model-name
+```
+
+If aliases are missing here, the provider may authenticate correctly but still fail in `/model` parsing.
+
+## Step 4: Resolve runtime data in `hermes_cli/runtime_provider.py`
+
+`resolve_runtime_provider()` is the shared path used by CLI, gateway, cron, ACP, and helper clients.
+
+Add a branch that returns a dict with at least:
+
+```python
+{
+    "provider": "your-provider",
+    "api_mode": "chat_completions",  # or your native mode
+    "base_url": "https://...",
+    "api_key": "...",
+    "source": "env|portal|auth-store|explicit",
+    "requested_provider": requested_provider,
+}
+```
+
+If the provider is OpenAI-compatible, `api_mode` should usually stay `chat_completions`.
+
+Be careful with API-key precedence. Hermes already contains logic to avoid leaking an OpenRouter key to unrelated endpoints. A new provider should be equally explicit about which key goes to which base URL.
+
+## Step 5: Wire the CLI in `hermes_cli/main.py` and `hermes_cli/setup.py`
+
+A provider is not discoverable until it shows up in the interactive flows.
+
+Update:
+
+### `hermes_cli/main.py`
+
+- `provider_labels`
+- provider dispatch inside the `model` command
+- `--provider` argument choices
+- login/logout choices if the provider supports those flows
+- a `_model_flow_<provider>()` function, or reuse `_model_flow_api_key_provider()` if it fits
+
+### `hermes_cli/setup.py`
+
+- `provider_choices`
+- auth branch for the provider
+- model-selection branch
+- any provider-specific explanatory text
+- any place where a provider should be excluded from OpenRouter-only prompts or routing settings
+
+If you only update one of these files, `hermes model` and `hermes setup` will drift.
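+
+The individual edits are small; a hypothetical provider id `acme` would thread through both files roughly like this (the `provider_labels` and `provider_choices` shapes are illustrative, not the real definitions):
+
+```python
+# hermes_cli/main.py sketch: give the provider a menu label and a --provider choice.
+provider_labels["acme"] = "Acme AI"
+
+# hermes_cli/setup.py sketch: the same id must also appear in the setup menu.
+provider_choices.append("acme")
+```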
+
+## Step 6: Keep auxiliary calls working
+
+Two files matter here:
+
+### `agent/auxiliary_client.py`
+
+Add a cheap / fast default aux model to `_API_KEY_PROVIDER_AUX_MODELS` if this is a direct API-key provider.
+
+Auxiliary tasks include things like:
+
+- vision summarization
+- web extraction summarization
+- context compression summaries
+- session-search summaries
+- memory flushes
+
+If the provider has no sensible aux default, side tasks may fall back badly or use an expensive main model unexpectedly.
+
+### `agent/model_metadata.py`
+
+Add context lengths for the provider's models so token budgeting, compression thresholds, and limits stay sane.
+
+## Step 7: If the provider is native, add an adapter and `run_agent.py` support
+
+If the provider is not plain chat completions, isolate the provider-specific logic in `agent/<provider>_adapter.py`.
+
+Keep `run_agent.py` focused on orchestration. It should call adapter helpers, not hand-build provider payloads inline all over the file.
+
+A native provider usually needs work in these places:
+
+### New adapter file
+
+Typical responsibilities:
+
+- build the SDK / HTTP client
+- resolve tokens
+- convert OpenAI-style conversation messages to the provider's request format
+- convert tool schemas if needed
+- normalize provider responses back into what `run_agent.py` expects
+- extract usage and finish-reason data
+
+### `run_agent.py`
+
+Search for `api_mode` and audit every switch point. At minimum, verify:
+
+- `__init__` chooses the new `api_mode`
+- client construction works for the provider
+- `_build_api_kwargs()` knows how to format requests
+- `_api_call_with_interrupt()` dispatches to the right client call
+- interrupt / client rebuild paths work
+- response validation accepts the provider's shape
+- finish-reason extraction is correct
+- token-usage extraction is correct
+- fallback-model activation can switch into the new provider cleanly
+- summary-generation and memory-flush paths still work
+
+Also search `run_agent.py` for `self.client.`. Any code path that assumes the standard OpenAI client exists can break when a native provider uses a different client object or `self.client = None`.
+
+### Prompt caching and provider-specific request fields
+
+Prompt caching and provider-specific knobs are easy to regress.
+
+Examples already in-tree:
+
+- Anthropic has a native prompt-caching path
+- OpenRouter gets provider-routing fields
+- not every provider should receive every request-side option
+
+When you add a native provider, double-check that Hermes is only sending fields that provider actually understands.
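+
+To make the shape concrete, here is a compressed, hypothetical adapter skeleton; the function names, request format, and `NormalizedResponse` container are invented for illustration, not taken from an existing adapter:
+
+```python
+# agent/acme_adapter.py — illustrative skeleton only.
+from dataclasses import dataclass, field
+
+
+@dataclass
+class NormalizedResponse:
+    """What the orchestration layer would consume after normalization."""
+    text: str
+    tool_calls: list = field(default_factory=list)
+    finish_reason: str = "stop"
+    usage: dict = field(default_factory=dict)
+
+
+def build_request(messages: list[dict], tools: list[dict]) -> dict:
+    """Convert OpenAI-style messages/tools into the provider's request body."""
+    return {
+        "messages": [{"role": m["role"], "content": m["content"]} for m in messages],
+        "tools": tools,
+    }
+
+
+def normalize_response(raw: dict) -> NormalizedResponse:
+    """Map the provider's raw response back into a normalized form."""
+    return NormalizedResponse(
+        text=raw.get("output_text", ""),
+        tool_calls=raw.get("tool_calls", []),
+        finish_reason=raw.get("stop_reason", "stop"),
+        usage=raw.get("usage", {}),
+    )
+```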
+
+## Step 8: Tests
+
+At minimum, touch the tests that guard provider wiring.
+
+Common places:
+
+- `tests/test_runtime_provider_resolution.py`
+- `tests/test_cli_provider_resolution.py`
+- `tests/test_cli_model_command.py`
+- `tests/test_setup_model_selection.py`
+- `tests/test_provider_parity.py`
+- `tests/test_run_agent.py`
+- `tests/test_<provider>_adapter.py` for a native provider
+
+For docs-only examples, the exact file set may differ. The point is to cover:
+
+- auth resolution
+- CLI menu / provider selection
+- runtime provider resolution
+- agent execution path
+- provider:model parsing
+- any adapter-specific message conversion
+
+Run tests with xdist disabled:
+
+```bash
+source .venv/bin/activate
+python -m pytest tests/test_runtime_provider_resolution.py tests/test_cli_provider_resolution.py tests/test_cli_model_command.py tests/test_setup_model_selection.py -n0 -q
+```
+
+For deeper changes, run the full suite before pushing:
+
+```bash
+source .venv/bin/activate
+python -m pytest tests/ -n0 -q
+```
+
+## Step 9: Live verification
+
+After tests, run a real smoke test.
+
+```bash
+source .venv/bin/activate
+python -m hermes_cli.main chat -q "Say hello" --provider your-provider --model your-model
+```
+
+Also test the interactive flows if you changed menus:
+
+```bash
+source .venv/bin/activate
+python -m hermes_cli.main model
+python -m hermes_cli.main setup
+```
+
+For native providers, verify at least one tool call too, not just a plain text response.
+
+## Step 10: Update user-facing docs
+
+If the provider is meant to ship as a first-class option, update the user docs too:
+
+- `website/docs/getting-started/quickstart.md`
+- `website/docs/user-guide/configuration.md`
+- `website/docs/reference/environment-variables.md`
+
+A developer can wire the provider perfectly and still leave users unable to discover the required env vars or setup flow.
+
+## OpenAI-compatible provider checklist
+
+Use this if the provider is standard chat completions.
+
+- [ ] `ProviderConfig` added in `hermes_cli/auth.py`
+- [ ] aliases added in `hermes_cli/auth.py` and `hermes_cli/models.py`
+- [ ] model catalog added in `hermes_cli/models.py`
+- [ ] runtime branch added in `hermes_cli/runtime_provider.py`
+- [ ] CLI wiring added in `hermes_cli/main.py`
+- [ ] setup wiring added in `hermes_cli/setup.py`
+- [ ] aux model added in `agent/auxiliary_client.py`
+- [ ] context lengths added in `agent/model_metadata.py`
+- [ ] runtime / CLI tests updated
+- [ ] user docs updated
+
+## Native provider checklist
+
+Use this when the provider needs a new protocol path.
+
+- [ ] everything in the OpenAI-compatible checklist
+- [ ] adapter added in `agent/<provider>_adapter.py`
+- [ ] new `api_mode` supported in `run_agent.py`
+- [ ] interrupt / rebuild path works
+- [ ] usage and finish-reason extraction works
+- [ ] fallback path works
+- [ ] adapter tests added
+- [ ] live smoke test passes
+
+## Common pitfalls
+
+### 1. Adding the provider to auth but not to model parsing
+
+That makes credentials resolve correctly while `/model` and `provider:model` inputs fail.
+
+### 2. Forgetting that `config["model"]` can be a string or a dict
+
+A lot of provider-selection code has to normalize both forms.
+
+### 3. Assuming a built-in provider is required
+
+If the service is just OpenAI-compatible, a custom provider may already solve the user problem with less maintenance.
+
+### 4. Forgetting auxiliary paths
+
+The main chat path can work while summarization, memory flushes, or vision helpers fail because aux routing was never updated.
+
+### 5. Native-provider branches hiding in `run_agent.py`
+
+Search for `api_mode` and `self.client.`. Do not assume the obvious request path is the only one.
+
+### 6. Sending OpenRouter-only knobs to other providers
+
+Fields like provider routing belong only on the providers that support them.
+
+### 7. Updating `hermes model` but not `hermes setup`
+
+Both flows need to know about the provider.
+
+## Good search targets while implementing
+
+If you are hunting for all the places a provider touches, search these symbols:
+
+- `PROVIDER_REGISTRY`
+- `_PROVIDER_ALIASES`
+- `_PROVIDER_MODELS`
+- `resolve_runtime_provider`
+- `_model_flow_`
+- `provider_choices`
+- `api_mode`
+- `_API_KEY_PROVIDER_AUX_MODELS`
+- `self.client.`
+
+## Related docs
+
+- [Provider Runtime Resolution](./provider-runtime.md)
+- [Architecture](./architecture.md)
+- [Contributing](./contributing.md)
diff --git a/website/docs/developer-guide/architecture.md b/website/docs/developer-guide/architecture.md
index 2ff14817..1fb9ff41 100644
--- a/website/docs/developer-guide/architecture.md
+++ b/website/docs/developer-guide/architecture.md
@@ -41,12 +41,13 @@ If you are new to the codebase, read in this order:
 2. [Agent Loop Internals](./agent-loop.md)
 3. [Prompt Assembly](./prompt-assembly.md)
 4. [Provider Runtime Resolution](./provider-runtime.md)
-5. [Tools Runtime](./tools-runtime.md)
-6. [Session Storage](./session-storage.md)
-7. [Gateway Internals](./gateway-internals.md)
-8. [Context Compression & Prompt Caching](./context-compression-and-caching.md)
-9. [ACP Internals](./acp-internals.md)
-10. [Environments, Benchmarks & Data Generation](./environments.md)
+5. [Adding Providers](./adding-providers.md)
+6. [Tools Runtime](./tools-runtime.md)
+7. [Session Storage](./session-storage.md)
+8. [Gateway Internals](./gateway-internals.md)
+9. [Context Compression & Prompt Caching](./context-compression-and-caching.md)
+10. [ACP Internals](./acp-internals.md)
+11. [Environments, Benchmarks & Data Generation](./environments.md)

 ## Major subsystems
diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md
index f14ab9b4..5f653eae 100644
--- a/website/docs/developer-guide/contributing.md
+++ b/website/docs/developer-guide/contributing.md
@@ -20,6 +20,12 @@ We value contributions in this order:
 6. **New tools** — rarely needed; most capabilities should be skills
 7. **Documentation** — fixes, clarifications, new examples

+## Common contribution paths
+
+- Building a new tool? Start with [Adding Tools](./adding-tools.md)
+- Building a new skill? Start with [Creating Skills](./creating-skills.md)
+- Building a new inference provider? Start with [Adding Providers](./adding-providers.md)
+
 ## Development Setup

 ### Prerequisites
diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md
index 9bfd48c2..68fe537c 100644
--- a/website/docs/developer-guide/provider-runtime.md
+++ b/website/docs/developer-guide/provider-runtime.md
@@ -20,6 +20,8 @@ Primary implementation:
 - `hermes_cli/auth.py`
 - `agent/auxiliary_client.py`

+If you are trying to add a new first-class inference provider, read [Adding Providers](./adding-providers.md) alongside this page.
+
 ## Resolution precedence

 At a high level, provider resolution uses:
diff --git a/website/sidebars.ts b/website/sidebars.ts
index 828b4472..94a28aac 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -109,6 +109,7 @@ const sidebars: SidebarsConfig = {
       'developer-guide/architecture',
       'developer-guide/agent-loop',
       'developer-guide/provider-runtime',
+      'developer-guide/adding-providers',
       'developer-guide/prompt-assembly',
       'developer-guide/context-compression-and-caching',
       'developer-guide/gateway-internals',