Merge origin/main, resolve conflicts (self._base_url_lower)
commit e7844e9c8d
54 changed files with 2281 additions and 179 deletions
run_agent.py (196 changed lines)
```diff
@@ -203,6 +203,27 @@ class IterationBudget:
+# When any of these appear in a batch, we fall back to sequential execution.
+_NEVER_PARALLEL_TOOLS = frozenset({"clarify"})
+
+# Read-only tools with no shared mutable session state.
+_PARALLEL_SAFE_TOOLS = frozenset({
+    "ha_get_state",
+    "ha_list_entities",
+    "ha_list_services",
+    "honcho_context",
+    "honcho_profile",
+    "honcho_search",
+    "read_file",
+    "search_files",
+    "session_search",
+    "skill_view",
+    "skills_list",
+    "vision_analyze",
+    "web_extract",
+    "web_search",
+})
+
+# File tools can run concurrently when they target independent paths.
+_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
+
+# Maximum number of concurrent worker threads for parallel tool execution.
+_MAX_TOOL_WORKERS = 8
```
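For orientation, a quick sketch of how these sets partition a batch before the path checks in the next hunk run (the batches are made up; `execute_command` stands in for any tool outside both sets):

```python
# Minimal classification sketch using the three sets defined above.
def classify(names: list[str]) -> str:
    if len(names) <= 1 or any(n in _NEVER_PARALLEL_TOOLS for n in names):
        return "sequential"
    if all(n in _PARALLEL_SAFE_TOOLS or n in _PATH_SCOPED_TOOLS for n in names):
        return "parallel-candidate"  # still subject to the path-overlap checks
    return "sequential"

assert classify(["web_search", "read_file"]) == "parallel-candidate"
assert classify(["web_search", "clarify"]) == "sequential"          # interactive tool
assert classify(["web_search", "execute_command"]) == "sequential"  # unknown tool
```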
```diff
@@ -234,6 +255,74 @@ def _is_destructive_command(cmd: str) -> bool:
     return False
 
 
+def _should_parallelize_tool_batch(tool_calls) -> bool:
+    """Return True when a tool-call batch is safe to run concurrently."""
+    if len(tool_calls) <= 1:
+        return False
+
+    tool_names = [tc.function.name for tc in tool_calls]
+    if any(name in _NEVER_PARALLEL_TOOLS for name in tool_names):
+        return False
+
+    reserved_paths: list[Path] = []
+    for tool_call in tool_calls:
+        tool_name = tool_call.function.name
+        try:
+            function_args = json.loads(tool_call.function.arguments)
+        except Exception:
+            logging.debug(
+                "Could not parse args for %s — defaulting to sequential; raw=%s",
+                tool_name,
+                tool_call.function.arguments[:200],
+            )
+            return False
+        if not isinstance(function_args, dict):
+            logging.debug(
+                "Non-dict args for %s (%s) — defaulting to sequential",
+                tool_name,
+                type(function_args).__name__,
+            )
+            return False
+
+        if tool_name in _PATH_SCOPED_TOOLS:
+            scoped_path = _extract_parallel_scope_path(tool_name, function_args)
+            if scoped_path is None:
+                return False
+            if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
+                return False
+            reserved_paths.append(scoped_path)
+            continue
+
+        if tool_name not in _PARALLEL_SAFE_TOOLS:
+            return False
+
+    return True
+
+
+def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Path | None:
+    """Return the normalized file target for path-scoped tools."""
+    if tool_name not in _PATH_SCOPED_TOOLS:
+        return None
+
+    raw_path = function_args.get("path")
+    if not isinstance(raw_path, str) or not raw_path.strip():
+        return None
+
+    # Avoid resolve(); the file may not exist yet.
+    return Path(raw_path).expanduser()
+
+
+def _paths_overlap(left: Path, right: Path) -> bool:
+    """Return True when two paths may refer to the same subtree."""
+    left_parts = left.parts
+    right_parts = right.parts
+    if not left_parts or not right_parts:
+        # Empty paths shouldn't reach here (guarded upstream), but be safe.
+        return bool(left_parts) == bool(right_parts) and bool(left_parts)
+    common_len = min(len(left_parts), len(right_parts))
+    return left_parts[:common_len] == right_parts[:common_len]
+
+
 def _inject_honcho_turn_context(content, turn_context: str):
     """Append Honcho recall to the current-turn user message without mutating history.
```
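The overlap test compares path components as a prefix, so a parent directory reserves its whole subtree, while relative and absolute spellings of the same file are treated as distinct (a consequence of skipping `resolve()`). A few illustrative cases, assuming the functions above are in scope:

```python
from pathlib import Path

assert _paths_overlap(Path("src/agent.py"), Path("src/agent.py"))  # same file
assert _paths_overlap(Path("src"), Path("src/agent.py"))           # parent vs child
assert not _paths_overlap(Path("src/a.py"), Path("src/b.py"))      # siblings are independent
# Caveat of avoiding resolve(): different spellings of one file do not overlap.
assert not _paths_overlap(Path("src/a.py"), Path("/repo/src/a.py"))
```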
```diff
@@ -263,11 +352,20 @@ def _inject_honcho_turn_context(content, turn_context: str):
 class AIAgent:
     """
     AI Agent with tool calling capabilities.
 
     This class manages the conversation flow, tool execution, and response handling
     for AI models that support function calling.
     """
 
+    @property
+    def base_url(self) -> str:
+        return self._base_url
+
+    @base_url.setter
+    def base_url(self, value: str) -> None:
+        self._base_url = value
+        self._base_url_lower = value.lower() if value else ""
+
     def __init__(
         self,
         base_url: str = None,
```
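The property pair keeps a lowercase copy in sync with every assignment, so the many provider checks below become plain substring tests on `self._base_url_lower` instead of repeated `.lower()` calls. The same pattern in isolation (class name hypothetical):

```python
class _UrlHolder:
    """Sketch of the cached-derived-attribute pattern used by AIAgent."""

    def __init__(self, base_url: str = ""):
        self.base_url = base_url  # routed through the setter

    @property
    def base_url(self) -> str:
        return self._base_url

    @base_url.setter
    def base_url(self, value: str) -> None:
        self._base_url = value
        self._base_url_lower = value.lower() if value else ""


holder = _UrlHolder("https://OpenRouter.ai/api/v1")
assert "openrouter" in holder._base_url_lower
holder.base_url = "https://api.anthropic.com"  # the cache follows automatically
assert "api.anthropic.com" in holder._base_url_lower
```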
```diff
@@ -389,10 +487,10 @@ class AIAgent:
             self.api_mode = api_mode
         elif self.provider == "openai-codex":
             self.api_mode = "codex_responses"
-        elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
+        elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
             self.api_mode = "codex_responses"
             self.provider = "openai-codex"
-        elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self.base_url.lower()):
+        elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
             self.api_mode = "anthropic_messages"
             self.provider = "anthropic"
         else:
```
```diff
@@ -401,7 +499,7 @@ class AIAgent:
         # Pre-warm OpenRouter model metadata cache in a background thread.
         # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
         # HTTP request on the first API response when pricing is estimated.
-        if self.provider == "openrouter" or "openrouter" in self.base_url.lower():
+        if self.provider == "openrouter" or "openrouter" in self._base_url_lower:
             threading.Thread(
                 target=lambda: fetch_model_metadata(),
                 daemon=True,
```
```diff
@@ -445,7 +543,7 @@ class AIAgent:
         # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
         # Reduces input costs by ~75% on multi-turn conversations by caching the
         # conversation prefix. Uses system_and_3 strategy (4 breakpoints).
-        is_openrouter = "openrouter" in self.base_url.lower()
+        is_openrouter = "openrouter" in self._base_url_lower
         is_claude = "claude" in self.model.lower()
         is_native_anthropic = self.api_mode == "anthropic_messages"
         self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
```
```diff
@@ -561,6 +659,7 @@ class AIAgent:
         if self.api_mode == "anthropic_messages":
             from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
             effective_key = api_key or resolve_anthropic_token() or ""
+            self.api_key = effective_key
             self._anthropic_api_key = effective_key
             self._anthropic_base_url = base_url
             from agent.anthropic_adapter import _is_oauth_token as _is_oat
```
```diff
@@ -622,6 +721,7 @@ class AIAgent:
         }
 
         self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
+        self.api_key = client_kwargs.get("api_key", "")
         try:
             self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True)
             if not self.quiet_mode:
```
```diff
@@ -745,16 +845,24 @@ class AIAgent:
         from tools.todo_tool import TodoStore
         self._todo_store = TodoStore()
 
+        # Load config once for memory, skills, and compression sections
+        try:
+            from hermes_cli.config import load_config as _load_agent_config
+            _agent_cfg = _load_agent_config()
+        except Exception:
+            _agent_cfg = {}
+
         # Persistent memory (MEMORY.md + USER.md) -- loaded from disk
         self._memory_store = None
         self._memory_enabled = False
         self._user_profile_enabled = False
         self._memory_nudge_interval = 10
         self._memory_flush_min_turns = 6
+        self._turns_since_memory = 0
+        self._iters_since_skill = 0
         if not skip_memory:
             try:
-                from hermes_cli.config import load_config as _load_mem_config
-                mem_config = _load_mem_config().get("memory", {})
+                mem_config = _agent_cfg.get("memory", {})
                 self._memory_enabled = mem_config.get("memory_enabled", False)
                 self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
                 self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
```
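The `_agent_cfg` consolidation trades three separate `load_config()` calls for one read, with each section degrading to an empty dict when absent. A sketch of the shape this assumes (section and key names from the diff; values hypothetical):

```python
# What a loaded config might look like; an empty dict works identically.
_agent_cfg = {
    "memory": {"memory_enabled": True, "nudge_interval": 10},
    "skills": {"creation_nudge_interval": 15},
    "compression": {"enabled": True, "threshold": 0.50},
}

mem_config = _agent_cfg.get("memory", {})
skills_config = _agent_cfg.get("skills", {})
_compression_cfg = _agent_cfg.get("compression", {})

# Missing sections and missing keys both fall back to defaults:
assert {}.get("memory", {}).get("nudge_interval", 10) == 10
```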
```diff
@@ -842,21 +950,16 @@ class AIAgent:
         # Skills config: nudge interval for skill creation reminders
         self._skill_nudge_interval = 10
         try:
-            from hermes_cli.config import load_config as _load_skills_config
-            skills_config = _load_skills_config().get("skills", {})
+            skills_config = _agent_cfg.get("skills", {})
             self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
         except Exception:
             pass
 
         # Initialize context compressor for automatic context management
         # Compresses conversation when approaching model's context limit
         # Configuration via config.yaml (compression section)
-        try:
-            from hermes_cli.config import load_config as _load_compression_config
-            _compression_cfg = _load_compression_config().get("compression", {})
-            if not isinstance(_compression_cfg, dict):
-                _compression_cfg = {}
-        except ImportError:
-            _compression_cfg = {}
+        _compression_cfg = _agent_cfg.get("compression", {})
+        if not isinstance(_compression_cfg, dict):
+            _compression_cfg = {}
         compression_threshold = float(_compression_cfg.get("threshold", 0.50))
         compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
```
```diff
@@ -871,6 +974,7 @@ class AIAgent:
             summary_model_override=compression_summary_model,
             quiet_mode=self.quiet_mode,
             base_url=self.base_url,
+            api_key=getattr(self, "api_key", ""),
         )
         self.compression_enabled = compression_enabled
         self._user_turn_count = 0
```
```diff
@@ -926,8 +1030,8 @@ class AIAgent:
         OpenAI models use 'max_tokens'.
         """
         _is_direct_openai = (
-            "api.openai.com" in self.base_url.lower()
-            and "openrouter" not in self.base_url.lower()
+            "api.openai.com" in self._base_url_lower
+            and "openrouter" not in self._base_url_lower
         )
         if _is_direct_openai:
             return {"max_completion_tokens": value}
```
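This helper (its name is not visible in the hunk) picks the token-limit parameter per backend: direct OpenAI requires `max_completion_tokens`, while gateways like OpenRouter still take `max_tokens`. A standalone restatement, with the function name and the `max_tokens` fallback assumed from the docstring:

```python
def _max_tokens_kwargs(base_url_lower: str, value: int) -> dict:
    # Mirrors the gate above; the else branch is an assumption.
    is_direct_openai = (
        "api.openai.com" in base_url_lower
        and "openrouter" not in base_url_lower
    )
    if is_direct_openai:
        return {"max_completion_tokens": value}
    return {"max_tokens": value}


assert _max_tokens_kwargs("https://api.openai.com/v1", 4096) == {"max_completion_tokens": 4096}
assert _max_tokens_kwargs("https://openrouter.ai/api/v1", 4096) == {"max_tokens": 4096}
```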
```diff
@@ -2977,6 +3081,9 @@ class AIAgent:
             return False
 
         self._anthropic_api_key = new_token
+        # Update OAuth flag — token type may have changed (API key ↔ OAuth)
+        from agent.anthropic_adapter import _is_oauth_token
+        self._is_anthropic_oauth = _is_oauth_token(new_token)
         return True
 
     def _anthropic_messages_create(self, api_kwargs: dict):
```
```diff
@@ -3368,11 +3475,12 @@ class AIAgent:
 
         if fb_api_mode == "anthropic_messages":
             # Build native Anthropic client instead of using OpenAI client
-            from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+            from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token
             effective_key = fb_client.api_key or resolve_anthropic_token() or ""
             self._anthropic_api_key = effective_key
             self._anthropic_base_url = getattr(fb_client, "base_url", None)
             self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url)
+            self._is_anthropic_oauth = _is_oauth_token(effective_key)
             self.client = None
             self._client_kwargs = {}
         else:
```
```diff
@@ -3678,10 +3786,10 @@ class AIAgent:
 
         extra_body = {}
 
-        _is_openrouter = "openrouter" in self.base_url.lower()
+        _is_openrouter = "openrouter" in self._base_url_lower
         _is_github_models = (
-            "models.github.ai" in self.base_url.lower()
-            or "api.githubcopilot.com" in self.base_url.lower()
+            "models.github.ai" in self._base_url_lower
+            or "api.githubcopilot.com" in self._base_url_lower
         )
 
         # Provider preferences (only, ignore, order, sort) are OpenRouter-
```
```diff
@@ -3690,7 +3798,7 @@ class AIAgent:
         # for _is_nous when their backend is updated.
         if provider_preferences and _is_openrouter:
             extra_body["provider"] = provider_preferences
-        _is_nous = "nousresearch" in self.base_url.lower()
+        _is_nous = "nousresearch" in self._base_url_lower
 
         if self._supports_reasoning_extra_body():
             if _is_github_models:
```
```diff
@@ -3728,21 +3836,20 @@ class AIAgent:
         Some providers/routes reject `reasoning` with 400s, so gate it to
         known reasoning-capable model families and direct Nous Portal.
         """
-        base_url = (self.base_url or "").lower()
-        if "nousresearch" in base_url:
+        if "nousresearch" in self._base_url_lower:
             return True
-        if "ai-gateway.vercel.sh" in base_url:
+        if "ai-gateway.vercel.sh" in self._base_url_lower:
             return True
-        if "models.github.ai" in base_url or "api.githubcopilot.com" in base_url:
+        if "models.github.ai" in self._base_url_lower or "api.githubcopilot.com" in self._base_url_lower:
             try:
                 from hermes_cli.models import github_model_reasoning_efforts
 
                 return bool(github_model_reasoning_efforts(self.model))
             except Exception:
                 return False
-        if "openrouter" not in base_url:
+        if "openrouter" not in self._base_url_lower:
             return False
-        if "api.mistral.ai" in base_url:
+        if "api.mistral.ai" in self._base_url_lower:
             return False
 
         model = (self.model or "").lower()
```
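When this gate passes, `reasoning` travels in `extra_body`, which the OpenAI Python SDK merges verbatim into the request JSON; that is why routes that answer `reasoning` with 400s must be filtered here rather than at the call site. A minimal sketch of the hand-off (values hypothetical, client call left commented):

```python
extra_body = {}
supports_reasoning = True              # stand-in for self._supports_reasoning_extra_body()
reasoning_config = {"effort": "high"}  # hypothetical self.reasoning_config

if supports_reasoning and reasoning_config is not None:
    extra_body["reasoning"] = reasoning_config

# The SDK forwards extra_body fields untouched:
# client.chat.completions.create(model=model, messages=messages, extra_body=extra_body)
```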
```diff
@@ -3960,7 +4067,7 @@ class AIAgent:
 
         try:
             # Build API messages for the flush call
-            _is_strict_api = "api.mistral.ai" in self.base_url.lower()
+            _is_strict_api = "api.mistral.ai" in self._base_url_lower
             api_messages = []
             for msg in messages:
                 api_msg = msg.copy()
```
```diff
@@ -4149,20 +4256,17 @@ class AIAgent:
     def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
         """Execute tool calls from the assistant message and append results to messages.
 
-        Dispatches to concurrent execution when multiple independent tool calls
-        are present, falling back to sequential execution for single calls or
-        when interactive tools (e.g. clarify) are in the batch.
+        Dispatches to concurrent execution only for batches that look
+        independent: read-only tools may always share the parallel path, while
+        file reads/writes may do so only when their target paths do not overlap.
         """
         tool_calls = assistant_message.tool_calls
 
-        # Single tool call or interactive tool present → sequential
-        if (len(tool_calls) <= 1
-                or any(tc.function.name in _NEVER_PARALLEL_TOOLS for tc in tool_calls)):
+        if not _should_parallelize_tool_batch(tool_calls):
             return self._execute_tool_calls_sequential(
                 assistant_message, messages, effective_task_id, api_call_count
             )
 
-        # Multiple non-interactive tools → concurrent
         return self._execute_tool_calls_concurrent(
             assistant_message, messages, effective_task_id, api_call_count
         )
```
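`_execute_tool_calls_concurrent` itself is not shown in this diff; a plausible shape, sized by `_MAX_TOOL_WORKERS` and keeping results aligned with the original tool-call order, might look like this (a sketch under those assumptions, not the repository's implementation):

```python
from concurrent.futures import ThreadPoolExecutor

def run_batch_concurrently(tool_calls, execute_one, max_workers: int = 8):
    """Run pre-vetted, independent tool calls in a thread pool.

    execute_one(tool_call) -> tool result message. max_workers mirrors
    _MAX_TOOL_WORKERS; map() preserves input order, so result N always
    belongs to tool_calls[N] when appending back onto the message list.
    """
    workers = min(max_workers, len(tool_calls))
    with ThreadPoolExecutor(max_workers=workers) as pool:
        return list(pool.map(execute_one, tool_calls))
```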
```diff
@@ -4736,7 +4840,7 @@ class AIAgent:
         try:
             # Build API messages, stripping internal-only fields
             # (finish_reason, reasoning) that strict APIs like Mistral reject with 422
-            _is_strict_api = "api.mistral.ai" in self.base_url.lower()
+            _is_strict_api = "api.mistral.ai" in self._base_url_lower
             api_messages = []
             for msg in messages:
                 api_msg = msg.copy()
```
```diff
@@ -4757,7 +4861,7 @@ class AIAgent:
                 api_messages.insert(sys_offset + idx, pfm.copy())
 
         summary_extra_body = {}
-        _is_nous = "nousresearch" in self.base_url.lower()
+        _is_nous = "nousresearch" in self._base_url_lower
         if self._supports_reasoning_extra_body():
             if self.reasoning_config is not None:
                 summary_extra_body["reasoning"] = self.reasoning_config
```
```diff
@@ -4920,8 +5024,9 @@ class AIAgent:
         self._incomplete_scratchpad_retries = 0
         self._codex_incomplete_retries = 0
         self._last_content_with_tools = None
-        self._turns_since_memory = 0
-        self._iters_since_skill = 0
+        # NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
+        # They are initialized in __init__ and must persist across run_conversation
+        # calls so that nudge logic accumulates correctly in CLI mode.
         self.iteration_budget = IterationBudget(self.max_iterations)
 
         # Initialize conversation (copy to avoid mutating the caller's list)
```
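The reason the counters must survive `run_conversation`: in CLI mode each user turn is a fresh call, so resetting them here would keep interval-based nudges permanently below threshold. A toy model of the accumulation (threshold semantics assumed from the config keys above):

```python
class NudgeCounter:
    """Toy model: the counter lives for the agent's lifetime, not one call."""

    def __init__(self, interval: int = 10):
        self.interval = interval
        self.turns_since_memory = 0  # would never reach interval if reset per call

    def on_turn(self) -> bool:
        self.turns_since_memory += 1
        if self.turns_since_memory >= self.interval:
            self.turns_since_memory = 0
            return True  # time to nudge about updating MEMORY.md
        return False


counter = NudgeCounter(interval=3)
# Three single-turn conversations still trigger the nudge on the third turn:
assert [counter.on_turn() for _ in range(3)] == [False, False, True]
```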
```diff
@@ -5174,7 +5279,7 @@ class AIAgent:
                 # strict providers like Mistral that reject unknown fields with 422.
                 # Uses new dicts so the internal messages list retains the fields
                 # for Codex Responses compatibility.
-                if "api.mistral.ai" in self.base_url.lower():
+                if "api.mistral.ai" in self._base_url_lower:
                     self._sanitize_tool_calls_for_strict_api(api_msg)
                 # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
                 # The signature field helps maintain reasoning continuity
```
```diff
@@ -5546,6 +5651,7 @@ class AIAgent:
                 canonical_usage,
                 provider=self.provider,
                 base_url=self.base_url,
+                api_key=getattr(self, "api_key", ""),
             )
             if cost_result.amount_usd is not None:
                 self.session_estimated_cost_usd += float(cost_result.amount_usd)
```
```diff
@@ -5939,10 +6045,6 @@ class AIAgent:
                             self._client_log_context(),
                             api_error,
                         )
-                        if retry_count >= max_retries:
-                            self._vprint(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
-                            self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
-
                         # Sleep in small increments so we can respond to interrupts quickly
                         # instead of blocking the entire wait_time in one sleep() call
                         sleep_end = time.time() + wait_time
```
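The surviving comment describes the incremental-sleep pattern; in isolation it looks like the sketch below (the interrupt predicate is hypothetical):

```python
import time

def interruptible_sleep(wait_time: float, is_interrupted, step: float = 0.25) -> None:
    """Sleep up to wait_time seconds, polling is_interrupted() between slices."""
    sleep_end = time.time() + wait_time
    while time.time() < sleep_end:
        if is_interrupted():
            return  # react to the interrupt instead of finishing the wait
        time.sleep(min(step, max(0.0, sleep_end - time.time())))
```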