feat: call_llm/async_call_llm + config slots + migrate all consumers
Add centralized call_llm() and async_call_llm() functions that own the full LLM request lifecycle:

1. Resolve provider + model from task config or explicit args
2. Get or create a cached client for that provider
3. Format request args (max_tokens handling, provider extra_body)
4. Make the API call with max_tokens/max_completion_tokens retry
5. Return the response

Config: expanded auxiliary section with provider:model slots for all tasks (compression, vision, web_extract, session_search, skills_hub, mcp, flush_memories). Config version bumped to 7.

Migrated all auxiliary consumers:

- context_compressor.py: uses call_llm(task='compression')
- vision_tools.py: uses async_call_llm(task='vision')
- web_tools.py: uses async_call_llm(task='web_extract')
- session_search_tool.py: uses async_call_llm(task='session_search')
- browser_tool.py: uses call_llm(task='vision'/'web_extract')
- mcp_tool.py: uses call_llm(task='mcp')
- skills_guard.py: uses call_llm(provider='openrouter')
- run_agent.py flush_memories: uses call_llm(task='flush_memories')

Tests updated for context_compressor and MCP tool. Some test mocks still need updating (15 remaining failures from mock pattern changes, 2 pre-existing).
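For reference, the call shape consumers now use (a minimal sketch; the prompt text and the model slug below are placeholders, not values taken from this commit):

    from agent.auxiliary_client import call_llm

    # Task-based routing: provider and model come from the task's config slot,
    # so the call site only passes messages and sampling parameters.
    response = call_llm(
        task="web_extract",
        messages=[{"role": "user", "content": "Summarize this page: ..."}],
        max_tokens=4000,
        temperature=0.1,
    )
    print(response.choices[0].message.content)

    # Explicit routing: pin a provider (and optionally a model) instead of a
    # task slot, as skills_guard.py does for its audit call.
    response = call_llm(
        provider="openrouter",
        model="google/gemini-3-flash-preview",  # placeholder slug
        messages=[{"role": "user", "content": "Audit this skill: ..."}],
    )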
parent 013cc4d2fc
commit 0aa31cd3cb
13 changed files with 552 additions and 375 deletions
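The hunks below apply the same migration pattern to each consumer; condensed into one hedged sketch (the wrapper function, fallback text, and truncation limit are illustrative, while the optional-model handling and the RuntimeError fallback mirror the new web_extract and session_search call sites):

    from typing import Optional
    from agent.auxiliary_client import async_call_llm

    async def summarize(snapshot_text: str, model: Optional[str] = None) -> str:
        # Build kwargs once; attach "model" only when an env/config override is
        # set, otherwise the router uses the task's provider:model slot.
        call_kwargs = {
            "task": "web_extract",
            "messages": [{"role": "user", "content": f"Summarize:\n\n{snapshot_text}"}],
            "max_tokens": 4000,
            "temperature": 0.1,
        }
        if model:
            call_kwargs["model"] = model
        try:
            response = await async_call_llm(**call_kwargs)
            return response.choices[0].message.content
        except RuntimeError:
            # No auxiliary provider is configured; degrade to truncation instead
            # of failing the whole tool, as the migrated consumers do.
            return snapshot_text[:4000]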

browser_tool.py

@@ -63,7 +63,7 @@ import time
 import requests
 from typing import Dict, Any, Optional, List
 from pathlib import Path
-from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client
+from agent.auxiliary_client import call_llm
 
 logger = logging.getLogger(__name__)
 
@@ -80,38 +80,15 @@ DEFAULT_SESSION_TIMEOUT = 300
 # Max tokens for snapshot content before summarization
 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
 
-# Vision client — for browser_vision (screenshot analysis)
-# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
-# browser_tool module from importing (which would disable all 10 browser tools).
-try:
-    _aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-except Exception as _init_err:
-    logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
-    _aux_vision_client, _DEFAULT_VISION_MODEL = None, None
-
-# Text client — for page snapshot summarization (same config as web_extract)
-try:
-    _aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
-except Exception as _init_err:
-    logger.debug("Could not initialise text auxiliary client: %s", _init_err)
-    _aux_text_client, _DEFAULT_TEXT_MODEL = None, None
-
-# Module-level alias for availability checks
-EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
-
 
-def _get_vision_model() -> str:
+def _get_vision_model() -> Optional[str]:
     """Model for browser_vision (screenshot analysis — multimodal)."""
-    return (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
-            or _DEFAULT_VISION_MODEL
-            or "google/gemini-3-flash-preview")
+    return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
 
 
-def _get_extraction_model() -> str:
+def _get_extraction_model() -> Optional[str]:
     """Model for page snapshot text summarization — same as web_extract."""
-    return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
-            or _DEFAULT_TEXT_MODEL
-            or "google/gemini-3-flash-preview")
+    return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
 
 
 def _is_local_mode() -> bool:
@@ -941,9 +918,6 @@ def _extract_relevant_content(
 
     Falls back to simple truncation when no auxiliary text model is configured.
     """
-    if _aux_text_client is None:
-        return _truncate_snapshot(snapshot_text)
-
    if user_task:
        extraction_prompt = (
            f"You are a content extractor for a browser automation agent.\n\n"
@@ -968,13 +942,16 @@ def _extract_relevant_content(
        )
 
    try:
-        from agent.auxiliary_client import auxiliary_max_tokens_param
-        response = _aux_text_client.chat.completions.create(
-            model=_get_extraction_model(),
-            messages=[{"role": "user", "content": extraction_prompt}],
-            **auxiliary_max_tokens_param(4000),
-            temperature=0.1,
-        )
+        call_kwargs = {
+            "task": "web_extract",
+            "messages": [{"role": "user", "content": extraction_prompt}],
+            "max_tokens": 4000,
+            "temperature": 0.1,
+        }
+        model = _get_extraction_model()
+        if model:
+            call_kwargs["model"] = model
+        response = call_llm(**call_kwargs)
        return response.choices[0].message.content
    except Exception:
        return _truncate_snapshot(snapshot_text)
@@ -1497,14 +1474,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
 
    effective_task_id = task_id or "default"
 
-    # Check auxiliary vision client
-    if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
-        return json.dumps({
-            "success": False,
-            "error": "Browser vision unavailable: no auxiliary vision model configured. "
-                     "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
-        }, ensure_ascii=False)
-
    # Save screenshot to persistent location so it can be shared with users
    hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
    screenshots_dir = hermes_home / "browser_screenshots"
@@ -1562,14 +1531,13 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
        f"Focus on answering the user's specific question."
    )
 
-    # Use the sync auxiliary vision client directly
-    from agent.auxiliary_client import auxiliary_max_tokens_param
+    # Use the centralized LLM router
    vision_model = _get_vision_model()
-    logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s",
-                 len(image_data), vision_model)
-    response = _aux_vision_client.chat.completions.create(
-        model=vision_model,
-        messages=[
+    logger.debug("browser_vision: analysing screenshot (%d bytes)",
+                 len(image_data))
+    call_kwargs = {
+        "task": "vision",
+        "messages": [
            {
                "role": "user",
                "content": [
@@ -1578,9 +1546,12 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
                ],
            }
        ],
-        **auxiliary_max_tokens_param(2000),
-        temperature=0.1,
-    )
+        "max_tokens": 2000,
+        "temperature": 0.1,
+    }
+    if vision_model:
+        call_kwargs["model"] = vision_model
+    response = call_llm(**call_kwargs)
 
    analysis = response.choices[0].message.content
    response_data = {

mcp_tool.py

@@ -456,17 +456,13 @@ class SamplingHandler:
        # Resolve model
        model = self._resolve_model(getattr(params, "modelPreferences", None))
 
-        # Get auxiliary LLM client
-        from agent.auxiliary_client import get_text_auxiliary_client
-        client, default_model = get_text_auxiliary_client()
-        if client is None:
-            self.metrics["errors"] += 1
-            return self._error("No LLM provider available for sampling")
+        # Get auxiliary LLM client via centralized router
+        from agent.auxiliary_client import call_llm
 
-        resolved_model = model or default_model
+        # Model whitelist check (we need to resolve model before calling)
+        resolved_model = model or self.model_override or ""
 
-        # Model whitelist check
-        if self.allowed_models and resolved_model not in self.allowed_models:
+        if self.allowed_models and resolved_model and resolved_model not in self.allowed_models:
            logger.warning(
                "MCP server '%s' requested model '%s' not in allowed_models",
                self.server_name, resolved_model,
@@ -484,20 +480,15 @@
 
        # Build LLM call kwargs
        max_tokens = min(params.maxTokens, self.max_tokens_cap)
-        call_kwargs: dict = {
-            "model": resolved_model,
-            "messages": messages,
-            "max_tokens": max_tokens,
-        }
+        call_temperature = None
        if hasattr(params, "temperature") and params.temperature is not None:
-            call_kwargs["temperature"] = params.temperature
-        if stop := getattr(params, "stopSequences", None):
-            call_kwargs["stop"] = stop
+            call_temperature = params.temperature
 
        # Forward server-provided tools
+        call_tools = None
        server_tools = getattr(params, "tools", None)
        if server_tools:
-            call_kwargs["tools"] = [
+            call_tools = [
                {
                    "type": "function",
                    "function": {
@@ -508,9 +499,6 @@
                }
                for t in server_tools
            ]
-            if tool_choice := getattr(params, "toolChoice", None):
-                mode = getattr(tool_choice, "mode", "auto")
-                call_kwargs["tool_choice"] = {"auto": "auto", "required": "required", "none": "none"}.get(mode, "auto")
 
        logger.log(
            self.audit_level,
@@ -520,7 +508,15 @@
 
        # Offload sync LLM call to thread (non-blocking)
        def _sync_call():
-            return client.chat.completions.create(**call_kwargs)
+            return call_llm(
+                task="mcp",
+                model=resolved_model or None,
+                messages=messages,
+                temperature=call_temperature,
+                max_tokens=max_tokens,
+                tools=call_tools,
+                timeout=self.timeout,
+            )
 
        try:
            response = await asyncio.wait_for(

session_search_tool.py

@@ -22,13 +22,7 @@ import os
 import logging
 from typing import Dict, Any, List, Optional, Union
 
-from openai import AsyncOpenAI, OpenAI
-
-from agent.auxiliary_client import get_async_text_auxiliary_client
-
-# Resolve the async auxiliary client at import time so we have the model slug.
-# Handles Codex Responses API adapter transparently.
-_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
+from agent.auxiliary_client import async_call_llm
 MAX_SESSION_CHARS = 100_000
 MAX_SUMMARY_TOKENS = 10000
 
@@ -156,26 +150,22 @@ async def _summarize_session(
            f"Summarize this conversation with focus on: {query}"
        )
 
-    if _async_aux_client is None or _SUMMARIZER_MODEL is None:
-        logging.warning("No auxiliary model available for session summarization")
-        return None
-
    max_retries = 3
    for attempt in range(max_retries):
        try:
-            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-            _extra = get_auxiliary_extra_body()
-            response = await _async_aux_client.chat.completions.create(
-                model=_SUMMARIZER_MODEL,
+            response = await async_call_llm(
+                task="session_search",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
-                **({} if not _extra else {"extra_body": _extra}),
                temperature=0.1,
-                **auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
+                max_tokens=MAX_SUMMARY_TOKENS,
            )
            return response.choices[0].message.content.strip()
+        except RuntimeError:
+            logging.warning("No auxiliary model available for session summarization")
+            return None
        except Exception as e:
            if attempt < max_retries - 1:
                await asyncio.sleep(1 * (attempt + 1))
@@ -333,8 +323,6 @@ def session_search(
 
 def check_session_search_requirements() -> bool:
    """Requires SQLite state database and an auxiliary text model."""
-    if _async_aux_client is None:
-        return False
    try:
        from hermes_state import DEFAULT_DB_PATH
        return DEFAULT_DB_PATH.parent.exists()

skills_guard.py

@@ -936,13 +936,10 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
 
    # Call the LLM via the centralized provider router
    try:
-        from agent.auxiliary_client import resolve_provider_client
+        from agent.auxiliary_client import call_llm
 
-        client, _default_model = resolve_provider_client("openrouter")
-        if client is None:
-            return static_result
-
-        response = client.chat.completions.create(
+        response = call_llm(
+            provider="openrouter",
            model=model,
            messages=[{
                "role": "user",

vision_tools.py

@@ -37,16 +37,11 @@ from pathlib import Path
 from typing import Any, Awaitable, Dict, Optional
 from urllib.parse import urlparse
 import httpx
-from agent.auxiliary_client import get_async_vision_auxiliary_client
+from agent.auxiliary_client import async_call_llm
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
 
-# Resolve vision auxiliary client at module level.
-# Uses get_async_vision_auxiliary_client() which properly handles Codex
-# routing (Responses API adapter) instead of raw AsyncOpenAI construction.
-_aux_async_client, DEFAULT_VISION_MODEL = get_async_vision_auxiliary_client()
-
 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
 
 
@@ -185,7 +180,7 @@ def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None)
 async def vision_analyze_tool(
    image_url: str,
    user_prompt: str,
-    model: str = DEFAULT_VISION_MODEL,
+    model: str = None,
 ) -> str:
    """
    Analyze an image from a URL or local file path using vision AI.
@@ -245,15 +240,6 @@
    logger.info("Analyzing image: %s", image_url[:60])
    logger.info("User prompt: %s", user_prompt[:100])
 
-    # Check auxiliary vision client availability
-    if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
-        logger.error("Vision analysis unavailable: no auxiliary vision model configured")
-        return json.dumps({
-            "success": False,
-            "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
-                        "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
-        }, indent=2, ensure_ascii=False)
-
    # Determine if this is a local file path or a remote URL
    local_path = Path(image_url)
    if local_path.is_file():
@@ -309,18 +295,18 @@
        }
    ]
 
-    logger.info("Processing image with %s...", model)
+    logger.info("Processing image with vision model...")
 
-    # Call the vision API
-    from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-    _extra = get_auxiliary_extra_body()
-    response = await _aux_async_client.chat.completions.create(
-        model=model,
-        messages=messages,
-        temperature=0.1,
-        **auxiliary_max_tokens_param(2000),
-        **({} if not _extra else {"extra_body": _extra}),
-    )
+    # Call the vision API via centralized router
+    call_kwargs = {
+        "task": "vision",
+        "messages": messages,
+        "temperature": 0.1,
+        "max_tokens": 2000,
+    }
+    if model:
+        call_kwargs["model"] = model
+    response = await async_call_llm(**call_kwargs)
 
    # Extract the analysis
    analysis = response.choices[0].message.content.strip()
@@ -391,7 +377,18 @@
 
 def check_vision_requirements() -> bool:
    """Check if an auxiliary vision model is available."""
-    return _aux_async_client is not None
+    try:
+        from agent.auxiliary_client import resolve_provider_client
+        client, _ = resolve_provider_client("openrouter")
+        if client is not None:
+            return True
+        client, _ = resolve_provider_client("nous")
+        if client is not None:
+            return True
+        client, _ = resolve_provider_client("custom")
+        return client is not None
+    except Exception:
+        return False
 
 
 def get_debug_session_info() -> Dict[str, Any]:
@@ -419,10 +416,9 @@
        print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
        exit(1)
    else:
-        print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
+        print("✅ Vision model available")
 
    print("🛠️ Vision tools ready for use!")
-    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
 
    # Show debug mode status
    if _debug.active:
@@ -489,9 +485,7 @@ def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
        "Fully describe and explain everything about this image, then answer the "
        f"following question:\n\n{question}"
    )
-    model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
-             or DEFAULT_VISION_MODEL
-             or "google/gemini-3-flash-preview")
+    model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
    return vision_analyze_tool(image_url, full_prompt, model)
 
 

web_tools.py

@@ -47,8 +47,7 @@ import re
 import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_async_text_auxiliary_client
+from agent.auxiliary_client import async_call_llm
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
@@ -83,15 +82,8 @@ def _get_firecrawl_client():
 
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
 
-# Resolve async auxiliary client at module level.
-# Handles Codex Responses API adapter transparently.
-_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract")
-
-# Allow per-task override via config.yaml auxiliary.web_extract_model
-DEFAULT_SUMMARIZER_MODEL = (
-    os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
-    or _DEFAULT_SUMMARIZER_MODEL
-)
+# Allow per-task override via env var
+DEFAULT_SUMMARIZER_MODEL = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
 
 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
 
@@ -249,22 +241,22 @@ Create a markdown summary that captures all key information in a well-organized,
 
    for attempt in range(max_retries):
        try:
-            if _aux_async_client is None:
-                logger.warning("No auxiliary model available for web content processing")
-                return None
-            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-            _extra = get_auxiliary_extra_body()
-            response = await _aux_async_client.chat.completions.create(
-                model=model,
-                messages=[
+            call_kwargs = {
+                "task": "web_extract",
+                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
-                temperature=0.1,
-                **auxiliary_max_tokens_param(max_tokens),
-                **({} if not _extra else {"extra_body": _extra}),
-            )
+                "temperature": 0.1,
+                "max_tokens": max_tokens,
+            }
+            if model:
+                call_kwargs["model"] = model
+            response = await async_call_llm(**call_kwargs)
            return response.choices[0].message.content.strip()
+        except RuntimeError:
+            logger.warning("No auxiliary model available for web content processing")
+            return None
        except Exception as api_error:
            last_error = api_error
            if attempt < max_retries - 1:
@@ -368,25 +360,18 @@ Synthesize these into ONE cohesive, comprehensive summary:
 Create a single, unified markdown summary."""
 
    try:
-        if _aux_async_client is None:
-            logger.warning("No auxiliary model for synthesis, concatenating summaries")
-            fallback = "\n\n".join(summaries)
-            if len(fallback) > max_output_size:
-                fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
-            return fallback
-
-        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
-        _extra = get_auxiliary_extra_body()
-        response = await _aux_async_client.chat.completions.create(
-            model=model,
-            messages=[
+        call_kwargs = {
+            "task": "web_extract",
+            "messages": [
                {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
                {"role": "user", "content": synthesis_prompt}
            ],
-            temperature=0.1,
-            **auxiliary_max_tokens_param(20000),
-            **({} if not _extra else {"extra_body": _extra}),
-        )
+            "temperature": 0.1,
+            "max_tokens": 20000,
+        }
+        if model:
+            call_kwargs["model"] = model
+        response = await async_call_llm(**call_kwargs)
        final_summary = response.choices[0].message.content.strip()
 
        # Enforce hard cap
@@ -713,8 +698,8 @@ async def web_extract_tool(
        debug_call_data["pages_extracted"] = pages_extracted
        debug_call_data["original_response_size"] = len(json.dumps(response))
 
-    # Process each result with LLM if enabled and auxiliary client is available
-    if use_llm_processing and _aux_async_client is not None:
+    # Process each result with LLM if enabled
+    if use_llm_processing:
        logger.info("Processing extracted content with LLM (parallel)...")
        debug_call_data["processing_applied"].append("llm_processing")
 
@@ -780,10 +765,6 @@ async def web_extract_tool(
            else:
                logger.warning("%s (no content to process)", url)
    else:
-        if use_llm_processing and _aux_async_client is None:
-            logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
-            debug_call_data["processing_applied"].append("llm_processing_unavailable")
-
        # Print summary of extracted pages for debugging (original behavior)
        for result in response.get('results', []):
            url = result.get('url', 'Unknown URL')
@@ -1013,8 +994,8 @@ async def web_crawl_tool(
        debug_call_data["pages_crawled"] = pages_crawled
        debug_call_data["original_response_size"] = len(json.dumps(response))
 
-    # Process each result with LLM if enabled and auxiliary client is available
-    if use_llm_processing and _aux_async_client is not None:
+    # Process each result with LLM if enabled
+    if use_llm_processing:
        logger.info("Processing crawled content with LLM (parallel)...")
        debug_call_data["processing_applied"].append("llm_processing")
 
@@ -1080,10 +1061,6 @@ async def web_crawl_tool(
            else:
                logger.warning("%s (no content to process)", page_url)
    else:
-        if use_llm_processing and _aux_async_client is None:
-            logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
-            debug_call_data["processing_applied"].append("llm_processing_unavailable")
-
        # Print summary of crawled pages for debugging (original behavior)
        for result in response.get('results', []):
            page_url = result.get('url', 'Unknown URL')
@@ -1138,7 +1115,15 @@ def check_firecrawl_api_key() -> bool:
 
 def check_auxiliary_model() -> bool:
    """Check if an auxiliary text model is available for LLM content processing."""
-    return _aux_async_client is not None
+    try:
+        from agent.auxiliary_client import resolve_provider_client
+        for p in ("openrouter", "nous", "custom", "codex"):
+            client, _ = resolve_provider_client(p)
+            if client is not None:
+                return True
+        return False
+    except Exception:
+        return False
 
 
 def get_debug_session_info() -> Dict[str, Any]: