Merge remote-tracking branch 'origin/main' into feat/honcho-async-memory
Made-with: Cursor

# Conflicts:
#	cli.py
#	tests/test_run_agent.py
commit a0b0dbe6b2

138 changed files with 17829 additions and 1109 deletions

run_agent.py (206 changes)
@@ -175,6 +175,7 @@ class AIAgent:
         session_id: str = None,
         tool_progress_callback: callable = None,
         thinking_callback: callable = None,
+        reasoning_callback: callable = None,
         clarify_callback: callable = None,
         step_callback: callable = None,
         max_tokens: int = None,
@@ -266,6 +267,7 @@ class AIAgent:
 
         self.tool_progress_callback = tool_progress_callback
         self.thinking_callback = thinking_callback
+        self.reasoning_callback = reasoning_callback
         self.clarify_callback = clarify_callback
         self.step_callback = step_callback
         self._last_reported_tool = None  # Track for "new tool" mode
@@ -303,6 +305,13 @@ class AIAgent:
         self._use_prompt_caching = is_openrouter and is_claude
         self._cache_ttl = "5m"  # Default 5-minute TTL (1.25x write cost)
 
+        # Iteration budget pressure: warn the LLM as it approaches max_iterations.
+        # Warnings are injected into the last tool result JSON (not as separate
+        # messages) so they don't break message structure or invalidate caching.
+        self._budget_caution_threshold = 0.7  # 70% — nudge to start wrapping up
+        self._budget_warning_threshold = 0.9  # 90% — urgent, respond now
+        self._budget_pressure_enabled = True
+
         # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
         # so tool failures, API errors, etc. are inspectable after the fact.
         from agent.redact import RedactingFormatter
@@ -503,6 +512,7 @@ class AIAgent:
 
         # SQLite session store (optional -- provided by CLI or gateway)
         self._session_db = session_db
+        self._last_flushed_db_idx = 0  # tracks DB-write cursor to prevent duplicate writes
         if self._session_db:
             try:
                 self._session_db.create_session(
@@ -833,45 +843,19 @@ class AIAgent:
         self._save_session_log(messages)
         self._flush_messages_to_session_db(messages, conversation_history)
 
-    def _log_msg_to_db(self, msg: Dict):
-        """Log a single message to SQLite immediately. Called after each messages.append()."""
-        if not self._session_db:
-            return
-        try:
-            role = msg.get("role", "unknown")
-            content = msg.get("content")
-            tool_calls_data = None
-            if hasattr(msg, "tool_calls") and msg.tool_calls:
-                tool_calls_data = [
-                    {"name": tc.function.name, "arguments": tc.function.arguments}
-                    for tc in msg.tool_calls
-                ]
-            elif isinstance(msg.get("tool_calls"), list):
-                tool_calls_data = msg["tool_calls"]
-            self._session_db.append_message(
-                session_id=self.session_id,
-                role=role,
-                content=content,
-                tool_name=msg.get("tool_name"),
-                tool_calls=tool_calls_data,
-                tool_call_id=msg.get("tool_call_id"),
-                finish_reason=msg.get("finish_reason"),
-            )
-        except Exception as e:
-            logger.debug("Session DB log_msg failed: %s", e)
-
     def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None):
-        """Persist any un-logged messages to the SQLite session store.
+        """Persist any un-flushed messages to the SQLite session store.
 
         Called both at the normal end of run_conversation and from every early-
         return path so that tool calls, tool responses, and assistant messages
         are never lost even when the conversation errors out.
+        Uses _last_flushed_db_idx to track which messages have already been
+        written, so repeated calls (from multiple exit paths) only write
+        truly new messages — preventing the duplicate-write bug (#860).
         """
         if not self._session_db:
             return
         try:
             start_idx = len(conversation_history) if conversation_history else 0
-            for msg in messages[start_idx:]:
+            flush_from = max(start_idx, self._last_flushed_db_idx)
+            for msg in messages[flush_from:]:
                 role = msg.get("role", "unknown")
                 content = msg.get("content")
                 tool_calls_data = None
@@ -891,6 +875,7 @@ class AIAgent:
                     tool_call_id=msg.get("tool_call_id"),
                     finish_reason=msg.get("finish_reason"),
                 )
+            self._last_flushed_db_idx = len(messages)
         except Exception as e:
             logger.debug("Session DB append_message failed: %s", e)
 
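The cursor-based flush above is what retires the per-message `_log_msg_to_db` path. A minimal, self-contained sketch of the idea (the `FlushDemo` class and its list-backed store are illustrative stand-ins, not code from this patch):

```python
# Illustrative only: a list stands in for the SQLite store.
class FlushDemo:
    def __init__(self):
        self.written = []
        self._last_flushed_db_idx = 0

    def flush(self, messages, history_len=0):
        # Mirror of the patched logic: skip anything already written.
        flush_from = max(history_len, self._last_flushed_db_idx)
        for msg in messages[flush_from:]:
            self.written.append(msg)
        self._last_flushed_db_idx = len(messages)

demo = FlushDemo()
msgs = [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]
demo.flush(msgs)   # normal end-of-run flush: writes 2 messages
demo.flush(msgs)   # early-return path flushes again: writes 0 duplicates
assert len(demo.written) == 2
```

Because every exit path can call the flush unconditionally, no path needs to know whether another one already ran, which is what made the duplicate-write bug (#860) possible in the first place.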
@@ -1668,7 +1653,14 @@ class AIAgent:
         prompt_parts.append(user_block)
 
         has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
-        skills_prompt = build_skills_system_prompt() if has_skills_tools else ""
+        if has_skills_tools:
+            avail_toolsets = {ts for ts, avail in check_toolset_requirements().items() if avail}
+            skills_prompt = build_skills_system_prompt(
+                available_tools=self.valid_tool_names,
+                available_toolsets=avail_toolsets,
+            )
+        else:
+            skills_prompt = ""
         if skills_prompt:
             prompt_parts.append(skills_prompt)
 
@@ -2043,6 +2035,7 @@ class AIAgent:
         allowed_keys = {
             "model", "instructions", "input", "tools", "store",
             "reasoning", "include", "max_output_tokens", "temperature",
+            "tool_choice", "parallel_tool_calls", "prompt_cache_key",
         }
         normalized: Dict[str, Any] = {
             "model": model,
@@ -2068,6 +2061,12 @@ class AIAgent:
         if isinstance(temperature, (int, float)):
             normalized["temperature"] = float(temperature)
 
+        # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
+        for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
+            val = api_kwargs.get(passthrough_key)
+            if val is not None:
+                normalized[passthrough_key] = val
+
         if allow_stream:
             stream = api_kwargs.get("stream")
             if stream is not None and stream is not True:
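Taken together with the `allowed_keys` hunk above, the normalization acts as an allowlist: unknown kwargs never reach the Responses endpoint. A hedged sketch of that behavior (the function name and the final filtering pass are assumptions for illustration):

```python
from typing import Any, Dict

ALLOWED_KEYS = {
    "model", "instructions", "input", "tools", "store",
    "reasoning", "include", "max_output_tokens", "temperature",
    "tool_choice", "parallel_tool_calls", "prompt_cache_key",
}

def normalize_responses_kwargs(api_kwargs: Dict[str, Any], model: str) -> Dict[str, Any]:
    normalized: Dict[str, Any] = {"model": model}
    # Forward only explicitly-set passthrough values, as in the hunk above.
    for key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
        val = api_kwargs.get(key)
        if val is not None:
            normalized[key] = val
    # Drop anything outside the allowlist before the request is built.
    return {k: v for k, v in normalized.items() if k in ALLOWED_KEYS}

print(normalize_responses_kwargs({"tool_choice": "auto", "stream": True}, "example-model"))
# {'model': 'example-model', 'tool_choice': 'auto'}  ("stream" never survives)
```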
@@ -2604,7 +2603,10 @@ class AIAgent:
             "instructions": instructions,
             "input": self._chat_messages_to_responses_input(payload_messages),
             "tools": self._responses_tools(),
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
             "store": False,
+            "prompt_cache_key": self.session_id,
         }
 
         if reasoning_enabled:
@@ -2681,6 +2683,12 @@ class AIAgent:
                 preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
                 logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
 
+            if reasoning_text and self.reasoning_callback:
+                try:
+                    self.reasoning_callback(reasoning_text)
+                except Exception:
+                    pass
+
             msg = {
                 "role": "assistant",
                 "content": assistant_message.content or "",
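The try/except wrapper matters here: the callback typically drives a UI, and a crash in display code should not abort the agent turn. A minimal sketch of a consumer (the `on_reasoning` function is hypothetical):

```python
def on_reasoning(text: str) -> None:
    # Hypothetical consumer: forward a preview to a log pane or spinner.
    print(f"[reasoning] {text[:80]}")

reasoning_text = "The user wants the failing test fixed first, so start there."
if reasoning_text and on_reasoning:
    try:
        on_reasoning(reasoning_text)  # mirrors the guarded call in the hunk above
    except Exception:
        pass  # never let a display failure kill the turn
```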
@@ -2900,7 +2908,7 @@ class AIAgent:
         if messages and messages[-1].get("_flush_sentinel") == _sentinel:
             messages.pop()
 
-    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple:
+    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
         """Compress conversation context and split the session in SQLite.
 
         Returns:
@@ -2915,6 +2923,25 @@ class AIAgent:
         if todo_snapshot:
             compressed.append({"role": "user", "content": todo_snapshot})
 
+        # Preserve file-read history so the model doesn't re-read files
+        # it already examined before compression.
+        try:
+            from tools.file_tools import get_read_files_summary
+            read_files = get_read_files_summary(task_id)
+            if read_files:
+                file_list = "\n".join(
+                    f"  - {f['path']} ({', '.join(f['regions'])})"
+                    for f in read_files
+                )
+                compressed.append({"role": "user", "content": (
+                    "[Files already read in this session — do NOT re-read these]\n"
+                    f"{file_list}\n"
+                    "Use the information from the context summary above. "
+                    "Proceed with writing, editing, or responding."
+                )})
+        except Exception:
+            pass  # Don't break compression if file tracking fails
+
         self._invalidate_system_prompt()
         new_system_prompt = self._build_system_prompt(system_message)
         self._cached_system_prompt = new_system_prompt
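The comprehension implies `get_read_files_summary()` returns dicts with a `path` string and an iterable of `regions` strings; the sample data below is invented to show the message the model ends up seeing, not taken from the tool's actual output:

```python
# Invented sample data matching the shape the comprehension expects.
read_files = [
    {"path": "agent/run_agent.py", "regions": ["lines 1-120", "lines 830-900"]},
    {"path": "cli.py", "regions": ["full file"]},
]
file_list = "\n".join(
    f"  - {f['path']} ({', '.join(f['regions'])})" for f in read_files
)
print("[Files already read in this session — do NOT re-read these]\n" + file_list)
```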
@@ -2940,12 +2967,14 @@ class AIAgent:
             except (ValueError, Exception) as e:
                 logger.debug("Could not propagate title on compression: %s", e)
             self._session_db.update_system_prompt(self.session_id, new_system_prompt)
+            # Reset flush cursor — new session starts with no messages written
+            self._last_flushed_db_idx = 0
         except Exception as e:
             logger.debug("Session DB compression split failed: %s", e)
 
         return compressed, new_system_prompt
 
-    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str) -> None:
+    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
         """Execute tool calls from the assistant message and append results to messages."""
         for i, tool_call in enumerate(assistant_message.tool_calls, 1):
             # SAFETY: check interrupt BEFORE starting each tool.
@@ -2963,7 +2992,6 @@ class AIAgent:
                     "tool_call_id": skipped_tc.id,
                 }
                 messages.append(skip_msg)
-                self._log_msg_to_db(skip_msg)
                 break
 
             function_name = tool_call.function.name
@@ -3172,7 +3200,6 @@ class AIAgent:
                 "tool_call_id": tool_call.id
             }
             messages.append(tool_msg)
-            self._log_msg_to_db(tool_msg)
 
             if not self.quiet_mode:
                 response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
@@ -3189,12 +3216,56 @@ class AIAgent:
                     "tool_call_id": skipped_tc.id
                 }
                 messages.append(skip_msg)
-                self._log_msg_to_db(skip_msg)
                 break
 
             if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
                 time.sleep(self.tool_delay)
 
+        # ── Budget pressure injection ─────────────────────────────────
+        # After all tool calls in this turn are processed, check if we're
+        # approaching max_iterations. If so, inject a warning into the LAST
+        # tool result's JSON so the LLM sees it naturally when reading results.
+        budget_warning = self._get_budget_warning(api_call_count)
+        if budget_warning and messages and messages[-1].get("role") == "tool":
+            last_content = messages[-1]["content"]
+            try:
+                parsed = json.loads(last_content)
+                if isinstance(parsed, dict):
+                    parsed["_budget_warning"] = budget_warning
+                    messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
+                else:
+                    messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
+            except (json.JSONDecodeError, TypeError):
+                messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
+            if not self.quiet_mode:
+                remaining = self.max_iterations - api_call_count
+                tier = "⚠️ WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION"
+                print(f"{self.log_prefix}{tier}: {remaining} iterations remaining")
+
+    def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
+        """Return a budget pressure string, or None if not yet needed.
+
+        Two-tier system:
+        - Caution (70%): nudge to consolidate work
+        - Warning (90%): urgent, must respond now
+        """
+        if not self._budget_pressure_enabled or self.max_iterations <= 0:
+            return None
+        progress = api_call_count / self.max_iterations
+        remaining = self.max_iterations - api_call_count
+        if progress >= self._budget_warning_threshold:
+            return (
+                f"[BUDGET WARNING: Iteration {api_call_count}/{self.max_iterations}. "
+                f"Only {remaining} iteration(s) left. "
+                "Provide your final response NOW. No more tool calls unless absolutely critical.]"
+            )
+        if progress >= self._budget_caution_threshold:
+            return (
+                f"[BUDGET: Iteration {api_call_count}/{self.max_iterations}. "
+                f"{remaining} iterations left. Start consolidating your work.]"
+            )
+        return None
+
     def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
         """Request a summary when max iterations are reached. Returns the final response text."""
         print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
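With the default thresholds from the `__init__` hunk (0.7 and 0.9), the tiers fall out of simple arithmetic. A worked example assuming a hypothetical `max_iterations` of 30:

```python
from typing import Optional

def tier(api_call_count: int, max_iterations: int = 30) -> Optional[str]:
    # Same threshold math as _get_budget_warning, reduced to the tier decision.
    progress = api_call_count / max_iterations
    if progress >= 0.9:
        return "warning"
    if progress >= 0.7:
        return "caution"
    return None

assert tier(20) is None          # 20/30 = 0.67, below both thresholds
assert tier(21) == "caution"     # 21/30 = 0.70, start consolidating
assert tier(27) == "warning"     # 27/30 = 0.90, respond now
```

Injecting the string into the last tool result's JSON, rather than appending a separate user message, keeps the role structure of the message list stable, so cached prompt prefixes stay valid.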
@@ -3427,7 +3498,6 @@ class AIAgent:
         # Add user message
         user_msg = {"role": "user", "content": user_message}
         messages.append(user_msg)
-        self._log_msg_to_db(user_msg)
 
         if not self.quiet_mode:
             print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
@@ -3509,7 +3579,8 @@ class AIAgent:
             for _pass in range(3):
                 _orig_len = len(messages)
                 messages, active_system_prompt = self._compress_context(
-                    messages, system_message, approx_tokens=_preflight_tokens
+                    messages, system_message, approx_tokens=_preflight_tokens,
+                    task_id=effective_task_id,
                 )
                 if len(messages) >= _orig_len:
                     break  # Cannot compress further
@@ -3660,7 +3731,7 @@ class AIAgent:
 
             api_start_time = time.time()
             retry_count = 0
-            max_retries = 6  # Increased to allow longer backoff periods
+            max_retries = 3
             compression_attempts = 0
             max_compression_attempts = 3
             codex_auth_retry_attempted = False
@@ -3827,7 +3898,6 @@ class AIAgent:
                     length_continue_retries += 1
                     interim_msg = self._build_assistant_message(assistant_message, finish_reason)
                     messages.append(interim_msg)
-                    self._log_msg_to_db(interim_msg)
                     if assistant_message.content:
                         truncated_response_prefix += assistant_message.content
 
@@ -3845,7 +3915,6 @@ class AIAgent:
                         ),
                     }
                     messages.append(continue_msg)
-                    self._log_msg_to_db(continue_msg)
                     self._session_messages = messages
                     self._save_session_log(messages)
                     restart_with_length_continuation = True
@@ -4035,7 +4104,8 @@ class AIAgent:
 
                         original_len = len(messages)
                         messages, active_system_prompt = self._compress_context(
-                            messages, system_message, approx_tokens=approx_tokens
+                            messages, system_message, approx_tokens=approx_tokens,
+                            task_id=effective_task_id,
                         )
 
                         if len(messages) < original_len:
@@ -4064,6 +4134,7 @@ class AIAgent:
                         'token limit', 'too many tokens', 'reduce the length',
                         'exceeds the limit', 'context window',
                         'request entity too large',  # OpenRouter/Nous 413 safety net
+                        'prompt is too long',  # Anthropic: "prompt is too long: N tokens > M maximum"
                     ])
 
                     if is_context_length_error:
@@ -4103,7 +4174,8 @@ class AIAgent:
 
                         original_len = len(messages)
                         messages, active_system_prompt = self._compress_context(
-                            messages, system_message, approx_tokens=approx_tokens
+                            messages, system_message, approx_tokens=approx_tokens,
+                            task_id=effective_task_id,
                         )
 
                         if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
@@ -4130,8 +4202,11 @@ class AIAgent:
                     # These indicate a problem with the request itself (bad model ID,
                     # invalid API key, forbidden, etc.) and will never succeed on retry.
                     # Note: 413 and context-length errors are excluded — handled above.
+                    # Also catch local validation errors (ValueError, TypeError) — these
+                    # are programming bugs, not transient failures.
+                    is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
                     is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
-                    is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
+                    is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
                         'error code: 401', 'error code: 403',
                         'error code: 404', 'error code: 422',
                         'is not a valid model', 'invalid model', 'model not found',
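The net effect is a fail-fast split: local validation bugs and permanent 4xx responses stop retrying immediately, while 413 stays retryable because the compression path above may shrink the prompt. A small sketch of the predicate (the function name is assumed; rate-limit handling is presumably done elsewhere in the retry loop):

```python
def is_permanent_client_error(status_code, exc) -> bool:
    if isinstance(exc, (ValueError, TypeError)):  # local bug; retrying cannot help
        return True
    return (
        isinstance(status_code, int)
        and 400 <= status_code < 500
        and status_code != 413  # too-large requests are compressed, not abandoned
    )

assert is_permanent_client_error(401, None)       # bad API key: fail fast
assert not is_permanent_client_error(413, None)   # context overflow: compress and retry
assert not is_permanent_client_error(500, None)   # server error: normal backoff applies
```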
@@ -4318,7 +4393,6 @@ class AIAgent:
                     )
                     if not duplicate_interim:
                         messages.append(interim_msg)
-                        self._log_msg_to_db(interim_msg)
 
                     if self._codex_incomplete_retries < 3:
                         if not self.quiet_mode:
@@ -4369,7 +4443,6 @@ class AIAgent:
                         print(f"{self.log_prefix}⚠️ Unknown tool '{invalid_preview}' — sending error to model for self-correction")
                     assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
                     messages.append(assistant_msg)
-                    self._log_msg_to_db(assistant_msg)
                     for tc in assistant_message.tool_calls:
                         if tc.function.name not in self.valid_tool_names:
                             content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
@@ -4424,7 +4497,6 @@ class AIAgent:
                     )
                     recovery_dict = {"role": "user", "content": recovery_msg}
                     messages.append(recovery_dict)
-                    self._log_msg_to_db(recovery_dict)
                     continue
 
                 # Reset retry counter on successful JSON validation
@@ -4446,9 +4518,9 @@ class AIAgent:
                             print(f"    ┊ 💬 {clean}")
 
                     messages.append(assistant_msg)
-                    self._log_msg_to_db(assistant_msg)
 
-                    self._execute_tool_calls(assistant_message, messages, effective_task_id)
+                    _msg_count_before_tools = len(messages)
+                    self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
 
                     # Refund the iteration if the ONLY tool(s) called were
                     # execute_code (programmatic tool calling). These are
@@ -4457,10 +4529,24 @@ class AIAgent:
                    if _tc_names == {"execute_code"}:
                        self.iteration_budget.refund()
 
-                    if self.compression_enabled and self.context_compressor.should_compress():
+                    # Estimate next prompt size using real token counts from the
+                    # last API response + rough estimate of newly appended tool
+                    # results. This catches cases where tool results push the
+                    # context past the limit that last_prompt_tokens alone misses
+                    # (e.g. large file reads, web extractions).
+                    _compressor = self.context_compressor
+                    _new_tool_msgs = messages[_msg_count_before_tools:]
+                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
+                    _estimated_next_prompt = (
+                        _compressor.last_prompt_tokens
+                        + _compressor.last_completion_tokens
+                        + _new_chars // 3  # conservative: JSON-heavy tool results ≈ 3 chars/token
+                    )
+                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
                         messages, active_system_prompt = self._compress_context(
                             messages, system_message,
-                            approx_tokens=self.context_compressor.last_prompt_tokens
+                            approx_tokens=self.context_compressor.last_prompt_tokens,
+                            task_id=effective_task_id,
                         )
 
                     # Save session log incrementally (so progress is visible even if interrupted)
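The 3-chars-per-token divisor deliberately overestimates token usage for JSON-heavy tool output (English prose averages closer to 4 chars per token). A worked example with invented numbers:

```python
last_prompt_tokens = 90_000      # real count from the previous API response
last_completion_tokens = 2_000   # real count from the previous API response
new_tool_chars = 60_000          # e.g. a large file read appended this turn

estimated_next_prompt = (
    last_prompt_tokens
    + last_completion_tokens
    + new_tool_chars // 3        # 60_000 chars, roughly 20_000 tokens
)
assert estimated_next_prompt == 112_000
# A compressor triggering near 100k would now compress BEFORE the next call,
# instead of learning about the overflow from a 413 / context-length error.
```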
@@ -4549,7 +4635,6 @@ class AIAgent:
                         "finish_reason": finish_reason,
                     }
                     messages.append(empty_msg)
-                    self._log_msg_to_db(empty_msg)
 
                     self._cleanup_task_resources(effective_task_id)
                     self._persist_session(messages, conversation_history)
@@ -4580,7 +4665,6 @@ class AIAgent:
                     codex_ack_continuations += 1
                     interim_msg = self._build_assistant_message(assistant_message, "incomplete")
                     messages.append(interim_msg)
-                    self._log_msg_to_db(interim_msg)
 
                     continue_msg = {
                         "role": "user",
@@ -4590,7 +4674,6 @@ class AIAgent:
                         ),
                     }
                     messages.append(continue_msg)
-                    self._log_msg_to_db(continue_msg)
                     self._session_messages = messages
                     self._save_session_log(messages)
                     continue
@@ -4606,7 +4689,6 @@ class AIAgent:
                 final_msg = self._build_assistant_message(assistant_message, finish_reason)
 
                 messages.append(final_msg)
-                self._log_msg_to_db(final_msg)
 
                 if not self.quiet_mode:
                     print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
@@ -4643,7 +4725,6 @@ class AIAgent:
                             "content": f"Error executing tool: {error_msg}",
                         }
                         messages.append(err_msg)
-                        self._log_msg_to_db(err_msg)
                         pending_handled = True
                         break
 
@@ -4656,7 +4737,6 @@ class AIAgent:
                     "content": f"[System error during processing: {error_msg}]",
                 }
                 messages.append(sys_err_msg)
-                self._log_msg_to_db(sys_err_msg)
 
                 # If we're near the limit, break to avoid infinite loops
                 if api_call_count >= self.max_iterations - 1:
@@ -4688,9 +4768,17 @@ class AIAgent:
         self._honcho_sync(original_user_message, final_response)
         self._queue_honcho_prefetch(original_user_message)
 
+        # Extract reasoning from the last assistant message (if any)
+        last_reasoning = None
+        for msg in reversed(messages):
+            if msg.get("role") == "assistant" and msg.get("reasoning"):
+                last_reasoning = msg["reasoning"]
+                break
+
         # Build result with interrupt info if applicable
         result = {
             "final_response": final_response,
+            "last_reasoning": last_reasoning,
             "messages": messages,
             "api_calls": api_call_count,
             "completed": completed,