fix: WebResearchEnv compute_reward extracts from AgentResult.messages
AgentResult has .messages (a list of dicts), not .final_response or .tool_calls. Fixed compute_reward to extract the final response and the tool names from the message history.

Verified with a live process-mode test:
- Agent used 7 tool calls (web_search, web_extract)
- Produced a 1106-char researched response about Winter Olympics
- Reward: 0.384 (partial correctness via LLM judge)
- JSONL output contains valid tokens, masks, scores, messages
This commit is contained in:
parent
8eabdefa8a
commit
320f881e0b
1 changed file with 13 additions and 4 deletions
|
|
@@ -356,10 +356,19 @@ class WebResearchEnv(HermesAgentBaseEnv):
|
||||||
efficiency_weight * efficiency — penalizes wasteful tool usage
|
efficiency_weight * efficiency — penalizes wasteful tool usage
|
||||||
+ diversity_bonus — source diversity (≥2 distinct domains)
|
+ diversity_bonus — source diversity (≥2 distinct domains)
|
||||||
"""
|
"""
|
||||||
final_response: str = result.final_response or ""
|
# Extract final response from messages (last assistant message with content)
|
||||||
tools_used: list[str] = [
|
final_response = ""
|
||||||
tc.tool_name for tc in (result.tool_calls or [])
|
tools_used: list[str] = []
|
||||||
] if hasattr(result, "tool_calls") and result.tool_calls else []
|
for msg in reversed(result.messages):
|
||||||
|
if msg.get("role") == "assistant" and msg.get("content") and not final_response:
|
||||||
|
final_response = msg["content"]
|
||||||
|
# Collect tool names from tool call messages
|
||||||
|
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||||
|
for tc in msg["tool_calls"]:
|
||||||
|
fn = tc.get("function", {}) if isinstance(tc, dict) else {}
|
||||||
|
name = fn.get("name", "")
|
||||||
|
if name:
|
||||||
|
tools_used.append(name)
|
||||||
tool_call_count: int = result.turns_used or len(tools_used)
|
tool_call_count: int = result.turns_used or len(tools_used)
|
||||||
|
|
||||||
cfg = self.config
|
cfg = self.config
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue