Merge: WebResearchEnv compute_reward fix (verified with live test)

2026-03-09 19:29:19 -07:00 · 2026-03-09 19:29:19 -07:00 · a5c6348d41
commit a5c6348d41
parent 172a38c344 320f881e0b
1 changed file with 13 additions and 4 deletions
--- a/environments/web_research_env.py
+++ b/environments/web_research_env.py
@@ -356,10 +356,19 @@ class WebResearchEnv(HermesAgentBaseEnv):
          efficiency_weight  * efficiency   — penalizes wasteful tool usage
          + diversity_bonus                 — source diversity (≥2 distinct domains)
        """
-        final_response: str = result.final_response or ""
+        # Extract final response from messages (last assistant message with content)
-        tools_used: list[str] = [
+        final_response = ""
-            tc.tool_name for tc in (result.tool_calls or [])
+        tools_used: list[str] = []
-        ] if hasattr(result, "tool_calls") and result.tool_calls else []
+        for msg in reversed(result.messages):
+            if msg.get("role") == "assistant" and msg.get("content") and not final_response:
+                final_response = msg["content"]
+            # Collect tool names from tool call messages
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                for tc in msg["tool_calls"]:
+                    fn = tc.get("function", {}) if isinstance(tc, dict) else {}
+                    name = fn.get("name", "")
+                    if name:
+                        tools_used.append(name)
        tool_call_count: int = result.turns_used or len(tools_used)
        cfg = self.config