feat: add /insights command with usage analytics and cost estimation

Inspired by Claude Code's /insights, adapted for Hermes Agent's multi-platform architecture. Analyzes session history from state.db to produce comprehensive usage insights. Features: - Overview stats: sessions, messages, tokens, estimated cost, active time - Model breakdown: per-model sessions, tokens, and cost estimation - Platform breakdown: CLI vs Telegram vs Discord etc. (unique to Hermes) - Tool usage ranking: most-used tools with percentages - Activity patterns: day-of-week chart, peak hours, streaks - Notable sessions: longest, most messages, most tokens, most tool calls - Cost estimation: real pricing data for 25+ models (OpenAI, Anthropic, DeepSeek, Google, Meta) with fuzzy model name matching - Configurable time window: --days flag (default 30) - Source filtering: --source flag to filter by platform Three entry points: - /insights slash command in CLI (supports --days and --source flags) - /insights slash command in gateway (compact markdown format) - hermes insights CLI subcommand (standalone) Includes 56 tests covering pricing helpers, format helpers, empty DB, populated DB with multi-platform data, filtering, formatting, and edge cases.
2026-03-06 14:04:59 -08:00 · 2026-03-06 14:04:59 -08:00 · b52b37ae64
commit b52b37ae64
parent d63b363cde
6 changed files with 1387 additions and 1 deletions
--- a/tests/test_insights.py
+++ b/tests/test_insights.py
@ -0,0 +1,582 @@
+"""Tests for agent/insights.py — InsightsEngine analytics and reporting."""
+
+import time
+import pytest
+from pathlib import Path
+
+from hermes_state import SessionDB
+from agent.insights import (
+    InsightsEngine,
+    _get_pricing,
+    _estimate_cost,
+    _format_duration,
+    _bar_chart,
+    _DEFAULT_PRICING,
+)
+
+
+@pytest.fixture()
+def db(tmp_path):
+    """Create a SessionDB with a temp database file."""
+    db_path = tmp_path / "test_insights.db"
+    session_db = SessionDB(db_path=db_path)
+    yield session_db
+    session_db.close()
+
+
+@pytest.fixture()
+def populated_db(db):
+    """Create a DB with realistic session data for insights testing."""
+    now = time.time()
+    day = 86400
+
+    # Session 1: CLI, claude-sonnet, ended, 2 days ago
+    db.create_session(
+        session_id="s1", source="cli",
+        model="anthropic/claude-sonnet-4-20250514", user_id="user1",
+    )
+    # Backdate the started_at
+    db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = 's1'", (now - 2 * day,))
+    db.end_session("s1", end_reason="user_exit")
+    db._conn.execute("UPDATE sessions SET ended_at = ? WHERE id = 's1'", (now - 2 * day + 3600,))
+    db.update_token_counts("s1", input_tokens=50000, output_tokens=15000)
+    db.append_message("s1", role="user", content="Hello, help me fix a bug")
+    db.append_message("s1", role="assistant", content="Sure, let me look into that.")
+    db.append_message("s1", role="assistant", content="Let me search the files.",
+                      tool_calls=[{"function": {"name": "search_files"}}])
+    db.append_message("s1", role="tool", content="Found 3 matches", tool_name="search_files")
+    db.append_message("s1", role="assistant", content="Let me read the file.",
+                      tool_calls=[{"function": {"name": "read_file"}}])
+    db.append_message("s1", role="tool", content="file contents...", tool_name="read_file")
+    db.append_message("s1", role="assistant", content="I found the bug. Let me fix it.",
+                      tool_calls=[{"function": {"name": "patch"}}])
+    db.append_message("s1", role="tool", content="patched successfully", tool_name="patch")
+    db.append_message("s1", role="user", content="Thanks!")
+    db.append_message("s1", role="assistant", content="You're welcome!")
+
+    # Session 2: Telegram, gpt-4o, ended, 5 days ago
+    db.create_session(
+        session_id="s2", source="telegram",
+        model="gpt-4o", user_id="user1",
+    )
+    db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = 's2'", (now - 5 * day,))
+    db.end_session("s2", end_reason="timeout")
+    db._conn.execute("UPDATE sessions SET ended_at = ? WHERE id = 's2'", (now - 5 * day + 1800,))
+    db.update_token_counts("s2", input_tokens=20000, output_tokens=8000)
+    db.append_message("s2", role="user", content="Search the web for something")
+    db.append_message("s2", role="assistant", content="Searching...",
+                      tool_calls=[{"function": {"name": "web_search"}}])
+    db.append_message("s2", role="tool", content="results...", tool_name="web_search")
+    db.append_message("s2", role="assistant", content="Here's what I found")
+
+    # Session 3: CLI, deepseek-chat, ended, 10 days ago
+    db.create_session(
+        session_id="s3", source="cli",
+        model="deepseek-chat", user_id="user1",
+    )
+    db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = 's3'", (now - 10 * day,))
+    db.end_session("s3", end_reason="user_exit")
+    db._conn.execute("UPDATE sessions SET ended_at = ? WHERE id = 's3'", (now - 10 * day + 7200,))
+    db.update_token_counts("s3", input_tokens=100000, output_tokens=40000)
+    db.append_message("s3", role="user", content="Run this terminal command")
+    db.append_message("s3", role="assistant", content="Running...",
+                      tool_calls=[{"function": {"name": "terminal"}}])
+    db.append_message("s3", role="tool", content="output...", tool_name="terminal")
+    db.append_message("s3", role="assistant", content="Let me run another",
+                      tool_calls=[{"function": {"name": "terminal"}}])
+    db.append_message("s3", role="tool", content="more output...", tool_name="terminal")
+    db.append_message("s3", role="assistant", content="And search files",
+                      tool_calls=[{"function": {"name": "search_files"}}])
+    db.append_message("s3", role="tool", content="found stuff", tool_name="search_files")
+
+    # Session 4: Discord, same model as s1, ended, 1 day ago
+    db.create_session(
+        session_id="s4", source="discord",
+        model="anthropic/claude-sonnet-4-20250514", user_id="user2",
+    )
+    db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = 's4'", (now - 1 * day,))
+    db.end_session("s4", end_reason="user_exit")
+    db._conn.execute("UPDATE sessions SET ended_at = ? WHERE id = 's4'", (now - 1 * day + 900,))
+    db.update_token_counts("s4", input_tokens=10000, output_tokens=5000)
+    db.append_message("s4", role="user", content="Quick question")
+    db.append_message("s4", role="assistant", content="Sure, go ahead")
+
+    # Session 5: Old session, 45 days ago (should be excluded from 30-day window)
+    db.create_session(
+        session_id="s_old", source="cli",
+        model="gpt-4o-mini", user_id="user1",
+    )
+    db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = 's_old'", (now - 45 * day,))
+    db.end_session("s_old", end_reason="user_exit")
+    db._conn.execute("UPDATE sessions SET ended_at = ? WHERE id = 's_old'", (now - 45 * day + 600,))
+    db.update_token_counts("s_old", input_tokens=5000, output_tokens=2000)
+    db.append_message("s_old", role="user", content="old message")
+    db.append_message("s_old", role="assistant", content="old reply")
+
+    db._conn.commit()
+    return db
+
+
+# =========================================================================
+# Pricing helpers
+# =========================================================================
+
+class TestPricing:
+    def test_exact_match(self):
+        pricing = _get_pricing("gpt-4o")
+        assert pricing["input"] == 2.50
+        assert pricing["output"] == 10.00
+
+    def test_provider_prefix_stripped(self):
+        pricing = _get_pricing("anthropic/claude-sonnet-4-20250514")
+        assert pricing["input"] == 3.00
+        assert pricing["output"] == 15.00
+
+    def test_prefix_match(self):
+        pricing = _get_pricing("claude-3-5-sonnet-20241022")
+        assert pricing["input"] == 3.00
+
+    def test_keyword_heuristic_opus(self):
+        pricing = _get_pricing("some-new-opus-model")
+        assert pricing["input"] == 15.00
+        assert pricing["output"] == 75.00
+
+    def test_keyword_heuristic_haiku(self):
+        pricing = _get_pricing("anthropic/claude-haiku-future")
+        assert pricing["input"] == 0.80
+
+    def test_unknown_model_returns_default(self):
+        pricing = _get_pricing("totally-unknown-model-xyz")
+        assert pricing == _DEFAULT_PRICING
+
+    def test_none_model(self):
+        pricing = _get_pricing(None)
+        assert pricing == _DEFAULT_PRICING
+
+    def test_empty_model(self):
+        pricing = _get_pricing("")
+        assert pricing == _DEFAULT_PRICING
+
+    def test_deepseek_heuristic(self):
+        pricing = _get_pricing("deepseek-v3")
+        assert pricing["input"] == 0.14
+
+    def test_gemini_heuristic(self):
+        pricing = _get_pricing("gemini-3.0-ultra")
+        assert pricing["input"] == 0.15
+
+
+class TestEstimateCost:
+    def test_basic_cost(self):
+        # gpt-4o: 2.50/M input, 10.00/M output
+        cost = _estimate_cost("gpt-4o", 1_000_000, 1_000_000)
+        assert cost == pytest.approx(12.50, abs=0.01)
+
+    def test_zero_tokens(self):
+        cost = _estimate_cost("gpt-4o", 0, 0)
+        assert cost == 0.0
+
+    def test_small_usage(self):
+        cost = _estimate_cost("gpt-4o", 1000, 500)
+        # 1000 * 2.50/1M + 500 * 10.00/1M = 0.0025 + 0.005 = 0.0075
+        assert cost == pytest.approx(0.0075, abs=0.0001)
+
+
+# =========================================================================
+# Format helpers
+# =========================================================================
+
+class TestFormatDuration:
+    def test_seconds(self):
+        assert _format_duration(45) == "45s"
+
+    def test_minutes(self):
+        assert _format_duration(300) == "5m"
+
+    def test_hours_with_minutes(self):
+        result = _format_duration(5400)  # 1.5 hours
+        assert result == "1h 30m"
+
+    def test_exact_hours(self):
+        assert _format_duration(7200) == "2h"
+
+    def test_days(self):
+        result = _format_duration(172800)  # 2 days
+        assert result == "2.0d"
+
+
+class TestBarChart:
+    def test_basic_bars(self):
+        bars = _bar_chart([10, 5, 0, 20], max_width=10)
+        assert len(bars) == 4
+        assert len(bars[3]) == 10  # max value gets full width
+        assert len(bars[0]) == 5   # half of max
+        assert bars[2] == ""       # zero gets empty
+
+    def test_empty_values(self):
+        bars = _bar_chart([], max_width=10)
+        assert bars == []
+
+    def test_all_zeros(self):
+        bars = _bar_chart([0, 0, 0], max_width=10)
+        assert all(b == "" for b in bars)
+
+    def test_single_value(self):
+        bars = _bar_chart([5], max_width=10)
+        assert len(bars) == 1
+        assert len(bars[0]) == 10
+
+
+# =========================================================================
+# InsightsEngine — empty DB
+# =========================================================================
+
+class TestInsightsEmpty:
+    def test_empty_db_returns_empty_report(self, db):
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        assert report["empty"] is True
+        assert report["overview"] == {}
+
+    def test_empty_db_terminal_format(self, db):
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        text = engine.format_terminal(report)
+        assert "No sessions found" in text
+
+    def test_empty_db_gateway_format(self, db):
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        text = engine.format_gateway(report)
+        assert "No sessions found" in text
+
+
+# =========================================================================
+# InsightsEngine — populated DB
+# =========================================================================
+
+class TestInsightsPopulated:
+    def test_generate_returns_all_sections(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+
+        assert report["empty"] is False
+        assert "overview" in report
+        assert "models" in report
+        assert "platforms" in report
+        assert "tools" in report
+        assert "activity" in report
+        assert "top_sessions" in report
+
+    def test_overview_session_count(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        overview = report["overview"]
+
+        # s1, s2, s3, s4 are within 30 days; s_old is 45 days ago
+        assert overview["total_sessions"] == 4
+
+    def test_overview_token_totals(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        overview = report["overview"]
+
+        expected_input = 50000 + 20000 + 100000 + 10000
+        expected_output = 15000 + 8000 + 40000 + 5000
+        assert overview["total_input_tokens"] == expected_input
+        assert overview["total_output_tokens"] == expected_output
+        assert overview["total_tokens"] == expected_input + expected_output
+
+    def test_overview_cost_positive(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        assert report["overview"]["estimated_cost"] > 0
+
+    def test_overview_duration_stats(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        overview = report["overview"]
+
+        # All 4 sessions have durations
+        assert overview["total_hours"] > 0
+        assert overview["avg_session_duration"] > 0
+
+    def test_model_breakdown(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        models = report["models"]
+
+        # Should have 3 distinct models (claude-sonnet x2, gpt-4o, deepseek-chat)
+        model_names = [m["model"] for m in models]
+        assert "claude-sonnet-4-20250514" in model_names
+        assert "gpt-4o" in model_names
+        assert "deepseek-chat" in model_names
+
+        # Claude-sonnet has 2 sessions (s1 + s4)
+        claude = next(m for m in models if "claude-sonnet" in m["model"])
+        assert claude["sessions"] == 2
+
+    def test_platform_breakdown(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        platforms = report["platforms"]
+
+        platform_names = [p["platform"] for p in platforms]
+        assert "cli" in platform_names
+        assert "telegram" in platform_names
+        assert "discord" in platform_names
+
+        cli = next(p for p in platforms if p["platform"] == "cli")
+        assert cli["sessions"] == 2  # s1 + s3
+
+    def test_tool_breakdown(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        tools = report["tools"]
+
+        tool_names = [t["tool"] for t in tools]
+        assert "terminal" in tool_names
+        assert "search_files" in tool_names
+        assert "read_file" in tool_names
+        assert "patch" in tool_names
+        assert "web_search" in tool_names
+
+        # terminal was used 2x in s3
+        terminal = next(t for t in tools if t["tool"] == "terminal")
+        assert terminal["count"] == 2
+
+        # Percentages should sum to ~100%
+        total_pct = sum(t["percentage"] for t in tools)
+        assert total_pct == pytest.approx(100.0, abs=0.1)
+
+    def test_activity_patterns(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        activity = report["activity"]
+
+        assert len(activity["by_day"]) == 7
+        assert len(activity["by_hour"]) == 24
+        assert activity["active_days"] >= 1
+        assert activity["busiest_day"] is not None
+        assert activity["busiest_hour"] is not None
+
+    def test_top_sessions(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        top = report["top_sessions"]
+
+        labels = [t["label"] for t in top]
+        assert "Longest session" in labels
+        assert "Most messages" in labels
+        assert "Most tokens" in labels
+        assert "Most tool calls" in labels
+
+    def test_source_filter_cli(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30, source="cli")
+
+        assert report["overview"]["total_sessions"] == 2  # s1, s3
+
+    def test_source_filter_telegram(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30, source="telegram")
+
+        assert report["overview"]["total_sessions"] == 1  # s2
+
+    def test_source_filter_nonexistent(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30, source="slack")
+
+        assert report["empty"] is True
+
+    def test_days_filter_short(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=3)
+
+        # Only s1 (2 days ago) and s4 (1 day ago) should be included
+        assert report["overview"]["total_sessions"] == 2
+
+    def test_days_filter_long(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=60)
+
+        # All 5 sessions should be included
+        assert report["overview"]["total_sessions"] == 5
+
+
+# =========================================================================
+# Formatting
+# =========================================================================
+
+class TestTerminalFormatting:
+    def test_terminal_format_has_sections(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        text = engine.format_terminal(report)
+
+        assert "Hermes Insights" in text
+        assert "Overview" in text
+        assert "Models Used" in text
+        assert "Top Tools" in text
+        assert "Activity Patterns" in text
+        assert "Notable Sessions" in text
+
+    def test_terminal_format_shows_tokens(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        text = engine.format_terminal(report)
+
+        assert "Input tokens" in text
+        assert "Output tokens" in text
+        assert "Est. cost" in text
+        assert "$" in text
+
+    def test_terminal_format_shows_platforms(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        text = engine.format_terminal(report)
+
+        # Multi-platform, so Platforms section should show
+        assert "Platforms" in text
+        assert "cli" in text
+        assert "telegram" in text
+
+    def test_terminal_format_shows_bar_chart(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        text = engine.format_terminal(report)
+
+        assert "█" in text  # Bar chart characters
+
+
+class TestGatewayFormatting:
+    def test_gateway_format_is_shorter(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        terminal_text = engine.format_terminal(report)
+        gateway_text = engine.format_gateway(report)
+
+        assert len(gateway_text) < len(terminal_text)
+
+    def test_gateway_format_has_bold(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        text = engine.format_gateway(report)
+
+        assert "**" in text  # Markdown bold
+
+    def test_gateway_format_shows_cost(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        text = engine.format_gateway(report)
+
+        assert "$" in text
+        assert "Est. cost" in text
+
+    def test_gateway_format_shows_models(self, populated_db):
+        engine = InsightsEngine(populated_db)
+        report = engine.generate(days=30)
+        text = engine.format_gateway(report)
+
+        assert "Models" in text
+        assert "sessions" in text
+
+
+# =========================================================================
+# Edge cases
+# =========================================================================
+
+class TestEdgeCases:
+    def test_session_with_no_tokens(self, db):
+        """Sessions with zero tokens should not crash."""
+        db.create_session(session_id="s1", source="cli", model="test-model")
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        assert report["empty"] is False
+        assert report["overview"]["total_tokens"] == 0
+        assert report["overview"]["estimated_cost"] == 0.0
+
+    def test_session_with_no_end_time(self, db):
+        """Active (non-ended) sessions should be included but duration = 0."""
+        db.create_session(session_id="s1", source="cli", model="test-model")
+        db.update_token_counts("s1", input_tokens=1000, output_tokens=500)
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        # Session included
+        assert report["overview"]["total_sessions"] == 1
+        assert report["overview"]["total_tokens"] == 1500
+        # But no duration stats (session not ended)
+        assert report["overview"]["total_hours"] == 0
+
+    def test_session_with_no_model(self, db):
+        """Sessions with NULL model should not crash."""
+        db.create_session(session_id="s1", source="cli")
+        db.update_token_counts("s1", input_tokens=1000, output_tokens=500)
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        assert report["empty"] is False
+
+        models = report["models"]
+        assert len(models) == 1
+        assert models[0]["model"] == "unknown"
+
+    def test_single_session_streak(self, db):
+        """Single session should have streak of 0 or 1."""
+        db.create_session(session_id="s1", source="cli", model="test")
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        assert report["activity"]["max_streak"] <= 1
+
+    def test_no_tool_calls(self, db):
+        """Sessions with no tool calls should produce empty tools list."""
+        db.create_session(session_id="s1", source="cli", model="test")
+        db.append_message("s1", role="user", content="hello")
+        db.append_message("s1", role="assistant", content="hi there")
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        assert report["tools"] == []
+
+    def test_only_one_platform(self, db):
+        """Single-platform usage should still work."""
+        db.create_session(session_id="s1", source="cli", model="test")
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=30)
+        assert len(report["platforms"]) == 1
+        assert report["platforms"][0]["platform"] == "cli"
+
+        # Terminal format should NOT show platform section for single platform
+        text = engine.format_terminal(report)
+        # (it still shows platforms section if there's only cli and nothing else)
+        # Actually the condition is > 1 platforms OR non-cli, so single cli won't show
+
+    def test_large_days_value(self, db):
+        """Very large days value should not crash."""
+        db.create_session(session_id="s1", source="cli", model="test")
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=365)
+        assert report["empty"] is False
+
+    def test_zero_days(self, db):
+        """Zero days should return empty (nothing is in the future)."""
+        db.create_session(session_id="s1", source="cli", model="test")
+        db._conn.commit()
+
+        engine = InsightsEngine(db)
+        report = engine.generate(days=0)
+        # Depending on timing, might catch the session if created <1s ago
+        # Just verify it doesn't crash
+        assert "empty" in report