From 177be32b7f9174bca7fceebba097b187aa1d9c5f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sun, 1 Mar 2026 00:23:19 -0800
Subject: [PATCH] feat(cli): add /usage command to display session token usage

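Introduced a new "/usage" command in the CLI that shows cumulative token usage for the current session: prompt tokens, completion tokens, total tokens, API call count, and the current context-window state (last prompt size relative to the context length, message count, and compression count). Registered the command in the COMMANDS table in hermes_cli/commands.py so it is listed alongside the other commands. Enhanced the AIAgent class to accumulate these token and API-call counters throughout the session.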
---
 cli.py                 | 38 ++++++++++++++++++++++++++++++++++++++
 hermes_cli/commands.py |  1 +
 run_agent.py           | 11 +++++++++++
 3 files changed, 50 insertions(+)

diff --git a/cli.py b/cli.py
index 7f2b160b..2081c7aa 100755
--- a/cli.py
+++ b/cli.py
@@ -1724,6 +1724,8 @@ class HermesCLI:
             self._toggle_verbose()
         elif cmd_lower == "/compress":
             self._manual_compress()
+        elif cmd_lower == "/usage":
+            self._show_usage()
         else:
             # Check for skill slash commands (/gif-search, /axolotl, etc.)
             base_cmd = cmd_lower.split()[0]
@@ -1800,6 +1802,42 @@ class HermesCLI:
         except Exception as e:
             print(f"  ❌ Compression failed: {e}")
 
+    def _show_usage(self):
+        """Print cumulative token/API-call totals and the current context-window state for this session."""
+        if not self.agent:
+            print("(._.) No active agent -- send a message first.")
+            return
+
+        agent = self.agent  # session_* counters read below are accumulated by AIAgent in run_agent.py
+        prompt = agent.session_prompt_tokens
+        completion = agent.session_completion_tokens
+        total = agent.session_total_tokens
+        calls = agent.session_api_calls
+
+        if calls == 0:  # nothing has been counted yet, so there are no totals worth printing
+            print("(._.) No API calls made yet in this session.")
+            return
+
+        # Current context window state, read from the agent's context compressor
+        compressor = agent.context_compressor
+        last_prompt = compressor.last_prompt_tokens
+        ctx_len = compressor.context_length
+        pct = (last_prompt / ctx_len * 100) if ctx_len else 0  # falls back to 0 when ctx_len is falsy (no division by zero)
+        compressions = compressor.compression_count
+
+        msg_count = len(self.conversation_history)  # number of entries in the CLI's conversation history
+
+        print(f"  📊 Session Token Usage")
+        print(f"  {'─' * 40}")
+        print(f"  Prompt tokens (input):     {prompt:>10,}")  # values are right-aligned with thousands separators
+        print(f"  Completion tokens (output): {completion:>9,}")  # width 9 offsets the one-character-longer label
+        print(f"  Total tokens:              {total:>10,}")
+        print(f"  API calls:                 {calls:>10,}")
+        print(f"  {'─' * 40}")
+        print(f"  Current context:  {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
+        print(f"  Messages:         {msg_count}")
+        print(f"  Compressions:     {compressions}")
+
         if self.verbose:
             logging.getLogger().setLevel(logging.DEBUG)
             for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 5de1c6bc..b091a790 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -27,6 +27,7 @@ COMMANDS = {
     "/platforms": "Show gateway/messaging platform status",
     "/verbose": "Cycle tool progress display: off → new → all → verbose",
     "/compress": "Manually compress conversation context (flush memories + summarize)",
+    "/usage": "Show token usage for the current session",
     "/quit": "Exit the CLI (also: /exit, /q)",
 }
 
diff --git a/run_agent.py b/run_agent.py
index 32b69489..65dd3c2f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -535,6 +535,12 @@ class AIAgent:
         )
         self.compression_enabled = compression_enabled
         self._user_turn_count = 0
+
+        # Cumulative token usage for the session
+        self.session_prompt_tokens = 0
+        self.session_completion_tokens = 0
+        self.session_total_tokens = 0
+        self.session_api_calls = 0
         
         if not self.quiet_mode:
             if compression_enabled:
@@ -3105,6 +3111,11 @@ class AIAgent:
                             "total_tokens": total_tokens,
                         }
                         self.context_compressor.update_from_response(usage_dict)
+
+                        self.session_prompt_tokens += prompt_tokens
+                        self.session_completion_tokens += completion_tokens
+                        self.session_total_tokens += total_tokens
+                        self.session_api_calls += 1
                         
                         if self.verbose_logging:
                             logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")