fix(cli): reasoning tag suppression during streaming + fix fallback detection
Fixes two issues found during live testing:

1. Reasoning tag suppression: close tags like </REASONING_SCRATCHPAD> that
   arrive split across stream tokens (e.g. '</REASONING_SCRATCH' +
   'PAD>\n\nHello') were being lost because the buffer was discarded.
   Fix: keep a sliding window of the buffer's tail (the maximum close-tag
   length) so partial tags survive across tokens.

2. Streaming fallback detection was too broad: 'stream' matched any error
   containing that word, including 'stream_options' rejections. Narrowed
   to specific phrases: 'streaming is not', 'streaming not support',
   'does not support stream', 'not available'.

Verified with real API calls: streaming works end-to-end with reasoning
block suppression and response box framing, and falls back properly to a
Rich Panel when streaming isn't active.
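For illustration, a minimal self-contained sketch of the sliding-window idea in fix 1. Names here are hypothetical (filter_tokens and CLOSE_TAG are not from cli.py), and the sketch assumes suppression is already active, i.e. an open tag was seen earlier:

    CLOSE_TAG = "</REASONING_SCRATCHPAD>"

    def filter_tokens(tokens):
        """Suppress everything up to CLOSE_TAG, yield what follows it."""
        buf = ""
        suppressing = True
        for tok in tokens:
            if not suppressing:
                yield tok
                continue
            buf += tok
            idx = buf.find(CLOSE_TAG)
            if idx != -1:
                suppressing = False
                after = buf[idx + len(CLOSE_TAG):]
                if after:
                    yield after
            elif len(buf) > len(CLOSE_TAG):
                # The fix: keep the buffer's tail instead of discarding it,
                # so a split tag like "</REASONING_SCRATCH" + "PAD>" survives.
                buf = buf[-len(CLOSE_TAG):]

    # The split-token case from the live test:
    print("".join(filter_tokens(["</REASONING_SCRATCH", "PAD>\n\nHello"])))
    # prints "\n\nHello"; the real handler strips leading newlines before display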
parent 2219695d92
commit ac739e485f

2 changed files with 82 additions and 4 deletions

cli.py | 79
@@ -1416,12 +1416,86 @@ class HermesCLI:
        Receives text deltas from the agent as tokens arrive. Buffers
        partial lines and emits complete lines via _cprint to work
        reliably with prompt_toolkit's patch_stdout.

        Reasoning/thinking blocks (<REASONING_SCRATCHPAD>, <think>, etc.)
        are suppressed during streaming since they'd display raw XML tags.
        The agent strips them from the final response anyway.
        """
        if not text:
            return

        # Open the response box header on the very first delta
        self._stream_started = True

        # ── Tag-based reasoning suppression ──
        # Track whether we're inside a reasoning/thinking block.
        # These tags are model-generated (system prompt tells the model
        # to use them) and get stripped from final_response. We must
        # suppress them during streaming too.
        _OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>")
        _CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>")

        # Append to a pre-filter buffer first
        self._stream_prefilt = getattr(self, "_stream_prefilt", "") + text

        # Check if we're entering a reasoning block
        if not getattr(self, "_in_reasoning_block", False):
            for tag in _OPEN_TAGS:
                idx = self._stream_prefilt.find(tag)
                if idx != -1:
                    # Emit everything before the tag
                    before = self._stream_prefilt[:idx]
                    if before:
                        self._emit_stream_text(before)
                    self._in_reasoning_block = True
                    self._stream_prefilt = self._stream_prefilt[idx + len(tag):]
                    break

        # Could also be a partial open tag at the end — hold it back
        if not getattr(self, "_in_reasoning_block", False):
            # Check for partial tag match at the end
            safe = self._stream_prefilt
            for tag in _OPEN_TAGS:
                for i in range(1, len(tag)):
                    if self._stream_prefilt.endswith(tag[:i]):
                        safe = self._stream_prefilt[:-i]
                        break
            if safe:
                self._emit_stream_text(safe)
                self._stream_prefilt = self._stream_prefilt[len(safe):]
            return

        # Inside a reasoning block — look for close tag.
        # Keep accumulating _stream_prefilt because close tags can arrive
        # split across multiple tokens (e.g. "</REASONING_SCRATCH" + "PAD>...").
        if getattr(self, "_in_reasoning_block", False):
            for tag in _CLOSE_TAGS:
                idx = self._stream_prefilt.find(tag)
                if idx != -1:
                    self._in_reasoning_block = False
                    after = self._stream_prefilt[idx + len(tag):]
                    self._stream_prefilt = ""
                    # Process remaining text after close tag
                    if after:
                        self._emit_stream_text(after)
                    return
            # Still inside reasoning block — keep only the tail that could
            # be a partial close tag prefix (save memory on long blocks).
            max_tag_len = max(len(t) for t in _CLOSE_TAGS)
            if len(self._stream_prefilt) > max_tag_len:
                self._stream_prefilt = self._stream_prefilt[-max_tag_len:]
            return

    def _emit_stream_text(self, text: str) -> None:
        """Emit filtered text to the streaming display."""
        if not text:
            return

        # Open the response box header on the very first visible text
        if not self._stream_box_opened:
            # Strip leading whitespace/newlines before first visible content
            text = text.lstrip("\n")
            if not text:
                return
            self._stream_box_opened = True
            try:
                from hermes_cli.skin_engine import get_active_skin
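As a rough sanity check on the partial open-tag holdback above: the buffer tail is withheld whenever it could be the start of an open tag, and flushed otherwise. A hypothetical helper (split_safe is illustrative, not part of cli.py) makes the behavior concrete:

    OPEN_TAG = "<think>"

    def split_safe(buf):
        """Return (emit_now, hold_back) for a possible partial OPEN_TAG."""
        for i in range(len(OPEN_TAG) - 1, 0, -1):  # longest prefix first
            if buf.endswith(OPEN_TAG[:i]):
                return buf[:-i], buf[-i:]
        return buf, ""

    print(split_safe("Sure thing.<th"))  # ('Sure thing.', '<th'): held for the next token
    print(split_safe("1 < 2 holds"))     # ('1 < 2 holds', ''): nothing tag-like at the end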
@@ -1433,7 +1507,6 @@ class HermesCLI:
                fill = w - 2 - len(label)
                _cprint(f"\n{_GOLD}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")

-       self._stream_started = True
        self._stream_buf += text

        # Emit complete lines, keep partial remainder in buffer
@@ -1457,6 +1530,8 @@ class HermesCLI:
        self._stream_buf = ""
        self._stream_started = False
        self._stream_box_opened = False
+       self._stream_prefilt = ""
+       self._in_reasoning_block = False

    def _slow_command_status(self, command: str) -> str:
        """Return a user-facing status message for slower slash commands."""
@@ -3209,10 +3209,13 @@ class AIAgent:
            result["response"] = _call_chat_completions()
        except Exception as e:
            err_text = str(e).lower()
-           # Fall back to non-streaming if provider doesn't support it
+           # Fall back to non-streaming if provider doesn't support it.
+           # Be specific in matching — "stream" alone is too broad and
+           # catches unrelated errors like "stream_options" rejections.
            stream_unsupported = any(
                kw in err_text
-               for kw in ("stream", "not support", "unsupported", "not available")
+               for kw in ("streaming is not", "streaming not support",
+                          "does not support stream", "not available")
            )
            if stream_unsupported:
                logger.info("Streaming not supported by provider, falling back to non-streaming: %s", e)
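To see why the narrowing matters, a quick check with assumed provider error strings (illustrative only; real providers may word these differently):

    broad_kws  = ("stream", "not support", "unsupported", "not available")
    narrow_kws = ("streaming is not", "streaming not support",
                  "does not support stream", "not available")

    for err in ("Invalid parameter: stream_options",
                "Streaming is not supported for this model"):
        e = err.lower()
        print(f"{err!r}: broad={any(k in e for k in broad_kws)}, "
              f"narrow={any(k in e for k in narrow_kws)}")
    # The old broad list flags both errors (the stream_options rejection is a
    # false positive); the narrowed list flags only the genuine streaming error.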