fix: add title validation — sanitize, length limit, control char stripping
- Add SessionDB.sanitize_title() static method:
  - Strips ASCII control chars (null, bell, ESC, etc.) except whitespace
  - Strips problematic Unicode controls (zero-width, RTL override, BOM)
  - Collapses whitespace runs, strips edges
  - Normalizes empty/whitespace-only to None
  - Enforces 100 char max length (raises ValueError)
- set_session_title() now calls sanitize_title() internally, so all call sites (CLI, gateway, auto-lineage) are protected
- CLI /title handler sanitizes early to show correct feedback
- Gateway /title handler sanitizes early to show correct feedback
- 24 new tests: sanitize_title (17 cases covering control chars, zero-width, RTL, BOM, emoji, CJK, length, integration), gateway validation (too long, control chars, only-control-chars)
This commit is contained in:
parent
4fdd6c0dac
commit
34b4fe495e
5 changed files with 201 additions and 14 deletions
|
|
@ -122,6 +122,48 @@ class TestHandleTitleCommand:
|
|||
result = await runner._handle_title_command(event)
|
||||
assert "not available" in result
|
||||
|
||||
@pytest.mark.asyncio
async def test_title_too_long(self, tmp_path):
    """Setting a title that exceeds max length returns error.

    The handler is expected to reply with a warning message containing
    "too long" and the warning emoji rather than raising.
    """
    from hermes_state import SessionDB

    db = SessionDB(db_path=tmp_path / "state.db")
    # try/finally guarantees the database handle is released even when
    # one of the assertions below fails.
    try:
        db.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=db)
        # 150 characters is well past the title length limit.
        long_title = "A" * 150
        event = _make_event(text=f"/title {long_title}")

        result = await runner._handle_title_command(event)

        assert "too long" in result
        assert "⚠️" in result
    finally:
        db.close()
|
||||
|
||||
@pytest.mark.asyncio
async def test_title_control_chars_sanitized(self, tmp_path):
    """Control characters are stripped and sanitized title is stored.

    Checks both the reply shown to the user and the value persisted in
    the session database.
    """
    from hermes_state import SessionDB

    db = SessionDB(db_path=tmp_path / "state.db")
    # try/finally guarantees the database handle is released even when
    # one of the assertions below fails.
    try:
        db.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=db)
        # The embedded NUL byte must be removed, joining the two words.
        event = _make_event(text="/title hello\x00world")

        result = await runner._handle_title_command(event)

        assert "helloworld" in result
        assert db.get_session_title("test_session_123") == "helloworld"
    finally:
        db.close()
|
||||
|
||||
@pytest.mark.asyncio
async def test_title_only_control_chars(self, tmp_path):
    """Title with only control chars returns empty error.

    After sanitization nothing is left of the title, so the handler is
    expected to reply with an "empty after cleanup" message.
    """
    from hermes_state import SessionDB

    db = SessionDB(db_path=tmp_path / "state.db")
    # try/finally guarantees the database handle is released even when
    # the assertion below fails.
    try:
        db.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=db)
        event = _make_event(text="/title \x00\x01\x02")

        result = await runner._handle_title_command(event)

        assert "empty after cleanup" in result
    finally:
        db.close()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_works_across_platforms(self, tmp_path):
|
||||
"""The /title command works for Discord, Slack, and WhatsApp too."""
|
||||
|
|
|
|||
|
|
@ -435,6 +435,89 @@ class TestSessionTitle:
|
|||
assert session["ended_at"] is not None
|
||||
|
||||
|
||||
class TestSanitizeTitle:
    """Tests for SessionDB.sanitize_title() validation and cleaning."""

    # --- whitespace handling -------------------------------------------

    def test_normal_title_unchanged(self):
        """A clean title passes through untouched."""
        assert SessionDB.sanitize_title("My Project") == "My Project"

    def test_strips_whitespace(self):
        """Leading and trailing whitespace is removed."""
        assert SessionDB.sanitize_title(" hello world ") == "hello world"

    def test_collapses_internal_whitespace(self):
        """A run of several spaces collapses to a single space."""
        # The input must contain a multi-space run; a single space would
        # make this assertion pass without exercising any collapsing.
        assert SessionDB.sanitize_title("hello   world") == "hello world"

    def test_tabs_and_newlines_collapsed(self):
        """Mixed tab/newline runs collapse to a single space."""
        assert SessionDB.sanitize_title("hello\t\nworld") == "hello world"

    # --- empty-ish input normalizes to None ----------------------------

    def test_none_returns_none(self):
        """None is passed through as None."""
        assert SessionDB.sanitize_title(None) is None

    def test_empty_string_returns_none(self):
        """The empty string is normalized to None."""
        assert SessionDB.sanitize_title("") is None

    def test_whitespace_only_returns_none(self):
        """A whitespace-only title is normalized to None."""
        assert SessionDB.sanitize_title(" \t\n ") is None

    # --- control / invisible character stripping -----------------------

    def test_control_chars_stripped(self):
        """ASCII control characters are removed."""
        # Null byte, bell, backspace, etc.
        assert SessionDB.sanitize_title("hello\x00world") == "helloworld"
        assert SessionDB.sanitize_title("\x07\x08test\x1b") == "test"

    def test_del_char_stripped(self):
        """The DEL character (0x7F) is removed."""
        assert SessionDB.sanitize_title("hello\x7fworld") == "helloworld"

    def test_zero_width_chars_stripped(self):
        """Zero-width Unicode characters are removed."""
        # Zero-width space (U+200B), zero-width joiner (U+200D)
        assert SessionDB.sanitize_title("hello\u200bworld") == "helloworld"
        assert SessionDB.sanitize_title("hello\u200dworld") == "helloworld"

    def test_rtl_override_stripped(self):
        """The right-to-left override character is removed."""
        # Right-to-left override (U+202E) — used in filename spoofing attacks
        assert SessionDB.sanitize_title("hello\u202eworld") == "helloworld"

    def test_bom_stripped(self):
        """A leading byte order mark is removed."""
        # Byte order mark (U+FEFF)
        assert SessionDB.sanitize_title("\ufeffhello") == "hello"

    def test_only_control_chars_returns_none(self):
        """A title made solely of stripped characters becomes None."""
        assert SessionDB.sanitize_title("\x00\x01\x02\u200b\ufeff") is None

    # --- length limit --------------------------------------------------

    def test_max_length_allowed(self):
        """A title of exactly 100 characters is accepted unchanged."""
        title = "A" * 100
        assert SessionDB.sanitize_title(title) == title

    def test_exceeds_max_length_raises(self):
        """A title of 101 characters raises ValueError."""
        title = "A" * 101
        with pytest.raises(ValueError, match="too long"):
            SessionDB.sanitize_title(title)

    # --- legitimate non-ASCII content is preserved ---------------------

    def test_unicode_emoji_allowed(self):
        """Emoji are kept as-is."""
        assert SessionDB.sanitize_title("🚀 My Project 🎉") == "🚀 My Project 🎉"

    def test_cjk_characters_allowed(self):
        """CJK characters are kept as-is."""
        assert SessionDB.sanitize_title("我的项目") == "我的项目"

    def test_accented_characters_allowed(self):
        """Accented Latin characters are kept as-is."""
        assert SessionDB.sanitize_title("Résumé éditing") == "Résumé éditing"

    def test_special_punctuation_allowed(self):
        """Punctuation such as '#', em dash and quotes is kept as-is."""
        title = "PR #438 — fixing the 'auth' middleware"
        assert SessionDB.sanitize_title(title) == title

    # --- integration with set_session_title ----------------------------

    def test_sanitize_applied_in_set_session_title(self, db):
        """set_session_title applies sanitize_title internally."""
        db.create_session("s1", "cli")
        db.set_session_title("s1", " hello\x00 world ")
        assert db.get_session("s1")["title"] == "hello world"

    def test_too_long_title_rejected_by_set(self, db):
        """set_session_title raises ValueError for overly long titles."""
        db.create_session("s1", "cli")
        with pytest.raises(ValueError, match="too long"):
            db.set_session_title("s1", "X" * 150)
|
||||
|
||||
|
||||
class TestSchemaInit:
|
||||
def test_wal_mode(self, db):
|
||||
cursor = db._conn.execute("PRAGMA journal_mode")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue