fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs)
Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
This commit is contained in:
parent
c51e7b4af7
commit
9a423c3487
3 changed files with 42 additions and 2 deletions
|
|
@ -183,6 +183,16 @@ class SessionContext:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_PII_SAFE_PLATFORMS = frozenset({
|
||||||
|
Platform.WHATSAPP,
|
||||||
|
Platform.SIGNAL,
|
||||||
|
Platform.TELEGRAM,
|
||||||
|
})
|
||||||
|
"""Platforms where user IDs can be safely redacted (no in-message mention system
|
||||||
|
that requires raw IDs). Discord and Slack are excluded because mentions use ``<@user_id>``
|
||||||
|
and the LLM needs the real ID to tag users."""
|
||||||
|
|
||||||
|
|
||||||
def build_session_context_prompt(
|
def build_session_context_prompt(
|
||||||
context: SessionContext,
|
context: SessionContext,
|
||||||
*,
|
*,
|
||||||
|
|
@ -196,10 +206,14 @@ def build_session_context_prompt(
|
||||||
- What platforms are connected
|
- What platforms are connected
|
||||||
- Where it can deliver scheduled task outputs
|
- Where it can deliver scheduled task outputs
|
||||||
|
|
||||||
When *redact_pii* is True, phone numbers are stripped and user/chat IDs
|
When *redact_pii* is True **and** the source platform is in
|
||||||
|
``_PII_SAFE_PLATFORMS``, phone numbers are stripped and user/chat IDs
|
||||||
are replaced with deterministic hashes before being sent to the LLM.
|
are replaced with deterministic hashes before being sent to the LLM.
|
||||||
|
Other platforms (e.g. Discord, Slack) are excluded because their mentions need real IDs.
|
||||||
Routing still uses the original values (they stay in SessionSource).
|
Routing still uses the original values (they stay in SessionSource).
|
||||||
"""
|
"""
|
||||||
|
# Only apply redaction on platforms where IDs aren't needed for mentions
|
||||||
|
redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
|
||||||
lines = [
|
lines = [
|
||||||
"## Current Session Context",
|
"## Current Session Context",
|
||||||
"",
|
"",
|
||||||
|
|
|
||||||
|
|
@ -130,3 +130,27 @@ class TestBuildSessionContextPromptRedaction:
|
||||||
p1 = build_session_context_prompt(ctx1, redact_pii=True)
|
p1 = build_session_context_prompt(ctx1, redact_pii=True)
|
||||||
p2 = build_session_context_prompt(ctx2, redact_pii=True)
|
p2 = build_session_context_prompt(ctx2, redact_pii=True)
|
||||||
assert p1 != p2
|
assert p1 != p2
|
||||||
|
|
||||||
|
def test_discord_ids_not_redacted_even_with_flag(self):
|
||||||
|
"""Discord needs real IDs for <@user_id> mentions."""
|
||||||
|
ctx = _make_context(user_id="123456789", platform=Platform.DISCORD)
|
||||||
|
prompt = build_session_context_prompt(ctx, redact_pii=True)
|
||||||
|
assert "123456789" in prompt
|
||||||
|
|
||||||
|
def test_whatsapp_ids_redacted(self):
|
||||||
|
ctx = _make_context(user_id="+15551234567", platform=Platform.WHATSAPP)
|
||||||
|
prompt = build_session_context_prompt(ctx, redact_pii=True)
|
||||||
|
assert "+15551234567" not in prompt
|
||||||
|
assert "user_" in prompt
|
||||||
|
|
||||||
|
def test_signal_ids_redacted(self):
|
||||||
|
ctx = _make_context(user_id="+15551234567", platform=Platform.SIGNAL)
|
||||||
|
prompt = build_session_context_prompt(ctx, redact_pii=True)
|
||||||
|
assert "+15551234567" not in prompt
|
||||||
|
assert "user_" in prompt
|
||||||
|
|
||||||
|
def test_slack_ids_not_redacted(self):
|
||||||
|
"""Slack may need IDs for mentions too."""
|
||||||
|
ctx = _make_context(user_id="U12345ABC", platform=Platform.SLACK)
|
||||||
|
prompt = build_session_context_prompt(ctx, redact_pii=True)
|
||||||
|
assert "U12345ABC" in prompt
|
||||||
|
|
|
||||||
|
|
@ -839,7 +839,7 @@ privacy:
|
||||||
redact_pii: false # Strip PII from LLM context (gateway only)
|
redact_pii: false # Strip PII from LLM context (gateway only)
|
||||||
```
|
```
|
||||||
|
|
||||||
When `redact_pii` is `true`, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM:
|
When `redact_pii` is `true`, the gateway redacts personally identifiable information from the system prompt on supported platforms before sending it to the LLM:
|
||||||
|
|
||||||
| Field | Treatment |
|
| Field | Treatment |
|
||||||
|-------|-----------|
|
|-------|-----------|
|
||||||
|
|
@ -849,6 +849,8 @@ When `redact_pii` is `true`, the gateway redacts personally identifiable informa
|
||||||
| Home channel IDs | Numeric portion hashed |
|
| Home channel IDs | Numeric portion hashed |
|
||||||
| User names / usernames | **Not affected** (user-chosen, publicly visible) |
|
| User names / usernames | **Not affected** (user-chosen, publicly visible) |
|
||||||
|
|
||||||
|
**Platform support:** Redaction applies to WhatsApp, Signal, and Telegram. Discord and Slack are excluded because their mention systems (`<@user_id>`) require the real ID in the LLM context.
|
||||||
|
|
||||||
Hashes are deterministic — the same user always maps to the same hash, so the model can still distinguish between users in group chats. Routing and delivery use the original values internally.
|
Hashes are deterministic — the same user always maps to the same hash, so the model can still distinguish between users in group chats. Routing and delivery use the original values internally.
|
||||||
|
|
||||||
## Speech-to-Text (STT)
|
## Speech-to-Text (STT)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue