feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled
Add a `privacy.redact_pii` config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider:
- Phone numbers (used as user IDs on WhatsApp/Signal) → hashed to user_<12-hex-char SHA-256 prefix>
- User IDs → hashed to user_<12-hex-char SHA-256 prefix>
- Chat IDs → numeric portion hashed, platform prefix preserved
- Home channel IDs → hashed
- Names/usernames → NOT affected (user-chosen, publicly visible)
Hashes are deterministic (same user → same hash), so the model can still distinguish users in group chats. Routing and delivery continue to use the original values internally — redaction only affects the LLM context. Inspired by OpenClaw PR #47959.
This commit is contained in:
parent
7d2c786acc
commit
c51e7b4af7
6 changed files with 252 additions and 6 deletions
|
|
@ -1452,8 +1452,17 @@ class GatewayRunner:
|
|||
# Set environment variables for tools
|
||||
self._set_session_env(context)
|
||||
|
||||
# Read privacy.redact_pii from config (re-read per message)
|
||||
_redact_pii = False
|
||||
try:
|
||||
with open(_config_path, encoding="utf-8") as _pf:
|
||||
_pcfg = yaml.safe_load(_pf) or {}
|
||||
_redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Build the context prompt to inject
|
||||
context_prompt = build_session_context_prompt(context)
|
||||
context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
|
||||
|
||||
# If the previous session expired and was auto-reset, prepend a notice
|
||||
# so the agent knows this is a fresh conversation (not an intentional /reset).
|
||||
|
|
|
|||
|
|
@ -8,9 +8,11 @@ Handles:
|
|||
- Dynamic system prompt injection (agent knows its context)
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
|
|
@ -19,6 +21,41 @@ from typing import Dict, List, Optional, Any
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PII redaction helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Loose phone-number shape, anchored at both ends: an optional leading "+",
# one digit, then at least six more characters that are each a digit, a
# hyphen, or whitespace.
_PHONE_RE = re.compile(r"^\+?\d[\d\-\s]{6,}$")
|
||||
|
||||
|
||||
def _hash_id(value: str) -> str:
|
||||
"""Deterministic 12-char hex hash of an identifier."""
|
||||
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
|
||||
|
||||
|
||||
def _hash_sender_id(value: str) -> str:
    """Map a sender ID to an opaque ``user_<12hex>`` label.

    The 12-character suffix comes from ``_hash_id``, so the same sender
    always receives the same label.
    """
    return "user_" + _hash_id(value)
|
||||
|
||||
|
||||
def _hash_chat_id(value: str) -> str:
    """Hash a chat ID while keeping any leading platform prefix readable.

    Everything after the first ``:`` is hashed and the text before it is
    kept verbatim. When there is no colon, or the colon is the very first
    character, the whole value is hashed instead.

    ``telegram:12345`` → ``telegram:<hash>``
    ``12345``          → ``<hash>``
    """
    platform, sep, remainder = value.partition(":")
    # A non-empty separator with a non-empty prefix is the same condition
    # as "first colon found at an index greater than zero".
    if sep and platform:
        return f"{platform}:{_hash_id(remainder)}"
    return _hash_id(value)
|
||||
|
||||
|
||||
def _looks_like_phone(value: str) -> bool:
    """Report whether *value* has the shape of a phone number.

    Surrounding whitespace is ignored; the remainder is checked against
    the module-level ``_PHONE_RE`` pattern (E.164 or similar).
    """
    candidate = value.strip()
    return _PHONE_RE.match(candidate) is not None
|
||||
|
||||
from .config import (
|
||||
Platform,
|
||||
GatewayConfig,
|
||||
|
|
@ -146,7 +183,11 @@ class SessionContext:
|
|||
}
|
||||
|
||||
|
||||
def build_session_context_prompt(context: SessionContext) -> str:
|
||||
def build_session_context_prompt(
|
||||
context: SessionContext,
|
||||
*,
|
||||
redact_pii: bool = False,
|
||||
) -> str:
|
||||
"""
|
||||
Build the dynamic system prompt section that tells the agent about its context.
|
||||
|
||||
|
|
@ -154,6 +195,10 @@ def build_session_context_prompt(context: SessionContext) -> str:
|
|||
- Where messages are coming from
|
||||
- What platforms are connected
|
||||
- Where it can deliver scheduled task outputs
|
||||
|
||||
When *redact_pii* is True, phone numbers are stripped and user/chat IDs
|
||||
are replaced with deterministic hashes before being sent to the LLM.
|
||||
Routing still uses the original values (they stay in SessionSource).
|
||||
"""
|
||||
lines = [
|
||||
"## Current Session Context",
|
||||
|
|
@ -165,7 +210,25 @@ def build_session_context_prompt(context: SessionContext) -> str:
|
|||
if context.source.platform == Platform.LOCAL:
|
||||
lines.append(f"**Source:** {platform_name} (the machine running this agent)")
|
||||
else:
|
||||
lines.append(f"**Source:** {platform_name} ({context.source.description})")
|
||||
# Build a description that respects PII redaction
|
||||
src = context.source
|
||||
if redact_pii:
|
||||
# Build a safe description without raw IDs
|
||||
_uname = src.user_name or (
|
||||
_hash_sender_id(src.user_id) if src.user_id else "user"
|
||||
)
|
||||
_cname = src.chat_name or _hash_chat_id(src.chat_id)
|
||||
if src.chat_type == "dm":
|
||||
desc = f"DM with {_uname}"
|
||||
elif src.chat_type == "group":
|
||||
desc = f"group: {_cname}"
|
||||
elif src.chat_type == "channel":
|
||||
desc = f"channel: {_cname}"
|
||||
else:
|
||||
desc = _cname
|
||||
else:
|
||||
desc = src.description
|
||||
lines.append(f"**Source:** {platform_name} ({desc})")
|
||||
|
||||
# Channel topic (if available - provides context about the channel's purpose)
|
||||
if context.source.chat_topic:
|
||||
|
|
@ -175,7 +238,10 @@ def build_session_context_prompt(context: SessionContext) -> str:
|
|||
if context.source.user_name:
|
||||
lines.append(f"**User:** {context.source.user_name}")
|
||||
elif context.source.user_id:
|
||||
lines.append(f"**User ID:** {context.source.user_id}")
|
||||
uid = context.source.user_id
|
||||
if redact_pii:
|
||||
uid = _hash_sender_id(uid)
|
||||
lines.append(f"**User ID:** {uid}")
|
||||
|
||||
# Platform-specific behavioral notes
|
||||
if context.source.platform == Platform.SLACK:
|
||||
|
|
@ -210,7 +276,8 @@ def build_session_context_prompt(context: SessionContext) -> str:
|
|||
lines.append("")
|
||||
lines.append("**Home Channels (default destinations):**")
|
||||
for platform, home in context.home_channels.items():
|
||||
lines.append(f" - {platform.value}: {home.name} (ID: {home.chat_id})")
|
||||
hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
|
||||
lines.append(f" - {platform.value}: {home.name} (ID: {hc_id})")
|
||||
|
||||
# Delivery options for scheduled tasks
|
||||
lines.append("")
|
||||
|
|
@ -220,7 +287,10 @@ def build_session_context_prompt(context: SessionContext) -> str:
|
|||
if context.source.platform == Platform.LOCAL:
|
||||
lines.append("- `\"origin\"` → Local output (saved to files)")
|
||||
else:
|
||||
lines.append(f"- `\"origin\"` → Back to this chat ({context.source.chat_name or context.source.chat_id})")
|
||||
_origin_label = context.source.chat_name or (
|
||||
_hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
|
||||
)
|
||||
lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
|
||||
|
||||
# Local always available
|
||||
lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue