feat: add voice channel awareness — inject participant and speaking state into agent context
This commit is contained in:
parent
49f3f0fc62
commit
1ad5e0ed15
3 changed files with 230 additions and 6 deletions
|
|
@ -441,6 +441,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
intents.dm_messages = True
|
intents.dm_messages = True
|
||||||
intents.guild_messages = True
|
intents.guild_messages = True
|
||||||
intents.members = True
|
intents.members = True
|
||||||
|
intents.voice_states = True
|
||||||
|
|
||||||
# Create bot
|
# Create bot
|
||||||
self._client = commands.Bot(
|
self._client = commands.Bot(
|
||||||
|
|
@ -494,7 +495,40 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
# "all" falls through to handle_message
|
# "all" falls through to handle_message
|
||||||
|
|
||||||
await self._handle_message(message)
|
await self._handle_message(message)
|
||||||
|
|
||||||
|
@self._client.event
async def on_voice_state_update(member, before, after):
    """Log voice-channel joins, leaves and moves of other members.

    Events are only logged for guilds that have an entry in the
    adapter's ``_voice_clients`` mapping; events caused by the bot's
    own user are skipped.
    """
    tracked_guilds = adapter_self._voice_clients
    if not tracked_guilds:
        return

    guild_id = member.guild.id
    if guild_id not in tracked_guilds:
        return

    # The bot's own voice state changes are not interesting here.
    if member == adapter_self._client.user:
        return

    previous, current = before.channel, after.channel
    if previous is None and current is None:
        return  # no channel on either side: nothing to report

    if previous is None:
        change = "joined " + current.name
    elif current is None:
        change = "left " + previous.name
    elif previous != current:
        change = f"moved {previous.name} -> {current.name}"
    else:
        # Same channel before and after (e.g. mute/deafen): not logged.
        return

    logger.info(
        "Voice state: %s (%d) %s (guild %d)",
        member.display_name,
        member.id,
        change,
        guild_id,
    )
|
||||||
|
|
||||||
# Register slash commands
|
# Register slash commands
|
||||||
self._register_slash_commands()
|
self._register_slash_commands()
|
||||||
|
|
||||||
|
|
@ -864,6 +898,75 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
vc = self._voice_clients.get(guild_id)
|
vc = self._voice_clients.get(guild_id)
|
||||||
return vc is not None and vc.is_connected()
|
return vc is not None and vc.is_connected()
|
||||||
|
|
||||||
|
def get_voice_channel_info(self, guild_id: int) -> Optional[Dict[str, Any]]:
    """Return a snapshot of the voice channel the bot occupies in a guild.

    Args:
        guild_id: Key used to look up the guild's voice client in
            ``self._voice_clients`` and its receiver in
            ``self._voice_receivers``.

    Returns:
        ``None`` when there is no connected voice client for the guild or
        the client has no channel. Otherwise a dict with:

        * ``channel_name`` - name of the channel.
        * ``member_count`` - number of listed members (the bot's own
          user is excluded).
        * ``members`` - list of dicts with ``user_id``, ``display_name``,
          ``is_bot`` and ``is_speaking``.
        * ``speaking_count`` - number of listed members whose
          ``is_speaking`` flag is true.
    """
    import time as _time

    # A member counts as speaking if a packet for one of their SSRCs was
    # recorded by the receiver within this many seconds.
    speaking_window = 2.0

    vc = self._voice_clients.get(guild_id)
    if vc is None or not vc.is_connected():
        return None

    channel = vc.channel
    if channel is None:
        return None

    # Collect the user IDs that were heard recently, under the receiver's
    # lock so the two mappings are read consistently.
    recently_heard = set()
    receiver = self._voice_receivers.get(guild_id)
    if receiver:
        now = _time.monotonic()
        with receiver._lock:
            for ssrc, last_t in receiver._last_packet_time.items():
                if now - last_t < speaking_window:
                    uid = receiver._ssrc_to_user.get(ssrc)
                    if uid:
                        recently_heard.add(uid)

    # List everyone in the channel except the bot itself.
    bot_user = self._client.user if self._client else None
    members_info = [
        {
            "user_id": m.id,
            "display_name": m.display_name,
            "is_bot": m.bot,
            "is_speaking": m.id in recently_heard,
        }
        for m in channel.members
        if not (bot_user and m.id == bot_user.id)
    ]

    # Count only listed members, so the total always matches the
    # per-member flags even if the SSRC map still holds users who are
    # not (or no longer) in the returned member list.
    speaking_count = sum(1 for entry in members_info if entry["is_speaking"])

    return {
        "channel_name": channel.name,
        "member_count": len(members_info),
        "members": members_info,
        "speaking_count": speaking_count,
    }
|
||||||
|
|
||||||
|
def get_voice_channel_context(self, guild_id: int) -> str:
    """Render the guild's voice channel state as plain text.

    Builds on ``get_voice_channel_info``: a header line with the channel
    name and participant count, followed by one line per member, with
    ``(speaking)`` appended for members flagged as speaking. Returns an
    empty string when no info is available for the guild.
    """
    snapshot = self.get_voice_channel_info(guild_id)
    if not snapshot:
        return ""

    header = (
        f"[Voice channel: #{snapshot['channel_name']} — "
        f"{snapshot['member_count']} participant(s)]"
    )
    roster = [
        f" - {entry['display_name']}{' (speaking)' if entry['is_speaking'] else ''}"
        for entry in snapshot["members"]
    ]
    return "\n".join([header, *roster])
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Voice listening (Phase 2)
|
# Voice listening (Phase 2)
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -1408,6 +1408,19 @@ class GatewayRunner:
|
||||||
f"or ignore to skip."
|
f"or ignore to skip."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------
|
||||||
|
# Voice channel awareness — inject current voice channel state
|
||||||
|
# into context so the agent knows who is in the channel and who
|
||||||
|
# is speaking, without needing a separate tool call.
|
||||||
|
# -----------------------------------------------------------------
|
||||||
|
if source.platform == Platform.DISCORD:
|
||||||
|
adapter = self.adapters.get(Platform.DISCORD)
|
||||||
|
guild_id = self._get_guild_id(event)
|
||||||
|
if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
|
||||||
|
vc_context = adapter.get_voice_channel_context(guild_id)
|
||||||
|
if vc_context:
|
||||||
|
context_prompt += f"\n\n{vc_context}"
|
||||||
|
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
# Auto-analyze images sent by the user
|
# Auto-analyze images sent by the user
|
||||||
#
|
#
|
||||||
|
|
@ -2156,11 +2169,18 @@ class GatewayRunner:
|
||||||
# Append voice channel info if connected
|
# Append voice channel info if connected
|
||||||
adapter = self.adapters.get(event.source.platform)
|
adapter = self.adapters.get(event.source.platform)
|
||||||
guild_id = self._get_guild_id(event)
|
guild_id = self._get_guild_id(event)
|
||||||
if guild_id and hasattr(adapter, "is_in_voice_channel"):
|
if guild_id and hasattr(adapter, "get_voice_channel_info"):
|
||||||
if adapter.is_in_voice_channel(guild_id):
|
info = adapter.get_voice_channel_info(guild_id)
|
||||||
vc = adapter._voice_clients.get(guild_id)
|
if info:
|
||||||
ch_name = vc.channel.name if vc and vc.channel else "unknown"
|
lines = [
|
||||||
return f"Voice mode: {labels.get(mode, mode)}\nVoice channel: {ch_name}"
|
f"Voice mode: {labels.get(mode, mode)}",
|
||||||
|
f"Voice channel: #{info['channel_name']}",
|
||||||
|
f"Participants: {info['member_count']}",
|
||||||
|
]
|
||||||
|
for m in info["members"]:
|
||||||
|
status = " (speaking)" if m.get("is_speaking") else ""
|
||||||
|
lines.append(f" - {m['display_name']}{status}")
|
||||||
|
return "\n".join(lines)
|
||||||
return f"Voice mode: {labels.get(mode, mode)}"
|
return f"Voice mode: {labels.get(mode, mode)}"
|
||||||
else:
|
else:
|
||||||
# Toggle: off → on, on/all → off
|
# Toggle: off → on, on/all → off
|
||||||
|
|
|
||||||
|
|
@ -1852,3 +1852,104 @@ class TestAutoTtsTempFileCleanup:
|
||||||
assert finally_idx > 0, "play_tts must be in a try/finally block"
|
assert finally_idx > 0, "play_tts must be in a try/finally block"
|
||||||
assert remove_idx > 0, "finally block must call os.remove on _tts_path"
|
assert remove_idx > 0, "finally block must call os.remove on _tts_path"
|
||||||
assert remove_idx > finally_idx, "os.remove must be inside the finally block"
|
assert remove_idx > finally_idx, "os.remove must be inside the finally block"
|
||||||
|
|
||||||
|
|
||||||
|
# =====================================================================
|
||||||
|
# Voice channel awareness (get_voice_channel_info / context)
|
||||||
|
# =====================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestVoiceChannelAwareness:
    """Exercise get_voice_channel_info() and get_voice_channel_context()."""

    def _make_adapter(self):
        """Build a DiscordAdapter without running __init__, with empty voice state."""
        from gateway.platforms.discord import DiscordAdapter
        from gateway.config import PlatformConfig

        # Built as part of the fixture; it is not attached to the adapter.
        config = PlatformConfig(enabled=True, extra={})
        config.token = "fake-token"

        adapter = object.__new__(DiscordAdapter)
        for attr in ("_voice_clients", "_voice_text_channels", "_voice_receivers"):
            setattr(adapter, attr, {})
        adapter._client = MagicMock()
        adapter._client.user = SimpleNamespace(id=99999, name="HermesBot")
        return adapter

    def _make_member(self, user_id, display_name, is_bot=False):
        """Return a minimal stand-in for a channel member."""
        return SimpleNamespace(id=user_id, display_name=display_name, bot=is_bot)

    def _attach_voice_client(self, adapter, channel_name, members, guild_id=111):
        """Register a connected mock voice client for ``guild_id`` on the adapter."""
        vc = MagicMock()
        vc.is_connected.return_value = True
        vc.channel.name = channel_name
        vc.channel.members = members
        adapter._voice_clients[guild_id] = vc
        return vc

    def test_returns_none_when_not_connected(self):
        adapter = self._make_adapter()

        assert adapter.get_voice_channel_info(111) is None

    def test_returns_none_when_vc_disconnected(self):
        adapter = self._make_adapter()
        disconnected = MagicMock()
        disconnected.is_connected.return_value = False
        adapter._voice_clients[111] = disconnected

        assert adapter.get_voice_channel_info(111) is None

    def test_returns_info_with_members(self):
        adapter = self._make_adapter()
        roster = [
            self._make_member(99999, "HermesBot", is_bot=True),
            self._make_member(1001, "Alice"),
            self._make_member(1002, "Bob"),
        ]
        self._attach_voice_client(adapter, "general-voice", roster)

        info = adapter.get_voice_channel_info(111)

        assert info is not None
        assert info["channel_name"] == "general-voice"
        assert info["member_count"] == 2  # bot excluded
        names = [entry["display_name"] for entry in info["members"]]
        assert "Alice" in names
        assert "Bob" in names
        assert "HermesBot" not in names

    def test_speaking_detection(self):
        import time as _time

        adapter = self._make_adapter()
        roster = [self._make_member(1001, "Alice"), self._make_member(1002, "Bob")]
        self._attach_voice_client(adapter, "voice", roster)

        # Mock receiver: SSRC 100 was heard just now and maps to Alice.
        receiver = MagicMock()
        receiver._lock = threading.Lock()
        receiver._last_packet_time = {100: _time.monotonic()}
        receiver._ssrc_to_user = {100: 1001}
        adapter._voice_receivers[111] = receiver

        info = adapter.get_voice_channel_info(111)

        alice = [m for m in info["members"] if m["display_name"] == "Alice"][0]
        bob = [m for m in info["members"] if m["display_name"] == "Bob"][0]
        assert alice["is_speaking"] is True
        assert bob["is_speaking"] is False
        assert info["speaking_count"] == 1

    def test_context_string_format(self):
        adapter = self._make_adapter()
        self._attach_voice_client(adapter, "chat-room", [self._make_member(1001, "Alice")])

        ctx = adapter.get_voice_channel_context(111)

        for expected in ("#chat-room", "1 participant", "Alice"):
            assert expected in ctx

    def test_context_empty_when_not_connected(self):
        adapter = self._make_adapter()

        assert adapter.get_voice_channel_context(111) == ""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue