diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 47ab6e21..142304d5 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -441,6 +441,7 @@ class DiscordAdapter(BasePlatformAdapter):
             intents.dm_messages = True
             intents.guild_messages = True
             intents.members = True
+            intents.voice_states = True
 
             # Create bot
             self._client = commands.Bot(
@@ -494,7 +495,40 @@ class DiscordAdapter(BasePlatformAdapter):
                     # "all" falls through to handle_message
 
                 await self._handle_message(message)
-            
+
+            @self._client.event
+            async def on_voice_state_update(member, before, after):
+                """Log voice channel join/leave/move events.
+
+                Only guilds where the bot holds a voice client are tracked,
+                and the bot's own state changes are ignored.
+                """
+                guild_id = member.guild.id
+                # Test membership on the dict directly instead of copying
+                # its keys into a set on every event.
+                if guild_id not in adapter_self._voice_clients:
+                    return
+                # Ignore the bot itself
+                if member == adapter_self._client.user:
+                    return
+                # Mute/deafen toggles fire this event without a channel change.
+                if before.channel == after.channel:
+                    return
+
+                if before.channel is None:
+                    action = "joined " + after.channel.name
+                elif after.channel is None:
+                    action = "left " + before.channel.name
+                else:
+                    action = f"moved {before.channel.name} -> {after.channel.name}"
+                logger.info(
+                    "Voice state: %s (%d) %s (guild %d)",
+                    member.display_name,
+                    member.id,
+                    action,
+                    guild_id,
+                )
+
             # Register slash commands
             self._register_slash_commands()
 
@@ -864,6 +898,79 @@ class DiscordAdapter(BasePlatformAdapter):
         vc = self._voice_clients.get(guild_id)
         return vc is not None and vc.is_connected()
 
+    def get_voice_channel_info(self, guild_id: int) -> Optional[Dict[str, Any]]:
+        """Return voice channel awareness info for the given guild.
+
+        Returns None if the bot is not in a voice channel. Otherwise
+        returns a dict with ``channel_name``, ``members`` (the bot itself
+        excluded, each tagged with ``is_speaking``), ``member_count`` and
+        ``speaking_count`` (listed members heard within the last 2 seconds).
+        """
+        vc = self._voice_clients.get(guild_id)
+        if not vc or not vc.is_connected():
+            return None
+
+        channel = vc.channel
+        if not channel:
+            return None
+
+        # Members currently in the voice channel (the bot itself is skipped)
+        members_info = []
+        bot_user = self._client.user if self._client else None
+        for m in channel.members:
+            if bot_user and m.id == bot_user.id:
+                continue  # skip the bot itself
+            members_info.append({
+                "user_id": m.id,
+                "display_name": m.display_name,
+                "is_bot": m.bot,
+            })
+
+        # Currently speaking users (from SSRC mapping + active buffers)
+        speaking_user_ids: set = set()
+        receiver = self._voice_receivers.get(guild_id)
+        if receiver:
+            import time as _time
+            now = _time.monotonic()
+            with receiver._lock:
+                for ssrc, last_t in receiver._last_packet_time.items():
+                    # Consider "speaking" if audio received within last 2 seconds
+                    if now - last_t < 2.0:
+                        uid = receiver._ssrc_to_user.get(ssrc)
+                        if uid:
+                            speaking_user_ids.add(uid)
+
+        # Tag speaking status on members
+        for info in members_info:
+            info["is_speaking"] = info["user_id"] in speaking_user_ids
+
+        return {
+            "channel_name": channel.name,
+            "member_count": len(members_info),
+            "members": members_info,
+            # Count listed members only: the receiver's SSRC table is not
+            # filtered by channel membership here, and the count must agree
+            # with the per-member ``is_speaking`` flags.
+            "speaking_count": sum(m["is_speaking"] for m in members_info),
+        }
+
+    def get_voice_channel_context(self, guild_id: int) -> str:
+        """Return a human-readable voice channel context string.
+
+        Suitable for injection into the system/ephemeral prompt so the
+        agent is always aware of voice channel state.
+        """
+        info = self.get_voice_channel_info(guild_id)
+        if not info:
+            return ""
+
+        parts = [f"[Voice channel: #{info['channel_name']} — {info['member_count']} participant(s)]"]
+        for m in info["members"]:
+            status = " (speaking)" if m["is_speaking"] else ""
+            parts.append(f"  - {m['display_name']}{status}")
+
+        return "\n".join(parts)
+
     # ------------------------------------------------------------------
     # Voice listening (Phase 2)
     # ------------------------------------------------------------------
diff --git a/gateway/run.py b/gateway/run.py
index a24efe01..173b2551 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1408,6 +1408,19 @@ class GatewayRunner:
                     f"or ignore to skip."
                 )
 
+        # -----------------------------------------------------------------
+        # Voice channel awareness — inject current voice channel state
+        # into context so the agent knows who is in the channel and who
+        # is speaking, without needing a separate tool call.
+        # -----------------------------------------------------------------
+        if source.platform == Platform.DISCORD:
+            adapter = self.adapters.get(Platform.DISCORD)
+            guild_id = self._get_guild_id(event)
+            if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
+                vc_context = adapter.get_voice_channel_context(guild_id)
+                if vc_context:
+                    context_prompt += f"\n\n{vc_context}"
+
         # -----------------------------------------------------------------
         # Auto-analyze images sent by the user
         #
@@ -2156,11 +2169,18 @@ class GatewayRunner:
             # Append voice channel info if connected
             adapter = self.adapters.get(event.source.platform)
             guild_id = self._get_guild_id(event)
-            if guild_id and hasattr(adapter, "is_in_voice_channel"):
-                if adapter.is_in_voice_channel(guild_id):
-                    vc = adapter._voice_clients.get(guild_id)
-                    ch_name = vc.channel.name if vc and vc.channel else "unknown"
-                    return f"Voice mode: {labels.get(mode, mode)}\nVoice channel: {ch_name}"
+            if guild_id and hasattr(adapter, "get_voice_channel_info"):
+                info = adapter.get_voice_channel_info(guild_id)
+                if info:
+                    lines = [
+                        f"Voice mode: {labels.get(mode, mode)}",
+                        f"Voice channel: #{info['channel_name']}",
+                        f"Participants: {info['member_count']}",
+                    ]
+                    for m in info["members"]:
+                        status = " (speaking)" if m.get("is_speaking") else ""
+                        lines.append(f"  - {m['display_name']}{status}")
+                    return "\n".join(lines)
             return f"Voice mode: {labels.get(mode, mode)}"
         else:
             # Toggle: off → on, on/all → off
diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py
index e6fc2731..45595d35 100644
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -1852,3 +1852,123 @@ class TestAutoTtsTempFileCleanup:
         assert finally_idx > 0, "play_tts must be in a try/finally block"
         assert remove_idx > 0, "finally block must call os.remove on _tts_path"
         assert remove_idx > finally_idx, "os.remove must be inside the finally block"
+
+
+# =====================================================================
+# Voice channel awareness (get_voice_channel_info / context)
+# =====================================================================
+
+
+class TestVoiceChannelAwareness:
+    """Tests for get_voice_channel_info() and get_voice_channel_context()."""
+
+    def _make_adapter(self):
+        from gateway.platforms.discord import DiscordAdapter
+        adapter = object.__new__(DiscordAdapter)
+        adapter._voice_clients = {}
+        adapter._voice_text_channels = {}
+        adapter._voice_receivers = {}
+        adapter._client = MagicMock()
+        adapter._client.user = SimpleNamespace(id=99999, name="HermesBot")
+        return adapter
+
+    def _make_member(self, user_id, display_name, is_bot=False):
+        return SimpleNamespace(
+            id=user_id, display_name=display_name, bot=is_bot,
+        )
+
+    def test_returns_none_when_not_connected(self):
+        adapter = self._make_adapter()
+        assert adapter.get_voice_channel_info(111) is None
+
+    def test_returns_none_when_vc_disconnected(self):
+        adapter = self._make_adapter()
+        vc = MagicMock()
+        vc.is_connected.return_value = False
+        adapter._voice_clients[111] = vc
+        assert adapter.get_voice_channel_info(111) is None
+
+    def test_returns_info_with_members(self):
+        adapter = self._make_adapter()
+        vc = MagicMock()
+        vc.is_connected.return_value = True
+        bot_member = self._make_member(99999, "HermesBot", is_bot=True)
+        user_a = self._make_member(1001, "Alice")
+        user_b = self._make_member(1002, "Bob")
+        vc.channel.name = "general-voice"
+        vc.channel.members = [bot_member, user_a, user_b]
+        adapter._voice_clients[111] = vc
+
+        info = adapter.get_voice_channel_info(111)
+        assert info is not None
+        assert info["channel_name"] == "general-voice"
+        assert info["member_count"] == 2  # bot excluded
+        names = [m["display_name"] for m in info["members"]]
+        assert "Alice" in names
+        assert "Bob" in names
+        assert "HermesBot" not in names
+
+    def test_speaking_detection(self):
+        adapter = self._make_adapter()
+        vc = MagicMock()
+        vc.is_connected.return_value = True
+        user_a = self._make_member(1001, "Alice")
+        user_b = self._make_member(1002, "Bob")
+        vc.channel.name = "voice"
+        vc.channel.members = [user_a, user_b]
+        adapter._voice_clients[111] = vc
+
+        # Set up a mock receiver with Alice speaking
+        import time as _time
+        receiver = MagicMock()
+        receiver._lock = threading.Lock()
+        receiver._last_packet_time = {100: _time.monotonic()}  # ssrc 100 is active
+        receiver._ssrc_to_user = {100: 1001}  # ssrc 100 -> Alice
+        adapter._voice_receivers[111] = receiver
+
+        info = adapter.get_voice_channel_info(111)
+        alice = [m for m in info["members"] if m["display_name"] == "Alice"][0]
+        bob = [m for m in info["members"] if m["display_name"] == "Bob"][0]
+        assert alice["is_speaking"] is True
+        assert bob["is_speaking"] is False
+        assert info["speaking_count"] == 1
+
+    def test_speaking_count_ignores_users_not_in_channel(self):
+        adapter = self._make_adapter()
+        vc = MagicMock()
+        vc.is_connected.return_value = True
+        vc.channel.name = "voice"
+        vc.channel.members = [self._make_member(1001, "Alice")]
+        adapter._voice_clients[111] = vc
+
+        # ssrc 200 still has recent audio but maps to a user who is not
+        # listed in the channel
+        import time as _time
+        now = _time.monotonic()
+        receiver = MagicMock()
+        receiver._lock = threading.Lock()
+        receiver._last_packet_time = {100: now, 200: now}
+        receiver._ssrc_to_user = {100: 1001, 200: 2002}
+        adapter._voice_receivers[111] = receiver
+
+        info = adapter.get_voice_channel_info(111)
+        assert info["members"][0]["is_speaking"] is True
+        assert info["speaking_count"] == 1
+
+    def test_context_string_format(self):
+        adapter = self._make_adapter()
+        vc = MagicMock()
+        vc.is_connected.return_value = True
+        user_a = self._make_member(1001, "Alice")
+        vc.channel.name = "chat-room"
+        vc.channel.members = [user_a]
+        adapter._voice_clients[111] = vc
+
+        ctx = adapter.get_voice_channel_context(111)
+        assert "#chat-room" in ctx
+        assert "1 participant" in ctx
+        assert "Alice" in ctx
+
+    def test_context_empty_when_not_connected(self):
+        adapter = self._make_adapter()
+        assert adapter.get_voice_channel_context(111) == ""