Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in a local directory so the vision tool can still read them after the platform's ephemeral URLs have expired.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.
This commit is contained in:
teknium1 2026-02-15 16:10:50 -08:00
parent eb49936a60
commit 5404a8fcd8
7 changed files with 303 additions and 35 deletions

View file

@ -38,6 +38,7 @@ from gateway.platforms.base import (
MessageEvent,
MessageType,
SendResult,
cache_image_from_bytes,
)
@ -303,7 +304,7 @@ class TelegramAdapter(BasePlatformAdapter):
await self.handle_message(event)
async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming media messages."""
"""Handle incoming media messages, downloading images to local cache."""
if not update.message:
return
@ -327,6 +328,30 @@ class TelegramAdapter(BasePlatformAdapter):
if msg.caption:
event.text = msg.caption
# Download the photo to the local image cache so the vision tool can still
# read it after Telegram's ephemeral file URL expires (Telegram only
# guarantees the download link for at least 1 hour).
if msg.photo:
try:
# msg.photo holds PhotoSize entries ordered smallest to largest, so the last entry is the largest
photo = msg.photo[-1]
file_obj = await photo.get_file()
# Download the image bytes directly into memory
image_bytes = await file_obj.download_as_bytearray()
# Determine extension from the file path if available
ext = ".jpg"
if file_obj.file_path:
for candidate in [".png", ".webp", ".gif", ".jpeg", ".jpg"]:
if file_obj.file_path.lower().endswith(candidate):
ext = candidate
break
# Save to cache and populate media_urls with the local path
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [f"image/{ext.lstrip('.')}"]
print(f"[Telegram] Cached user photo: {cached_path}", flush=True)
except Exception as e:
print(f"[Telegram] Failed to cache photo: {e}", flush=True)
await self.handle_message(event)
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent: