From 4f8e10df1613bb3d89baef121d062c53092e894a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A1=D0=BB=D0=BE=D0=BD=D0=BE=D0=B2=D0=B0=20=D0=90=D0=BD?= =?UTF-8?q?=D0=BD=D0=B0?= Date: Sun, 22 Mar 2026 00:29:51 +0300 Subject: [PATCH 1/5] Do base message recognition --- .env.example | 11 +++ README.md | 5 +- main.py | 226 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | Bin 0 -> 916 bytes 4 files changed, 240 insertions(+), 2 deletions(-) create mode 100644 .env.example create mode 100644 main.py create mode 100644 requirements.txt diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..9175207 --- /dev/null +++ b/.env.example @@ -0,0 +1,11 @@ +HOMESERVER=https://matrix.org + +# Bot's Matrix username (full MXID) +MATRIX_USERNAME=@your_bot:matrix.org + +# Either use password OR access token +PASSWORD= +ACCESS_TOKEN=syt_... + +# Allowed rooms (comma-separated, no spaces) +ALLOWED_ROOMS=!roomid1:matrix.org,!roomid2:matrix.org \ No newline at end of file diff --git a/README.md b/README.md index 9611f7f..84cef0a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ -# b2b_assistants +# Бот для автоматической генерации отчётов -Репозиторий для разработки B to B рушений \ No newline at end of file +Принимает фото/аудио файлы и текстовые сообщения, когда в течение 15 секунд нет новых сообщений, +формирует отчёт по отправленному. \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..005b623 --- /dev/null +++ b/main.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 +import asyncio +import os +import tempfile +import time +from typing import Dict, Optional, Tuple +from dotenv import load_dotenv + +from nio import ( + AsyncClient, + RoomMessageText, + RoomMessageImage, + RoomMessageAudio, + LoginResponse, + AsyncClientConfig, + ErrorResponse, +) + +load_dotenv() + +HOMESERVER = os.getenv("HOMESERVER", "https://matrix.org") +USERNAME = os.getenv("MATRIX_USERNAME") +PASSWORD = os.getenv("PASSWORD") +ALLOWED_ROOMS = set(room.strip() for room in os.getenv("ALLOWED_ROOMS", "").split(",") if room.strip()) + +TEMP_DIR = tempfile.gettempdir() +GROUPING_TIMEOUT = 15.0 + +client: AsyncClient = None + +pending_by_conversation: Dict[Tuple[str, str], Dict] = {} +pending_by_event_id: Dict[str, Dict] = {} + + +async def process_audio(audio_bytes: bytes) -> str: + print(f"[AUDIO] Получено {len(audio_bytes)} байт аудио") + return "Placeholder" + + +async def process_image(image_bytes: bytes) -> str: + print(f"[IMAGE] Получено {len(image_bytes)} байт изображения") + return "Placeholder" + + +async def generate_report(text: str, image_descriptions: list, audio_texts: list) -> str: + print(f"[REPORT] text: {text}, images: {len(image_descriptions)}, audio: {len(audio_texts)}") + #TODO whisper + отчёт + return "Placeholder" + + +async def send_error_message(room_id: str, error_text: str): + await client.room_send( + room_id, + "m.room.message", + {"msgtype": "m.text", "body": f"❌ Ошибка: {error_text}"} + ) + + +async def process_complete_message(data: Dict): + room_id = data["room_id"] + image_descriptions = [] + for img_bytes in data.get("images", []): + desc = await process_image(img_bytes) + image_descriptions.append(desc) + audio_texts = [] + for aud_bytes in data.get("audio", []): + text = await process_audio(aud_bytes) + audio_texts.append(text) + report = await generate_report(data.get("text", ""), image_descriptions, audio_texts) + await client.room_send( + room_id, + "m.room.message", + {"msgtype": "m.text", "body": report} + ) + if "event_id" in data: + pending_by_event_id.pop(data["event_id"], None) + pending_by_conversation.pop((room_id, data["sender"]), None) + + +async def delayed_processing(data: Dict): + await asyncio.sleep(GROUPING_TIMEOUT) + key = (data["room_id"], data["sender"]) + if pending_by_conversation.get(key) is data: + await process_complete_message(data) + + +def get_or_create_pending(room_id: str, sender: str, event_id: Optional[str] = None) -> Dict: + if event_id and event_id in pending_by_event_id: + return pending_by_event_id[event_id] + + key = (room_id, sender) + if key in pending_by_conversation: + return pending_by_conversation[key] + + data = { + "room_id": room_id, + "sender": sender, + "text": None, + "images": [], + "audio": [], + "timestamp": time.time(), + "task": None, + } + if event_id: + data["event_id"] = event_id + pending_by_conversation[key] = data + if event_id: + pending_by_event_id[event_id] = data + return data + + +def reset_timer(data: Dict): + if data["task"] and not data["task"].done(): + data["task"].cancel() + data["timestamp"] = time.time() + data["task"] = asyncio.create_task(delayed_processing(data)) + + +async def on_text_message(room, event: RoomMessageText): + if event.sender == client.user_id: + return + if room.room_id not in ALLOWED_ROOMS: + return + + event_id = event.event_id + data = get_or_create_pending(room.room_id, event.sender, event_id) + data["text"] = event.body + reset_timer(data) + print(f"[TEXT] Добавлен текст в сообщение от {event.sender}: {event.body}") + + +async def on_image_message(room, event: RoomMessageImage): + if event.sender == client.user_id: + return + if room.room_id not in ALLOWED_ROOMS: + return + + related_event_id = None + if hasattr(event, "source") and "content" in event.source: + content = event.source["content"] + if "m.relates_to" in content and "event_id" in content["m.relates_to"]: + related_event_id = content["m.relates_to"]["event_id"] + + data = get_or_create_pending(room.room_id, event.sender, related_event_id) + + download_result = await client.download(event.url) + if isinstance(download_result, ErrorResponse): + print(f"Ошибка скачивания изображения: {download_result.status_code}") + await send_error_message(room.room_id, "Не удалось загрузить изображение.") + return + + data["images"].append(download_result.body) + reset_timer(data) + print(f"[IMAGE] Добавлено изображение в сообщение от {event.sender}") + + +async def on_audio_message(room, event: RoomMessageAudio): + if event.sender == client.user_id: + return + if room.room_id not in ALLOWED_ROOMS: + return + + related_event_id = None + if hasattr(event, "source") and "content" in event.source: + content = event.source["content"] + if "m.relates_to" in content and "event_id" in content["m.relates_to"]: + related_event_id = content["m.relates_to"]["event_id"] + + data = get_or_create_pending(room.room_id, event.sender, related_event_id) + + download_result = await client.download(event.url) + if isinstance(download_result, ErrorResponse): + print(f"Ошибка скачивания аудио: {download_result.status_code}") + await send_error_message(room.room_id, "Не удалось загрузить аудио.") + return + + data["audio"].append(download_result.body) + reset_timer(data) + print(f"[AUDIO] Добавлено аудио в сообщение от {event.sender}") + + +async def main(): + global client + + config = AsyncClientConfig( + max_timeouts=10, + store_sync_tokens=True, + encryption_enabled=False, + ) + client = AsyncClient( + homeserver=HOMESERVER, + user=USERNAME, + device_id=None, + config=config, + ) + + try: + if PASSWORD: + response = await client.login(PASSWORD) + else: + response = await client.login(token=os.environ.get("ACCESS_TOKEN", "")) + + if isinstance(response, LoginResponse): + print(f"Бот {USERNAME} успешно авторизован на {HOMESERVER}") + print(f"Access token: {client.access_token}") + else: + print(f"Ошибка авторизации: {response}") + return + except Exception as e: + print(f"Исключение при авторизации: {e}") + return + + client.add_event_callback(on_text_message, RoomMessageText) + client.add_event_callback(on_image_message, RoomMessageImage) + client.add_event_callback(on_audio_message, RoomMessageAudio) + + print("Бот запущен, ожидание событий...") + try: + await client.sync_forever(timeout=30000) + except KeyboardInterrupt: + print("Бот остановлен пользователем") + finally: + await client.close() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8456314e1bdb9396d4af963de03c3bf1c46f49c GIT binary patch literal 916 zcmZvb(N2S44264b;-j!^pi?h=7Nf8Y)B$14I3Hg9PP>0>#*qFndU|?#=zhMRt+CG5 zobR@>ojr2pc4ISJ5Oce>D+^4`z2`cBQ@rr|oh#MMZ0S|!6{C$*h_mpV&f z!CvEvda9qMNN;=8NO#IjnrmF@={Ml+0BXy#P?x$It9&A5!lQ-z^XiVYoTb%NZed70 za_0yI|KT#K)VX9%ec6)Qk$Xp{^tI;9bITkLM08XK`PbN|WgvxDM>!GL+?8!@kAvc? zi1O&8rn)okM?Oip|oFXp>5Ca%rSjMI?{pM1JP8up>Tbg3OU4^R@Qj3gvn3 zk$5NU<%WCvVSCe%+m^$X8ahI#-1 literal 0 HcmV?d00001 From 471e9bcc3b92500bd897cb66700c149b913271e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A1=D0=BB=D0=BE=D0=BD=D0=BE=D0=B2=D0=B0=20=D0=90=D0=BD?= =?UTF-8?q?=D0=BD=D0=B0?= Date: Sun, 22 Mar 2026 21:15:39 +0300 Subject: [PATCH 2/5] Do audio recognition --- .env.example | 6 +- .gitignore | 8 ++ main.py | 205 ++++++++++++++++++++++++++++++++++++++++------- requirements.txt | Bin 916 -> 972 bytes 4 files changed, 191 insertions(+), 28 deletions(-) diff --git a/.env.example b/.env.example index 9175207..e6010b4 100644 --- a/.env.example +++ b/.env.example @@ -8,4 +8,8 @@ PASSWORD= ACCESS_TOKEN=syt_... # Allowed rooms (comma-separated, no spaces) -ALLOWED_ROOMS=!roomid1:matrix.org,!roomid2:matrix.org \ No newline at end of file +ALLOWED_ROOMS=!roomid1:matrix.org,!roomid2:matrix.org + +# Whisper settings +WHISPER_LANGUAGE=ru +WHISPER_MODEL=small \ No newline at end of file diff --git a/.gitignore b/.gitignore index ab3e8ce..a11843e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,14 @@ __pycache__/ *.py[cod] *$py.class +.idea/* +.idea + +venv1 +venv1/ +venv1/* +venv1/** + # C extensions *.so diff --git a/main.py b/main.py index 005b623..75af493 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,8 @@ import asyncio import os import tempfile import time +import shutil +import subprocess from typing import Dict, Optional, Tuple from dotenv import load_dotenv @@ -16,62 +18,182 @@ from nio import ( ErrorResponse, ) +from faster_whisper import WhisperModel + load_dotenv() HOMESERVER = os.getenv("HOMESERVER", "https://matrix.org") USERNAME = os.getenv("MATRIX_USERNAME") PASSWORD = os.getenv("PASSWORD") ALLOWED_ROOMS = set(room.strip() for room in os.getenv("ALLOWED_ROOMS", "").split(",") if room.strip()) +WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "ru") +WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small") TEMP_DIR = tempfile.gettempdir() GROUPING_TIMEOUT = 15.0 client: AsyncClient = None - pending_by_conversation: Dict[Tuple[str, str], Dict] = {} pending_by_event_id: Dict[str, Dict] = {} - -async def process_audio(audio_bytes: bytes) -> str: - print(f"[AUDIO] Получено {len(audio_bytes)} байт аудио") - return "Placeholder" +whisper_model = None -async def process_image(image_bytes: bytes) -> str: - print(f"[IMAGE] Получено {len(image_bytes)} байт изображения") - return "Placeholder" +def get_whisper_model(): + global whisper_model + if whisper_model is None: + whisper_model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8") + print(f"Whisper модель {WHISPER_MODEL} загружена (faster-whisper).") + return whisper_model -async def generate_report(text: str, image_descriptions: list, audio_texts: list) -> str: +def ffmpeg_available() -> bool: + found = shutil.which("ffmpeg") is not None + if not found: + print("[ERROR] ffmpeg не найден в системе. Установите ffmpeg и добавьте в PATH.") + return found + + +def get_file_extension(mimetype: str) -> str: + ext_map = { + "audio/ogg": ".ogg", + "audio/mpeg": ".mp3", + "audio/mp4": ".m4a", + "audio/x-m4a": ".m4a", + "audio/wav": ".wav", + "audio/webm": ".webm", + } + return ext_map.get(mimetype, ".tmp") + + +async def convert_to_wav(input_path: str) -> Optional[str]: + output_fd, output_path = tempfile.mkstemp(suffix=".wav") + os.close(output_fd) + cmd = [ + "ffmpeg", "-i", input_path, + "-map", "0:a:0", + "-map_metadata", "-1", + "-vn", + "-acodec", "pcm_s16le", + "-ar", "16000", + "-ac", "1", + "-y", + output_path + ] + try: + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, lambda: subprocess.run(cmd, capture_output=True, check=True)) + return output_path + except subprocess.CalledProcessError as e: + print(f"[AUDIO] Ошибка конвертации ffmpeg: {e.stderr.decode()}") + if os.path.exists(output_path): + os.unlink(output_path) + return None + + +async def transcribe_audio(audio_bytes: bytes, mimetype: str) -> Optional[str]: + if not ffmpeg_available(): + print("[AUDIO] Ошибка: ffmpeg не установлен.") + return None + + ext = get_file_extension(mimetype) + loop = asyncio.get_running_loop() + with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp: + tmp.write(audio_bytes) + input_path = tmp.name + + wav_path = None + try: + wav_path = await convert_to_wav(input_path) + if not wav_path: + print("[AUDIO] Конвертация в WAV не удалась.") + return None + + model = get_whisper_model() + segments, info = await loop.run_in_executor( + None, + lambda: model.transcribe(wav_path, beam_size=5, language=WHISPER_LANGUAGE) + ) + text = " ".join([segment.text for segment in segments]) + return text.strip() + except Exception as e: + print(f"[AUDIO] Ошибка при распознавании: {e}") + return None + finally: + if os.path.exists(input_path): + os.unlink(input_path) + if wav_path and os.path.exists(wav_path): + os.unlink(wav_path) + + +async def process_audio(audio_data: Dict) -> str: + audio_bytes = audio_data["bytes"] + mimetype = audio_data.get("mimetype", "audio/ogg") + print(f"[AUDIO] Получено {len(audio_bytes)} байт аудио, тип: {mimetype}") + text = await transcribe_audio(audio_bytes, mimetype) + if text is None: + print("[AUDIO] Распознавание не удалось.") + return "" + return text + + +async def process_image(image_data: Dict) -> str: + print(f"[IMAGE] Получено {len(image_data['bytes'])} байт изображения") + return "[Описание изображения будет добавлено позже]" + + +async def generate_report(text: str, images_data: list, audios_data: list) -> str: + audio_texts = [] + for audio in audios_data: + audio_text = await process_audio(audio) + if audio_text: + audio_texts.append(audio_text) + + image_descriptions = [] + for img in images_data: + desc = await process_image(img) + if desc: + image_descriptions.append(desc) + + parts = [] + if text: + parts.append(f"**Текст сообщения:**\n{text}") + if audio_texts: + parts.append("**Распознанный текст из аудио:**\n" + "\n\n".join(audio_texts)) + if image_descriptions: + parts.append("**Описания изображений:**\n" + "\n".join(image_descriptions)) + + if not parts: + return "Не удалось обработать сообщение (нет текста, не распознано аудио или ошибка)." + print(f"[REPORT] text: {text}, images: {len(image_descriptions)}, audio: {len(audio_texts)}") - #TODO whisper + отчёт - return "Placeholder" + return "\n\n".join(parts) async def send_error_message(room_id: str, error_text: str): await client.room_send( room_id, "m.room.message", - {"msgtype": "m.text", "body": f"❌ Ошибка: {error_text}"} + {"msgtype": "m.text", "body": f"❌ {error_text}"} ) async def process_complete_message(data: Dict): room_id = data["room_id"] - image_descriptions = [] - for img_bytes in data.get("images", []): - desc = await process_image(img_bytes) - image_descriptions.append(desc) - audio_texts = [] - for aud_bytes in data.get("audio", []): - text = await process_audio(aud_bytes) - audio_texts.append(text) - report = await generate_report(data.get("text", ""), image_descriptions, audio_texts) + # Объединяем все текстовые сообщения, которые были в этой группе + text_parts = data.get("text", []) + text = "\n".join(text_parts) if text_parts else "" + images_data = data.get("images", []) + audios_data = data.get("audio", []) + + report = await generate_report(text, images_data, audios_data) + await client.room_send( room_id, "m.room.message", {"msgtype": "m.text", "body": report} ) + if "event_id" in data: pending_by_event_id.pop(data["event_id"], None) pending_by_conversation.pop((room_id, data["sender"]), None) @@ -95,7 +217,7 @@ def get_or_create_pending(room_id: str, sender: str, event_id: Optional[str] = N data = { "room_id": room_id, "sender": sender, - "text": None, + "text": [], # список строк, а не одна строка "images": [], "audio": [], "timestamp": time.time(), @@ -124,7 +246,8 @@ async def on_text_message(room, event: RoomMessageText): event_id = event.event_id data = get_or_create_pending(room.room_id, event.sender, event_id) - data["text"] = event.body + # Добавляем текст в список, а не заменяем + data["text"].append(event.body) reset_timer(data) print(f"[TEXT] Добавлен текст в сообщение от {event.sender}: {event.body}") @@ -145,11 +268,20 @@ async def on_image_message(room, event: RoomMessageImage): download_result = await client.download(event.url) if isinstance(download_result, ErrorResponse): - print(f"Ошибка скачивания изображения: {download_result.status_code}") + print(f"[IMAGE] Ошибка скачивания: {download_result.status_code} - {download_result.message}") await send_error_message(room.room_id, "Не удалось загрузить изображение.") return - data["images"].append(download_result.body) + mimetype = getattr(event, "mimetype", None) + if not mimetype and hasattr(event, "info") and isinstance(event.info, dict): + mimetype = event.info.get("mimetype") + if not mimetype: + mimetype = "image/jpeg" + + data["images"].append({ + "bytes": download_result.body, + "mimetype": mimetype, + }) reset_timer(data) print(f"[IMAGE] Добавлено изображение в сообщение от {event.sender}") @@ -170,11 +302,20 @@ async def on_audio_message(room, event: RoomMessageAudio): download_result = await client.download(event.url) if isinstance(download_result, ErrorResponse): - print(f"Ошибка скачивания аудио: {download_result.status_code}") + print(f"[AUDIO] Ошибка скачивания: {download_result.status_code} - {download_result.message}") await send_error_message(room.room_id, "Не удалось загрузить аудио.") return - data["audio"].append(download_result.body) + mimetype = None + if hasattr(event, "info") and isinstance(event.info, dict): + mimetype = event.info.get("mimetype") + if not mimetype: + mimetype = "audio/ogg" + + data["audio"].append({ + "bytes": download_result.body, + "mimetype": mimetype, + }) reset_timer(data) print(f"[AUDIO] Добавлено аудио в сообщение от {event.sender}") @@ -210,6 +351,15 @@ async def main(): print(f"Исключение при авторизации: {e}") return + if not ffmpeg_available(): + print("ВНИМАНИЕ: ffmpeg не найден. Бот не сможет распознавать аудио.") + print("Установите ffmpeg (https://ffmpeg.org/download.html) и добавьте в PATH.") + else: + print("ffmpeg найден, аудио будет обрабатываться.") + + if WHISPER_LANGUAGE: + print(f"Язык распознавания: {WHISPER_LANGUAGE}") + client.add_event_callback(on_text_message, RoomMessageText) client.add_event_callback(on_image_message, RoomMessageImage) client.add_event_callback(on_audio_message, RoomMessageAudio) @@ -222,5 +372,6 @@ async def main(): finally: await client.close() + if __name__ == "__main__": asyncio.run(main()) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d8456314e1bdb9396d4af963de03c3bf1c46f49c..3fc940d775a2ab714ed750f3b23cc291bd8a4c2f 100644 GIT binary patch literal 972 zcmYLI%aYq55WMp*qJ%6z_Mk)VNmZ^nxiCV8SONu5*4nR6H{K*SG0gPz18E;8KifPE zT`!evh4#_ub=~c5Z+f12Ax~1(dMWEL9HctP!{75YNAo~aE7j+8_V=LYVNkLsTnMq~ zV#j8ThHb6vQQUnT1}yA|TuR~^Y~jFfjGocf3S(38lgCwy%T9JjVPAm$?oOnwTsRK~eDhBt&-s zF}L0CKHMj5?A2{IHl2yEgZz~Ag0YuJehj(>p~TM|LpIJXC}16#{4X`+3kQuZ9&MHR z5zZ*F==2!lduV)V)}5m?e99>IMJJB+!De>ygaPvaO6du%QhE3dIPDaYJS)j zVs!}qAYus5w1whH8^b+vrxltK(L|gh87mR&O)2&ON80D*xb=>Px# literal 916 zcmZvb(N2S44264b;-j!^pi?h=7Nf8Y)B$14I3Hg9PP>0>#*qFndU|?#=zhMRt+CG5 zobR@>ojr2pc4ISJ5Oce>D+^4`z2`cBQ@rr|oh#MMZ0S|!6{C$*h_mpV&f z!CvEvda9qMNN;=8NO#IjnrmF@={Ml+0BXy#P?x$It9&A5!lQ-z^XiVYoTb%NZed70 za_0yI|KT#K)VX9%ec6)Qk$Xp{^tI;9bITkLM08XK`PbN|WgvxDM>!GL+?8!@kAvc? zi1O&8rn)okM?Oip|oFXp>5Ca%rSjMI?{pM1JP8up>Tbg3OU4^R@Qj3gvn3 zk$5NU<%WCvVSCe%+m^$X8ahI#-1 From 7ba74f3b5126e70d5e4130af27b48d14f1c7a711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=BD=D0=B0=20=D0=A1=D0=BB=D0=BE=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0?= Date: Thu, 26 Mar 2026 00:56:48 +0300 Subject: [PATCH 3/5] Do PDF report --- .env.example | 28 +- .gitignore | 344 +++++++++---------- README.md | 6 +- base_prompt.txt | 18 + main.py | 873 +++++++++++++++++++++++++++-------------------- requirements.txt | 14 + 6 files changed, 717 insertions(+), 566 deletions(-) create mode 100644 base_prompt.txt diff --git a/.env.example b/.env.example index e6010b4..cbd76b2 100644 --- a/.env.example +++ b/.env.example @@ -1,15 +1,15 @@ -HOMESERVER=https://matrix.org - -# Bot's Matrix username (full MXID) -MATRIX_USERNAME=@your_bot:matrix.org - -# Either use password OR access token -PASSWORD= -ACCESS_TOKEN=syt_... - -# Allowed rooms (comma-separated, no spaces) -ALLOWED_ROOMS=!roomid1:matrix.org,!roomid2:matrix.org - -# Whisper settings -WHISPER_LANGUAGE=ru +HOMESERVER=https://matrix.org + +# Bot's Matrix username (full MXID) +MATRIX_USERNAME=@your_bot:matrix.org + +# Either use password OR access token +PASSWORD= +ACCESS_TOKEN=syt_... + +# Allowed rooms (comma-separated, no spaces) +ALLOWED_ROOMS=!roomid1:matrix.org,!roomid2:matrix.org + +# Whisper settings +WHISPER_LANGUAGE=ru WHISPER_MODEL=small \ No newline at end of file diff --git a/.gitignore b/.gitignore index a11843e..533ba24 100644 --- a/.gitignore +++ b/.gitignore @@ -1,172 +1,172 @@ -# ---> Python -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -.idea/* -.idea - -venv1 -venv1/ -venv1/* -venv1/** - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - +# ---> Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +.idea/* +.idea + +venv1 +venv1/ +venv1/* +venv1/** + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + diff --git a/README.md b/README.md index 84cef0a..88ef005 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Бот для автоматической генерации отчётов - -Принимает фото/аудио файлы и текстовые сообщения, когда в течение 15 секунд нет новых сообщений, +# Бот для автоматической генерации отчётов + +Принимает фото/аудио файлы и текстовые сообщения, когда в течение 15 секунд нет новых сообщений, формирует отчёт по отправленному. \ No newline at end of file diff --git a/base_prompt.txt b/base_prompt.txt new file mode 100644 index 0000000..303ae00 --- /dev/null +++ b/base_prompt.txt @@ -0,0 +1,18 @@ +Твоя роль: сотрудник IT-компании. +Составь краткий отчёт о встрече на основе следующего текста. +Выдели ключевые темы, решения и действия, а также тезисно изложи суть и ход совещания. +Текст - расшифровка аудиозаписи встречи, и возможно, текстовые сообщения пользователя, +которому нужен отчёт. +Твой отчёт должен по стилю и содержанию вписываться в формат корпоративного отчёта +по прошедшей встрече, быть информативным, фиксировать все необходимые сведения, особенно те, +которые могут быть важны для дальнейших действий сотрудников. +НЕ ИСПОЛЬЗУЙ эмодзи. Твой отчёт должен содержать достаточный объём информации, отображая +все важные моменты. Вне зависимости от содержания полученных на вход данных оформи +ответ в формате html-страницы. Твой ответ ОБЯЗАТЕЛЬНО должен представлять собой html-страницу. +НЕ НУЖНО слишком сильно уходить в визуальное оформление отчёта: он должен быть удобен для +чтения и печати (в том числе) на ч/б принтере, должен быть оформлен аккуратно и легко для восприятия, +но не перегружено, в формате документа, основную смысловую часть которого составляет +текстовая информация. Размер шрифт должен быть не крупным, но удобно читаемым (около 12 пунктов). +Фон ВСЕГДА должен быть исключительно белым. +Если это требуется, ответ может быть достаточно большим по размеру. Не ограничивай себя в длине +ответа, но и ненужную информацию оставлять не нужно. \ No newline at end of file diff --git a/main.py b/main.py index 75af493..440ae13 100644 --- a/main.py +++ b/main.py @@ -1,377 +1,496 @@ -#!/usr/bin/env python3 -import asyncio -import os -import tempfile -import time -import shutil -import subprocess -from typing import Dict, Optional, Tuple -from dotenv import load_dotenv - -from nio import ( - AsyncClient, - RoomMessageText, - RoomMessageImage, - RoomMessageAudio, - LoginResponse, - AsyncClientConfig, - ErrorResponse, -) - -from faster_whisper import WhisperModel - -load_dotenv() - -HOMESERVER = os.getenv("HOMESERVER", "https://matrix.org") -USERNAME = os.getenv("MATRIX_USERNAME") -PASSWORD = os.getenv("PASSWORD") -ALLOWED_ROOMS = set(room.strip() for room in os.getenv("ALLOWED_ROOMS", "").split(",") if room.strip()) -WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "ru") -WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small") - -TEMP_DIR = tempfile.gettempdir() -GROUPING_TIMEOUT = 15.0 - -client: AsyncClient = None -pending_by_conversation: Dict[Tuple[str, str], Dict] = {} -pending_by_event_id: Dict[str, Dict] = {} - -whisper_model = None - - -def get_whisper_model(): - global whisper_model - if whisper_model is None: - whisper_model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8") - print(f"Whisper модель {WHISPER_MODEL} загружена (faster-whisper).") - return whisper_model - - -def ffmpeg_available() -> bool: - found = shutil.which("ffmpeg") is not None - if not found: - print("[ERROR] ffmpeg не найден в системе. Установите ffmpeg и добавьте в PATH.") - return found - - -def get_file_extension(mimetype: str) -> str: - ext_map = { - "audio/ogg": ".ogg", - "audio/mpeg": ".mp3", - "audio/mp4": ".m4a", - "audio/x-m4a": ".m4a", - "audio/wav": ".wav", - "audio/webm": ".webm", - } - return ext_map.get(mimetype, ".tmp") - - -async def convert_to_wav(input_path: str) -> Optional[str]: - output_fd, output_path = tempfile.mkstemp(suffix=".wav") - os.close(output_fd) - cmd = [ - "ffmpeg", "-i", input_path, - "-map", "0:a:0", - "-map_metadata", "-1", - "-vn", - "-acodec", "pcm_s16le", - "-ar", "16000", - "-ac", "1", - "-y", - output_path - ] - try: - loop = asyncio.get_running_loop() - await loop.run_in_executor(None, lambda: subprocess.run(cmd, capture_output=True, check=True)) - return output_path - except subprocess.CalledProcessError as e: - print(f"[AUDIO] Ошибка конвертации ffmpeg: {e.stderr.decode()}") - if os.path.exists(output_path): - os.unlink(output_path) - return None - - -async def transcribe_audio(audio_bytes: bytes, mimetype: str) -> Optional[str]: - if not ffmpeg_available(): - print("[AUDIO] Ошибка: ffmpeg не установлен.") - return None - - ext = get_file_extension(mimetype) - loop = asyncio.get_running_loop() - with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp: - tmp.write(audio_bytes) - input_path = tmp.name - - wav_path = None - try: - wav_path = await convert_to_wav(input_path) - if not wav_path: - print("[AUDIO] Конвертация в WAV не удалась.") - return None - - model = get_whisper_model() - segments, info = await loop.run_in_executor( - None, - lambda: model.transcribe(wav_path, beam_size=5, language=WHISPER_LANGUAGE) - ) - text = " ".join([segment.text for segment in segments]) - return text.strip() - except Exception as e: - print(f"[AUDIO] Ошибка при распознавании: {e}") - return None - finally: - if os.path.exists(input_path): - os.unlink(input_path) - if wav_path and os.path.exists(wav_path): - os.unlink(wav_path) - - -async def process_audio(audio_data: Dict) -> str: - audio_bytes = audio_data["bytes"] - mimetype = audio_data.get("mimetype", "audio/ogg") - print(f"[AUDIO] Получено {len(audio_bytes)} байт аудио, тип: {mimetype}") - text = await transcribe_audio(audio_bytes, mimetype) - if text is None: - print("[AUDIO] Распознавание не удалось.") - return "" - return text - - -async def process_image(image_data: Dict) -> str: - print(f"[IMAGE] Получено {len(image_data['bytes'])} байт изображения") - return "[Описание изображения будет добавлено позже]" - - -async def generate_report(text: str, images_data: list, audios_data: list) -> str: - audio_texts = [] - for audio in audios_data: - audio_text = await process_audio(audio) - if audio_text: - audio_texts.append(audio_text) - - image_descriptions = [] - for img in images_data: - desc = await process_image(img) - if desc: - image_descriptions.append(desc) - - parts = [] - if text: - parts.append(f"**Текст сообщения:**\n{text}") - if audio_texts: - parts.append("**Распознанный текст из аудио:**\n" + "\n\n".join(audio_texts)) - if image_descriptions: - parts.append("**Описания изображений:**\n" + "\n".join(image_descriptions)) - - if not parts: - return "Не удалось обработать сообщение (нет текста, не распознано аудио или ошибка)." - - print(f"[REPORT] text: {text}, images: {len(image_descriptions)}, audio: {len(audio_texts)}") - return "\n\n".join(parts) - - -async def send_error_message(room_id: str, error_text: str): - await client.room_send( - room_id, - "m.room.message", - {"msgtype": "m.text", "body": f"❌ {error_text}"} - ) - - -async def process_complete_message(data: Dict): - room_id = data["room_id"] - # Объединяем все текстовые сообщения, которые были в этой группе - text_parts = data.get("text", []) - text = "\n".join(text_parts) if text_parts else "" - images_data = data.get("images", []) - audios_data = data.get("audio", []) - - report = await generate_report(text, images_data, audios_data) - - await client.room_send( - room_id, - "m.room.message", - {"msgtype": "m.text", "body": report} - ) - - if "event_id" in data: - pending_by_event_id.pop(data["event_id"], None) - pending_by_conversation.pop((room_id, data["sender"]), None) - - -async def delayed_processing(data: Dict): - await asyncio.sleep(GROUPING_TIMEOUT) - key = (data["room_id"], data["sender"]) - if pending_by_conversation.get(key) is data: - await process_complete_message(data) - - -def get_or_create_pending(room_id: str, sender: str, event_id: Optional[str] = None) -> Dict: - if event_id and event_id in pending_by_event_id: - return pending_by_event_id[event_id] - - key = (room_id, sender) - if key in pending_by_conversation: - return pending_by_conversation[key] - - data = { - "room_id": room_id, - "sender": sender, - "text": [], # список строк, а не одна строка - "images": [], - "audio": [], - "timestamp": time.time(), - "task": None, - } - if event_id: - data["event_id"] = event_id - pending_by_conversation[key] = data - if event_id: - pending_by_event_id[event_id] = data - return data - - -def reset_timer(data: Dict): - if data["task"] and not data["task"].done(): - data["task"].cancel() - data["timestamp"] = time.time() - data["task"] = asyncio.create_task(delayed_processing(data)) - - -async def on_text_message(room, event: RoomMessageText): - if event.sender == client.user_id: - return - if room.room_id not in ALLOWED_ROOMS: - return - - event_id = event.event_id - data = get_or_create_pending(room.room_id, event.sender, event_id) - # Добавляем текст в список, а не заменяем - data["text"].append(event.body) - reset_timer(data) - print(f"[TEXT] Добавлен текст в сообщение от {event.sender}: {event.body}") - - -async def on_image_message(room, event: RoomMessageImage): - if event.sender == client.user_id: - return - if room.room_id not in ALLOWED_ROOMS: - return - - related_event_id = None - if hasattr(event, "source") and "content" in event.source: - content = event.source["content"] - if "m.relates_to" in content and "event_id" in content["m.relates_to"]: - related_event_id = content["m.relates_to"]["event_id"] - - data = get_or_create_pending(room.room_id, event.sender, related_event_id) - - download_result = await client.download(event.url) - if isinstance(download_result, ErrorResponse): - print(f"[IMAGE] Ошибка скачивания: {download_result.status_code} - {download_result.message}") - await send_error_message(room.room_id, "Не удалось загрузить изображение.") - return - - mimetype = getattr(event, "mimetype", None) - if not mimetype and hasattr(event, "info") and isinstance(event.info, dict): - mimetype = event.info.get("mimetype") - if not mimetype: - mimetype = "image/jpeg" - - data["images"].append({ - "bytes": download_result.body, - "mimetype": mimetype, - }) - reset_timer(data) - print(f"[IMAGE] Добавлено изображение в сообщение от {event.sender}") - - -async def on_audio_message(room, event: RoomMessageAudio): - if event.sender == client.user_id: - return - if room.room_id not in ALLOWED_ROOMS: - return - - related_event_id = None - if hasattr(event, "source") and "content" in event.source: - content = event.source["content"] - if "m.relates_to" in content and "event_id" in content["m.relates_to"]: - related_event_id = content["m.relates_to"]["event_id"] - - data = get_or_create_pending(room.room_id, event.sender, related_event_id) - - download_result = await client.download(event.url) - if isinstance(download_result, ErrorResponse): - print(f"[AUDIO] Ошибка скачивания: {download_result.status_code} - {download_result.message}") - await send_error_message(room.room_id, "Не удалось загрузить аудио.") - return - - mimetype = None - if hasattr(event, "info") and isinstance(event.info, dict): - mimetype = event.info.get("mimetype") - if not mimetype: - mimetype = "audio/ogg" - - data["audio"].append({ - "bytes": download_result.body, - "mimetype": mimetype, - }) - reset_timer(data) - print(f"[AUDIO] Добавлено аудио в сообщение от {event.sender}") - - -async def main(): - global client - - config = AsyncClientConfig( - max_timeouts=10, - store_sync_tokens=True, - encryption_enabled=False, - ) - client = AsyncClient( - homeserver=HOMESERVER, - user=USERNAME, - device_id=None, - config=config, - ) - - try: - if PASSWORD: - response = await client.login(PASSWORD) - else: - response = await client.login(token=os.environ.get("ACCESS_TOKEN", "")) - - if isinstance(response, LoginResponse): - print(f"Бот {USERNAME} успешно авторизован на {HOMESERVER}") - print(f"Access token: {client.access_token}") - else: - print(f"Ошибка авторизации: {response}") - return - except Exception as e: - print(f"Исключение при авторизации: {e}") - return - - if not ffmpeg_available(): - print("ВНИМАНИЕ: ffmpeg не найден. Бот не сможет распознавать аудио.") - print("Установите ffmpeg (https://ffmpeg.org/download.html) и добавьте в PATH.") - else: - print("ffmpeg найден, аудио будет обрабатываться.") - - if WHISPER_LANGUAGE: - print(f"Язык распознавания: {WHISPER_LANGUAGE}") - - client.add_event_callback(on_text_message, RoomMessageText) - client.add_event_callback(on_image_message, RoomMessageImage) - client.add_event_callback(on_audio_message, RoomMessageAudio) - - print("Бот запущен, ожидание событий...") - try: - await client.sync_forever(timeout=30000) - except KeyboardInterrupt: - print("Бот остановлен пользователем") - finally: - await client.close() - - -if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file +#!/usr/bin/env python3 +import asyncio +import os +import tempfile +import aiofiles +import time +import shutil +import subprocess +from weasyprint import HTML +import io +import json +from typing import Dict, Optional, Tuple +from dotenv import load_dotenv + +import aiohttp +from nio import ( + AsyncClient, + RoomMessageText, + RoomMessageImage, + RoomMessageAudio, + LoginResponse, + AsyncClientConfig, + ErrorResponse, + UploadResponse, + UploadError, +) + +from faster_whisper import WhisperModel + +load_dotenv() + +HOMESERVER = os.getenv("HOMESERVER", "https://matrix.org") +USERNAME = os.getenv("MATRIX_USERNAME") +PASSWORD = os.getenv("PASSWORD") +ALLOWED_ROOMS = set(room.strip() for room in os.getenv("ALLOWED_ROOMS", "").split(",") if room.strip()) +WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "ru") +WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small") + +# Qwen API +QWEN_API_KEY = os.getenv("QWEN_API_KEY") +QWEN_ENDPOINT = os.getenv("QWEN_ENDPOINT", "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions") +QWEN_MODEL = os.getenv("QWEN_MODEL", "qwen3.5-122b") +QWEN_PROMPT_TEMPLATE = "" +with open("base_prompt.txt", "r") as f: + QWEN_PROMPT_TEMPLATE += f.read() + +TEMP_DIR = tempfile.gettempdir() +GROUPING_TIMEOUT = 15.0 + +client: AsyncClient = None +pending_by_conversation: Dict[Tuple[str, str], Dict] = {} +pending_by_event_id: Dict[str, Dict] = {} + +whisper_model = None + + +def get_whisper_model(): + global whisper_model + if whisper_model is None: + whisper_model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8") + print(f"Whisper модель {WHISPER_MODEL} загружена (faster-whisper).") + return whisper_model + + +def ffmpeg_available() -> bool: + found = shutil.which("ffmpeg") is not None + if not found: + print("[ERROR] ffmpeg не найден в системе. Установите ffmpeg и добавьте в PATH.") + return found + + +def get_file_extension(mimetype: str) -> str: + ext_map = { + "audio/ogg": ".ogg", + "audio/mpeg": ".mp3", + "audio/mp4": ".m4a", + "audio/x-m4a": ".m4a", + "audio/wav": ".wav", + "audio/webm": ".webm", + } + return ext_map.get(mimetype, ".tmp") + + +async def convert_to_wav(input_path: str) -> Optional[str]: + output_fd, output_path = tempfile.mkstemp(suffix=".wav") + os.close(output_fd) + cmd = [ + "ffmpeg", "-i", input_path, + "-map", "0:a:0", + "-map_metadata", "-1", + "-vn", + "-acodec", "pcm_s16le", + "-ar", "16000", + "-ac", "1", + "-y", + output_path + ] + try: + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, lambda: subprocess.run(cmd, capture_output=True, check=True)) + return output_path + except subprocess.CalledProcessError as e: + print(f"[AUDIO] Ошибка конвертации ffmpeg: {e.stderr.decode()}") + if os.path.exists(output_path): + os.unlink(output_path) + return None + + +async def transcribe_audio(audio_bytes: bytes, mimetype: str) -> Optional[str]: + if not ffmpeg_available(): + print("[AUDIO] Ошибка: ffmpeg не установлен.") + return None + + ext = get_file_extension(mimetype) + loop = asyncio.get_running_loop() + with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp: + tmp.write(audio_bytes) + input_path = tmp.name + + wav_path = None + try: + wav_path = await convert_to_wav(input_path) + if not wav_path: + print("[AUDIO] Конвертация в WAV не удалась.") + return None + + model = get_whisper_model() + segments, info = await loop.run_in_executor( + None, + lambda: model.transcribe(wav_path, beam_size=5, language=WHISPER_LANGUAGE) + ) + text = " ".join([segment.text for segment in segments]) + return text.strip() + except Exception as e: + print(f"[AUDIO] Ошибка при распознавании: {e}") + return None + finally: + if os.path.exists(input_path): + os.unlink(input_path) + if wav_path and os.path.exists(wav_path): + os.unlink(wav_path) + + +async def call_qwen_api(prompt: str) -> str: + """ + Асинхронный вызов Qwen API для генерации отчёта. + Возвращает текст ответа или сообщение об ошибке. + """ + if not QWEN_API_KEY: + print("[QWEN] API ключ не задан, возвращаем заглушку.") + return "API ключ Qwen не настроен. Отчёт не может быть сгенерирован." + + headers = { + "Authorization": f"Bearer {QWEN_API_KEY}", + "Content-Type": "application/json" + } + payload = { + "model": QWEN_MODEL, + "messages": [ + {"role": "user", "content": prompt} + ] + } + + try: + async with aiohttp.ClientSession() as session: + async with session.post(QWEN_ENDPOINT, headers=headers, json=payload) as resp: + if resp.status == 200: + data = await resp.json() + if "choices" in data and len(data["choices"]) > 0: + content = data["choices"][0]["message"]["content"] + return content.strip() + else: + print(f"[QWEN] Неожиданный формат ответа: {data}") + return "Ошибка: не удалось извлечь ответ из API." + else: + text = await resp.text() + print(f"[QWEN] Ошибка API: {resp.status} - {text}") + return f"Ошибка при обращении к Qwen API (HTTP {resp.status})." + except Exception as e: + print(f"[QWEN] Исключение: {e}") + return "Не удалось соединиться с Qwen API." + + +async def generate_report(text: str, images_data: list, audios_data: list) -> Optional[bytes]: + # Собираем транскрипции аудио + audio_texts = [] + for audio in audios_data: + audio_text = await transcribe_audio(audio["bytes"], audio.get("mimetype", "audio/ogg")) + if audio_text: + audio_texts.append(audio_text) + + # Формируем полный текст для отчёта + parts = [] + if text: + parts.append(f"Текстовые сообщения:\n{text}") + if audio_texts: + parts.append("Расшифровка аудио:\n" + "\n\n".join(audio_texts)) + if images_data: + parts.append(f"Количество изображений: {len(images_data)} (анализ не выполнен)") + + full_text = "\n\n".join(parts) + if not full_text.strip(): + return None + + prompt = f"{QWEN_PROMPT_TEMPLATE}\n Текст: {full_text}" + print("[QWEN] Отправка запроса...") + report = await call_qwen_api(prompt) + print(f"[QWEN] Получен ответ: {report[:200]}...") + + # Если API вернул ошибку, не генерируем PDF + if report.startswith("Ошибка:"): + print(f"[QWEN] Ошибка API: {report}") + return None + report = report.replace('```html', '') + report = report.replace('```', '') + + try: + pdf_bytes = HTML(string=report).write_pdf() + return pdf_bytes + except Exception as e: + print(f"[PDF] Ошибка генерации: {e}") + return None + + +async def send_error_message(room_id: str, error_text: str): + await client.room_send( + room_id, + "m.room.message", + {"msgtype": "m.text", "body": f"❌ {error_text}"} + ) + + +async def process_audio(audio_data: Dict) -> str: + audio_bytes = audio_data["bytes"] + mimetype = audio_data.get("mimetype", "audio/ogg") + print(f"[AUDIO] Получено {len(audio_bytes)} байт аудио, тип: {mimetype}") + text = await transcribe_audio(audio_bytes, mimetype) + if text is None: + print("[AUDIO] Распознавание не удалось.") + return "" + return text + + +async def process_image(image_data: Dict) -> str: + print(f"[IMAGE] Получено {len(image_data['bytes'])} байт изображения") + return "[Описание изображения будет добавлено позже]" + + +async def process_complete_message(data: Dict): + room_id = data["room_id"] + text_parts = data.get("text", []) + text = "\n".join(text_parts) if text_parts else "" + images_data = data.get("images", []) + audios_data = data.get("audio", []) + + pdf_bytes = await generate_report(text, images_data, audios_data) + + if pdf_bytes is None: + await client.room_send( + room_id, + "m.room.message", + {"msgtype": "m.text", + "body": "Не удалось обработать сообщение (нет текста, не распознано аудио или ошибка)."} + ) + else: + print("[FILE] Загрузка файла на сервер...") + # Создаём файловый объект из байтов + file_like = io.BytesIO(pdf_bytes) + upload_result = await client.upload( + file_like, + content_type="application/pdf", + filename="report.pdf", + filesize=len(pdf_bytes) # обязательно указываем размер + ) + + # Результат может быть кортежем (UploadError, None) или объектом UploadResponse + if isinstance(upload_result, tuple) and len(upload_result) > 0: + result_obj = upload_result[0] + else: + result_obj = upload_result + + if isinstance(result_obj, UploadError): + print(f"[FILE] Ошибка загрузки: {result_obj.status_code} - {result_obj.message}") + await client.room_send( + room_id, + "m.room.message", + {"msgtype": "m.text", "body": "❌ Не удалось загрузить отчёт на сервер."} + ) + elif isinstance(result_obj, UploadResponse): + mxc_url = result_obj.content_uri + await client.room_send( + room_id, + "m.room.message", + { + "msgtype": "m.file", + "body": "report.pdf", + "url": mxc_url, + "filename": "report.pdf", + "info": { + "mimetype": "application/pdf", + "size": len(pdf_bytes) + } + } + ) + print("[FILE] PDF отправлен") + else: + print(f"[FILE] Неизвестный тип ответа: {result_obj}") + await client.room_send( + room_id, + "m.room.message", + {"msgtype": "m.text", "body": "❌ Ошибка при загрузке отчёта (неизвестный ответ сервера)."} + ) + + # Очистка данных + if "event_id" in data: + pending_by_event_id.pop(data["event_id"], None) + pending_by_conversation.pop((room_id, data["sender"]), None) + + +async def delayed_processing(data: Dict): + await asyncio.sleep(GROUPING_TIMEOUT) + key = (data["room_id"], data["sender"]) + if pending_by_conversation.get(key) is data: + await process_complete_message(data) + + +def get_or_create_pending(room_id: str, sender: str, event_id: Optional[str] = None) -> Dict: + if event_id and event_id in pending_by_event_id: + return pending_by_event_id[event_id] + + key = (room_id, sender) + if key in pending_by_conversation: + return pending_by_conversation[key] + + data = { + "room_id": room_id, + "sender": sender, + "text": [], + "images": [], + "audio": [], + "timestamp": time.time(), + "task": None, + } + if event_id: + data["event_id"] = event_id + pending_by_conversation[key] = data + if event_id: + pending_by_event_id[event_id] = data + return data + + +def reset_timer(data: Dict): + if data["task"] and not data["task"].done(): + data["task"].cancel() + data["timestamp"] = time.time() + data["task"] = asyncio.create_task(delayed_processing(data)) + + +async def on_text_message(room, event: RoomMessageText): + if event.sender == client.user_id: + return + if room.room_id not in ALLOWED_ROOMS: + return + + event_id = event.event_id + data = get_or_create_pending(room.room_id, event.sender, event_id) + data["text"].append(event.body) + reset_timer(data) + print(f"[TEXT] Добавлен текст в сообщение от {event.sender}: {event.body}") + + +async def on_image_message(room, event: RoomMessageImage): + if event.sender == client.user_id: + return + if room.room_id not in ALLOWED_ROOMS: + return + + related_event_id = None + if hasattr(event, "source") and "content" in event.source: + content = event.source["content"] + if "m.relates_to" in content and "event_id" in content["m.relates_to"]: + related_event_id = content["m.relates_to"]["event_id"] + + data = get_or_create_pending(room.room_id, event.sender, related_event_id) + + download_result = await client.download(event.url) + if isinstance(download_result, ErrorResponse): + print(f"[IMAGE] Ошибка скачивания: {download_result.status_code} - {download_result.message}") + await send_error_message(room.room_id, "Не удалось загрузить изображение.") + return + + mimetype = getattr(event, "mimetype", None) + if not mimetype and hasattr(event, "info") and isinstance(event.info, dict): + mimetype = event.info.get("mimetype") + if not mimetype: + mimetype = "image/jpeg" + + data["images"].append({ + "bytes": download_result.body, + "mimetype": mimetype, + }) + reset_timer(data) + print(f"[IMAGE] Добавлено изображение в сообщение от {event.sender}") + + +async def on_audio_message(room, event: RoomMessageAudio): + if event.sender == client.user_id: + return + if room.room_id not in ALLOWED_ROOMS: + return + + related_event_id = None + if hasattr(event, "source") and "content" in event.source: + content = event.source["content"] + if "m.relates_to" in content and "event_id" in content["m.relates_to"]: + related_event_id = content["m.relates_to"]["event_id"] + + data = get_or_create_pending(room.room_id, event.sender, related_event_id) + + download_result = await client.download(event.url) + if isinstance(download_result, ErrorResponse): + print(f"[AUDIO] Ошибка скачивания: {download_result.status_code} - {download_result.message}") + await send_error_message(room.room_id, "Не удалось загрузить аудио.") + return + + mimetype = None + if hasattr(event, "info") and isinstance(event.info, dict): + mimetype = event.info.get("mimetype") + if not mimetype: + mimetype = "audio/ogg" + + data["audio"].append({ + "bytes": download_result.body, + "mimetype": mimetype, + }) + reset_timer(data) + print(f"[AUDIO] Добавлено аудио в сообщение от {event.sender}") + + +async def main(): + global client + + config = AsyncClientConfig( + max_timeouts=10, + store_sync_tokens=True, + encryption_enabled=False, + ) + client = AsyncClient( + homeserver=HOMESERVER, + user=USERNAME, + device_id=None, + config=config, + ) + + try: + if PASSWORD: + response = await client.login(PASSWORD) + else: + response = await client.login(token=os.environ.get("ACCESS_TOKEN", "")) + + if isinstance(response, LoginResponse): + print(f"Бот {USERNAME} успешно авторизован на {HOMESERVER}") + else: + print(f"Ошибка авторизации: {response}") + return + except Exception as e: + print(f"Исключение при авторизации: {e}") + return + + if not ffmpeg_available(): + print("ВНИМАНИЕ: ffmpeg не найден. Бот не сможет распознавать аудио.") + print("Установите ffmpeg (https://ffmpeg.org/download.html) и добавьте в PATH.") + else: + print("ffmpeg найден, аудио будет обрабатываться.") + + if WHISPER_LANGUAGE: + print(f"Язык распознавания: {WHISPER_LANGUAGE}") + + if not QWEN_API_KEY: + print("ВНИМАНИЕ: QWEN_API_KEY не задан. Генерация отчётов будет недоступна.") + + client.add_event_callback(on_text_message, RoomMessageText) + client.add_event_callback(on_image_message, RoomMessageImage) + client.add_event_callback(on_audio_message, RoomMessageAudio) + + print("Бот запущен, ожидание событий...") + try: + await client.sync_forever(timeout=30000) + except KeyboardInterrupt: + print("Бот остановлен пользователем") + finally: + await client.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/requirements.txt b/requirements.txt index 3fc940d..5ef9bae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,13 +7,17 @@ annotated-doc==0.0.4 anyio==4.12.1 attrs==26.1.0 av==17.0.0 +brotli==1.2.0 certifi==2026.2.25 +cffi==2.0.0 click==8.3.1 +cssselect2==0.9.0 ctranslate2==4.7.1 dotenv==0.9.9 faster-whisper==1.2.1 filelock==3.25.2 flatbuffers==25.12.19 +fonttools==4.62.1 frozenlist==1.8.0 fsspec==2026.2.0 h11==0.16.0 @@ -27,6 +31,7 @@ hyperframe==6.1.0 idna==3.11 jsonschema==4.26.0 jsonschema-specifications==2025.9.1 +Markdown==3.10.2 markdown-it-py==4.0.0 matrix-nio==0.25.2 mdurl==0.1.2 @@ -35,10 +40,14 @@ multidict==6.7.1 numpy==2.4.3 onnxruntime==1.24.4 packaging==26.0 +pillow==12.1.1 propcache==0.4.1 protobuf==7.34.1 +pycparser==3.0 pycryptodome==3.23.0 +pydyf==0.12.1 Pygments==2.19.2 +pyphen==0.17.2 python-dotenv==1.2.2 python-socks==2.8.1 PyYAML==6.0.3 @@ -48,9 +57,14 @@ rpds-py==0.30.0 setuptools==82.0.1 shellingham==1.5.4 sympy==1.14.0 +tinycss2==1.5.1 +tinyhtml5==2.1.0 tokenizers==0.22.2 tqdm==4.67.3 typer==0.24.1 typing_extensions==4.15.0 unpaddedbase64==2.1.0 +weasyprint==68.1 +webencodings==0.5.1 yarl==1.23.0 +zopfli==0.4.1 From 806e85fcec7751e5f3cd4c75840471ad45d37afc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A1=D0=BB=D0=BE=D0=BD=D0=BE=D0=B2=D0=B0=20=D0=90=D0=BD?= =?UTF-8?q?=D0=BD=D0=B0?= Date: Thu, 26 Mar 2026 01:00:46 +0300 Subject: [PATCH 4/5] Add qwen settings to env.example --- .env.example | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index cbd76b2..b7c5249 100644 --- a/.env.example +++ b/.env.example @@ -12,4 +12,9 @@ ALLOWED_ROOMS=!roomid1:matrix.org,!roomid2:matrix.org # Whisper settings WHISPER_LANGUAGE=ru -WHISPER_MODEL=small \ No newline at end of file +WHISPER_MODEL=small + +#Qwen settings +QWEN_API_KEY=your_api_key +QWEN_ENDPOINT=your_endpoint +QWEN_MODEL=qwen_model \ No newline at end of file From 4f360d35b20b3e0cc6e5550f775da0bfcfb60b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A1=D0=BB=D0=BE=D0=BD=D0=BE=D0=B2=D0=B0=20=D0=90=D0=BD?= =?UTF-8?q?=D0=BD=D0=B0?= Date: Wed, 1 Apr 2026 01:35:03 +0300 Subject: [PATCH 5/5] Do quality assessment --- base_prompt.txt | 3 ++- main.py | 10 ++++++++++ quality_prompt.txt | 9 +++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 quality_prompt.txt diff --git a/base_prompt.txt b/base_prompt.txt index 303ae00..6a618ec 100644 --- a/base_prompt.txt +++ b/base_prompt.txt @@ -15,4 +15,5 @@ текстовая информация. Размер шрифт должен быть не крупным, но удобно читаемым (около 12 пунктов). Фон ВСЕГДА должен быть исключительно белым. Если это требуется, ответ может быть достаточно большим по размеру. Не ограничивай себя в длине -ответа, но и ненужную информацию оставлять не нужно. \ No newline at end of file +ответа, но и ненужную информацию оставлять не нужно. +Если какой-либо информации в тексте сообщений/аудио нет - НЕ ДОДУМЫВАЙ её, а обозначь её отсутствие. \ No newline at end of file diff --git a/main.py b/main.py index 440ae13..950553e 100644 --- a/main.py +++ b/main.py @@ -44,6 +44,10 @@ QWEN_PROMPT_TEMPLATE = "" with open("base_prompt.txt", "r") as f: QWEN_PROMPT_TEMPLATE += f.read() +QWEN_QUALITY_PROMPT_TEMPLATE = "" +with open("quality_prompt.txt", "r") as f: + QWEN_QUALITY_PROMPT_TEMPLATE += f.read() + TEMP_DIR = tempfile.gettempdir() GROUPING_TIMEOUT = 15.0 @@ -211,6 +215,11 @@ async def generate_report(text: str, images_data: list, audios_data: list) -> Op if report.startswith("Ошибка:"): print(f"[QWEN] Ошибка API: {report}") return None + + quality_prompt = f"{QWEN_QUALITY_PROMPT_TEMPLATE}\n Исходные данные: {full_text} Отчёт: {report}" + quality_report = await call_qwen_api(quality_prompt) + print(f"[QWEN] Оценка качества: {quality_report}") + report = report.replace('```html', '') report = report.replace('```', '') @@ -238,6 +247,7 @@ async def process_audio(audio_data: Dict) -> str: if text is None: print("[AUDIO] Распознавание не удалось.") return "" + print(f"[AUDIO] Распознанный текст: {text[:300]}...") return text diff --git a/quality_prompt.txt b/quality_prompt.txt new file mode 100644 index 0000000..e0659af --- /dev/null +++ b/quality_prompt.txt @@ -0,0 +1,9 @@ +Тебе дан текст расшифровки аудиозаписи рабочей встречи в IT-компании и сопутствующие +сообщения пользователя. Далее тебе дан отчёт по этой встрече, автоматически сгенерированный ИИ-агентом +на основе этого текста и сообщений пользователя. +Твоя задача - оценить качество работы агента на основе исходного текста и написанного отчёта. +Сперва дай оценку качества по шкале от 0 до 100. Далее, если есть проблемы, выдели их. +Проверь, нет ли противоречий в тексте отчёта и исходном, не упущена ли важная информация и т.д. +Укажи на имеющиеся ошибки/недочёты, если они есть, и предположи, с чем они могут быть связаны и как +их исправить. +Твой ответ должен быть удобочитаемым в .txt формате. \ No newline at end of file