add counts to each student + OCR score

2026-04-19 22:25:44 +03:00 · 2026-04-19 22:25:44 +03:00 · dcc36f8f26
commit dcc36f8f26
parent 811e4d3ffa
257 changed files with 12550 additions and 93 deletions
--- a/ege-checker.py
+++ b/ege-checker.py
@@ -1,38 +1,41 @@
 import os
 import base64
+import asyncio
 import time
+import io
 from dotenv import load_dotenv
-from openai import OpenAI
+from openai import AsyncOpenAI  # Переходим на АСИНХРОННЫЙ
 import httpx
 from PIL import Image

 load_dotenv()

-MODELS_TO_TEST = [
-    "qwen3.5-122b"
-]
+# Only the Qwen model is kept in the list of models to run
+MODELS_TO_TEST = ["qwen3.5-122b"]

-client = OpenAI(
-    api_key=os.getenv("API_KEY"),
-    base_url=os.getenv("LAOZHANG_BASE_URL"),
-    http_client=httpx.Client(timeout=httpx.Timeout(600.0, connect=30.0))
+# Async client with a generous timeout (900 s default, 60 s to connect)
+client = AsyncOpenAI(
+    api_key=os.getenv("QWEN_API_KEY"),
+    base_url=os.getenv("QWEN_BASE_URL"),
+    http_client=httpx.AsyncClient(timeout=httpx.Timeout(900.0, connect=60.0))
 )


 def encode_image(image_path):
-    """Кодирует изображение в base64, конвертируя TIFF в JPEG при необходимости"""
-    if image_path.lower().endswith('.tif') or image_path.lower().endswith('.tiff'):
-        with Image.open(image_path) as img:
-            if img.mode not in ('RGB', 'L'):
-                img = img.convert('RGB')
-            import io
-            buffer = io.BytesIO()
-            img.save(buffer, format='JPEG', quality=95)
-            buffer.seek(0)
-            return base64.b64encode(buffer.read()).decode('utf-8')
-    else:
-        with open(image_path, "rb") as img:
-            return base64.b64encode(img.read()).decode('utf-8')
+    """Shrink the image, re-encode it as JPEG and return it as a base64 string (smaller payload to avoid Connection Error)."""
+    with Image.open(image_path) as img:
+        # Convert to RGB unless the mode is already RGB or L
+        if img.mode not in ('RGB', 'L'):
+            img = img.convert('RGB')
+
+        # Shrink in place so the image fits within 1600x1600 px
+        # (the author notes this is critical for connection stability)
+        img.thumbnail((1600, 1600))
+
+        buffer = io.BytesIO()
+        # JPEG quality 85, chosen by the author as a size/quality compromise
+        img.save(buffer, format='JPEG', quality=85)
+        return base64.b64encode(buffer.getvalue()).decode('utf-8')


 def get_instructions(criteria_file):
@@ -44,81 +47,96 @@ def get_instructions(criteria_file):
    return f"{skill}\n\n{criteria}"


-def run_mass_check(base_dir="photo", criteria_file="russian-essay-criteria.md"):
-    students = [d for d in os.listdir(
-        base_dir) if os.path.isdir(os.path.join(base_dir, d))]
-
-    if not students:
-        print("[!] В папке photo пусто. Создай там папки с именами учеников.")
+async def process_student(student, student_path, instructions):
+    """Check one student: gather photos from student_path, query each model, write REPORT_<model>.md into that folder."""
+    # Collect image files by extension, sorted by file name
+    photos = sorted([f for f in os.listdir(student_path)
+                    if f.lower().endswith(('.jpg', '.jpeg', '.png', '.tif', '.tiff'))])
+    if not photos:
        return

-    print(f"=== ЗАПУСК МАССОВОЙ ПРОВЕРКИ: {len(students)} учеников ===")
-    instructions = get_instructions(criteria_file)
+    # Optional source text (source.txt in the student's folder), appended to the prompt when present
+    source_text = ""
+    source_path = os.path.join(student_path, "source.txt")
+    if os.path.exists(source_path):
+        with open(source_path, "r", encoding="utf-8") as f:
+            source_text = f.read()

-    for student in students:
-        student_path = os.path.join(base_dir, student)
-        photos = sorted([f for f in os.listdir(student_path)
-                        if f.lower().endswith(('.jpg', '.jpeg', '.png', '.tif', '.tiff'))])
+    print(f"\n>>> РАБОТАЕМ С: {student.upper()} ({len(photos)} листа)")

-        if not photos:
-            print(f"[SKIP] У {student} нет фото.")
-            continue
+    prompt_text = "Распознай текст и проверь сочинение строго по критериям."
+    if source_text:
+        prompt_text += f"\n\nИСХОДНЫЙ ТЕКСТ ДЛЯ СВЕРКИ:\n{source_text}"

-        print(f"\n>>> РАБОТАЕМ С: {student.upper()} ({len(photos)} листа)")
+    message_content = [{"type": "text", "text": prompt_text}]

-        message_content = [
-            {"type": "text", "text": "Распознай рукописный текст и проверь сочинение строго по критериям ФИПИ."}]
-
-        for p in photos:
+    # Encode the photos (now with compression); a photo that fails is logged and left out of the request
+    for p in photos:
+        try:
            b64 = encode_image(os.path.join(student_path, p))
            message_content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{b64}"}
            })
+        except Exception as e:
+            print(f"  [!] Ошибка обработки фото {p}: {e}")

-        for model_id in MODELS_TO_TEST:
-            safe_name = model_id.replace("/", "_")
+    for model_id in MODELS_TO_TEST:
+        safe_name = model_id.replace("/", "_")
+        output_file = os.path.join(student_path, f"REPORT_{safe_name}.md")

-            # ✅ ОТРЕДАКТИРОВАНО: отчёт сохраняется ВНУТРЬ папки ученика
-            output_file = os.path.join(student_path, f"REPORT_{safe_name}.md")
+        # 1. Build the report file name we want to create (NOTE(review): repeats the two assignments just above)
+        safe_name = model_id.replace("/", "_")
+        output_file = os.path.join(student_path, f"REPORT_{safe_name}.md")

-            old_file = os.path.join(
-                student_path, f"REPORT_{student}_{safe_name}.md")
+        # 2. Skip the model if any file in the folder starts with REPORT_ and contains the model's safe name
+        existing_reports = [f for f in os.listdir(student_path)
+                            if f.startswith("REPORT_") and safe_name in f]

-            new_file = os.path.join(student_path, f"REPORT_{safe_name}.md")
+        if existing_reports:
+            print(
+                f"  [-] {model_id}: Уже есть отчет ({existing_reports[0]}). Пропускаю.")
+            continue

-            if os.path.exists(old_file) or os.path.exists(new_file):
-                print(f"  [-] {model_id}: Уже проверено.")
-                continue
+        print(f"  [!] Запуск {model_id}...")
+        start_time = time.time()

-            print(f"  [!] Запуск {model_id}...")
-            start_time = time.time()
+        try:
+            # Awaited (asynchronous) API call
+            response = await client.chat.completions.create(
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": instructions},
+                    {"role": "user", "content": message_content}
+                ],
+                temperature=0.0
+            )

-            try:
-                response = client.chat.completions.create(
-                    model=model_id,
-                    messages=[
-                        {"role": "system", "content": instructions},
-                        {"role": "user", "content": message_content}
-                    ],
-                    temperature=0.0
-                )
+            res_text = response.choices[0].message.content
+            duration = round(time.time() - start_time, 1)

-                res_text = response.choices[0].message.content
-                duration = round(time.time() - start_time, 1)
+            with open(output_file, "w", encoding="utf-8") as f:
+                header = f"--- \n**Ученик:** {student}\n**Модель:** {model_id}\n**Время:** {duration} сек.\n---\n\n"
+                f.write(header + res_text)

-                with open(output_file, "w", encoding="utf-8") as f:
-                    header = f"--- \n**Ученик:** {student}\n**Модель:** {model_id}\n**Время:** {duration} сек.\n---\n\n"
-                    f.write(header + res_text)
+            print(f"  [OK] Готово! ({duration} сек.)")
+            await asyncio.sleep(10)  # Pause between requests

-                print(f"  [OK] Готово! ({duration} сек.)")
-                print(f"  📁 Сохранено: {output_file}")
-                time.sleep(12)
+        except Exception as e:
+            print(f"  [ERR] Ошибка у {model_id} для {student}: {str(e)}")

-            except Exception as e:
-                print(f"  [ERR] Ошибка у {model_id}: {str(e)}")

+async def run_mass_check(base_dir="photo", criteria_file="russian-essay-criteria.md"):
+    students = [d for d in os.listdir(
+        base_dir) if os.path.isdir(os.path.join(base_dir, d))]
+    if not students:
+        return
+
+    instructions = get_instructions(criteria_file)
+
+    # Students are processed one at a time so as not to overload the link and trigger Connection Error
+    for student in students:
+        await process_student(student, os.path.join(base_dir, student), instructions)

 if __name__ == "__main__":
-    run_mass_check()
-    print("\n=== ВСЕ ПРОВЕРКИ ВЫПОЛНЕНЫ ===")
+    asyncio.run(run_mass_check())  # run the async entry point to completion