Make scripts better

2026-04-30 10:57:18 +03:00 · 2026-04-30 10:57:18 +03:00 · e8ad7df469
commit e8ad7df469
parent ba56147e95
12 changed files with 614 additions and 432 deletions
--- a/scripts/transcribe.sh
+++ b/scripts/transcribe.sh
@ -1,59 +1,42 @@
 #!/bin/bash
-# Transcribe audio recordings using local faster-whisper
-# Supports multiple sources: Zoom H2n (4ch WAV), Saramonic (mono WAV), etc.
-#
-# Usage:
-#   ./transcribe.sh /absolute/path/to/meeting_folder
-#   ./transcribe.sh /absolute/path/to/meeting_folder specific.WAV output_name
-#
-# Examples:
-#   ./transcribe.sh /app/hermes_data/meetings/2026-02-18
-#   ./transcribe.sh /app/hermes_data/meetings/2026-02-18 SR003XY.WAV h2n_xy
+# Transcribe audio recordings using local whisper server (with API key)
+# Usage: ./transcribe.sh <meeting_dir> [<file.WAV> <output_name>]

 set -euo pipefail

 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-WHISPER_MODEL="base"
+# Required configuration: abort with a clear message if either variable is
+# unset or empty. The expansions are quoted so that a set value is not
+# word-split or glob-expanded when passed to the ':' no-op.
+: "${WHISPER_URL:?ERROR: WHISPER_URL not set (e.g., https://llm.lambda.coredump.ru/v1)}"
+: "${WHISPER_API_KEY:?ERROR: WHISPER_API_KEY not set}"
+MODEL="whisper-1"  # Changed from a specific model to a generic name
 LANGUAGE="ru"

-# Load hotwords
-HOTWORDS_FILE="$SCRIPT_DIR/hotwords.txt"
+# Hotwords: path of the hotwords file. HOTWORDS_PATH overrides the default
+# next to this script; the misspelled HOTWORKS_PATH is still honored as a
+# fallback so existing environments keep working.
+HOTWORDS_FILE="${HOTWORDS_PATH:-${HOTWORKS_PATH:-$SCRIPT_DIR/hotwords.txt}}"
+HOTWORDS=""
 if [ -f "$HOTWORDS_FILE" ]; then
    HOTWORDS=$(grep -v '^#' "$HOTWORDS_FILE" | grep -v '^$' | tr '\n' ',' | sed 's/,,*/,/g; s/^,//; s/,$//')
    echo "Loaded hotwords from $HOTWORDS_FILE"
-else
-    HOTWORDS=""
-    echo "Warning: hotwords.txt not found, proceeding without hotwords"
 fi

+# ---------- argument parsing ----------
 if [ $# -lt 1 ]; then
-    echo "Usage: $0 <absolute_meeting_dir> [<file.WAV> <output_name>]"
-    echo "Example: $0 /app/hermes_data/meetings/2026-02-18"
+    echo "Usage: $0 <meeting_dir> [<file.WAV> <output_name>]"
    exit 1
 fi

 MEETING_DIR="$1"
-if [[ "$MEETING_DIR" != /* ]]; then
-    MEETING_DIR="$(realpath "$MEETING_DIR")"
-else
-    MEETING_DIR="$(realpath "$MEETING_DIR")"
-fi
-
-WORK_DIR="$MEETING_DIR"
+# Resolve the meeting directory to an absolute path. Absolute arguments are
+# used as-is; relative ones are resolved against the script's directory.
+# A missing directory makes the cd fail, which aborts the script (set -e).
+case "$MEETING_DIR" in
+    /*) WORK_DIR="$(cd "$MEETING_DIR" && pwd)" ;;
+    *)  WORK_DIR="$(cd "$SCRIPT_DIR/$MEETING_DIR" && pwd)" ;;
+esac
 OUTPUT_DIR="$WORK_DIR/transcription"
 mkdir -p "$OUTPUT_DIR"

-# Function: convert WAV(s) to mono mp3
 convert_to_mp3() {
    local output_mp3="$1"
    shift
    local inputs=("$@")
-
    if [ -f "$output_mp3" ]; then
        echo "  $output_mp3 already exists, skipping conversion"
        return
    fi
-
    if [ ${#inputs[@]} -eq 1 ]; then
        echo "  Converting ${inputs[0]} -> $output_mp3"
        ffmpeg -y -i "${inputs[0]}" -ac 1 -ar 16000 -b:a 64k "$output_mp3" 2>/dev/null
@ -67,13 +50,8 @@ convert_to_mp3() {
        ffmpeg -y -f concat -safe 0 -i "$listfile" -ac 1 -ar 16000 -b:a 64k "$output_mp3" 2>/dev/null
        rm -f "$listfile"
    fi
-
-    local dur
-    dur=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$output_mp3" | cut -d. -f1)
-    echo "  Duration: ${dur}s ($(( dur / 60 ))m$(( dur % 60 ))s)"
 }

-# Function: transcribe using local faster-whisper (with chunking if needed)
 transcribe_file() {
    local mp3_file="$1"
    local name="$2"
@ -84,21 +62,42 @@ transcribe_file() {
        return
    fi

-    # Check duration of mp3
-    local duration=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$mp3_file" | cut -d. -f1)
-    if [ "$duration" -gt 1800 ]; then  # > 30 minutes
-        echo "  Audio is ${duration}s long (>30 min), using chunked transcription..."
-        bash "$SCRIPT_DIR/transcribe_chunked.sh" "$mp3_file" "$name" "$OUTPUT_DIR"
-        return
-    fi
-
-    echo "  Transcribing $name (local faster-whisper)..."
+    echo "  Transcribing $name..."
    local started
    started=$(date +%s)

-    MKL_SERVICE_FORCE_INTEL=1 OMP_NUM_THREADS=2 python3 "$SCRIPT_DIR/local_whisper.py" "$mp3_file" "$json_file" "$WHISPER_MODEL" "$HOTWORDS"
+    # Form the full URL for transcription
+    local full_url="${WHISPER_URL}/audio/transcriptions"

+    local curl_args=(
+        -s -w "%{http_code}" -o "$json_file"
+        -X POST "$full_url"
+        -H "Authorization: Bearer $WHISPER_API_KEY"
+        -F "file=@${mp3_file}"
+        -F "model=${MODEL}"
+        -F "language=${LANGUAGE}"
+        -F "response_format=verbose_json"
+        -F "temperature=0.0"
+        --max-time 3600
+    )
+    if [ -n "$HOTWORDS" ]; then
+        curl_args+=(-F "hotwords=${HOTWORDS}")
+    fi
+
+    local http_code
+    http_code=$(curl "${curl_args[@]}")
    local elapsed=$(( $(date +%s) - started ))
+
+    if [ "$http_code" != "200" ]; then
+        echo "    ERROR: HTTP $http_code"
+        # Display error response body for debugging
+        if [ -f "$json_file" ]; then
+            cat "$json_file"
+        fi
+        rm -f "$json_file"
+        return 1
+    fi
+
    echo "    Done in ${elapsed}s"

    # Extract plain text and timestamped text
@ -135,7 +134,7 @@ print(f"    {len(segs)} segments, {len(plain)} chars")
 PYEOF
 }

-# Manual mode: specific file
+# ---------- manual mode (specific file) ----------
 if [ $# -ge 3 ]; then
    WAV_FILE="$WORK_DIR/$2"
    NAME="$3"
@ -148,14 +147,11 @@ if [ $# -ge 3 ]; then
    exit 0
 fi

-# Auto mode: detect and transcribe all sources
+# ---------- auto mode ----------
 echo "=== Auto-detecting audio sources in $WORK_DIR ==="

-# Detect H2n files (SR*XY.WAV, SR*MS.WAV)
 H2N_XY=$(find "$WORK_DIR" -maxdepth 1 -name "SR*XY.WAV" | head -1)
 H2N_MS=$(find "$WORK_DIR" -maxdepth 1 -name "SR*MS.WAV" | head -1)
-
-# Detect Saramonic / other timestamped WAV files (not SR*)
 mapfile -t SARAMONIC_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "*.WAV" ! -name "SR*" | sort)

 SOURCES=()
@ -201,10 +197,4 @@ done

 echo ""
 echo "=== Done! ==="
-echo "Results in: $OUTPUT_DIR/"
-for entry in "${SOURCES[@]}"; do
-    name="${entry%%:*}"
-    echo "  ${name}.json        - whisper JSON with segments"
-    echo "  ${name}.txt         - timestamped transcription"
-    echo "  ${name}_plain.txt   - plain text"
-done
+echo "Results in: $OUTPUT_DIR/"