auto-report-skill/scripts/transcribe.sh

#!/bin/bash
# Transcribe audio recordings using local faster-whisper
# Supports multiple sources: Zoom H2n (4ch WAV), Saramonic (mono WAV), etc.
#
# Usage:
#   ./transcribe.sh /absolute/path/to/meeting_folder
#   ./transcribe.sh /absolute/path/to/meeting_folder specific.WAV output_name
#
# Examples:
#   ./transcribe.sh /app/hermes_data/meetings/2026-02-18
#   ./transcribe.sh /app/hermes_data/meetings/2026-02-18 SR003XY.WAV h2n_xy

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
WHISPER_MODEL="base"
# NOTE(review): LANGUAGE is neither referenced nor exported anywhere in the
# visible part of this script - confirm whether a helper is meant to receive it.
LANGUAGE="ru"

# Print the hotwords stored in file $1 as a single comma-separated line.
# Lines starting with '#' and empty lines are dropped; CR characters are
# stripped so files saved with CRLF line endings work too.
# sed is used instead of `grep -v` because grep exits with status 1 when it
# selects no lines, which under `set -eo pipefail` would abort the whole script
# for a hotwords file that contains only comments.
load_hotwords() {
    tr -d '\r' < "$1" \
        | sed -e '/^#/d' -e '/^$/d' \
        | tr '\n' ',' \
        | sed 's/,,*/,/g; s/^,//; s/,$//'
}

# Load hotwords (optional file next to this script)
HOTWORDS_FILE="$SCRIPT_DIR/hotwords.txt"
if [ -f "$HOTWORDS_FILE" ]; then
    HOTWORDS=$(load_hotwords "$HOTWORDS_FILE")
    echo "Loaded hotwords from $HOTWORDS_FILE"
else
    HOTWORDS=""
    echo "Warning: hotwords.txt not found, proceeding without hotwords" >&2
fi

# At least the meeting directory is required.
if [ $# -lt 1 ]; then
    {
        echo "Usage: $0 <absolute_meeting_dir> [<file.WAV> <output_name>]"
        echo "Example: $0 /app/hermes_data/meetings/2026-02-18"
    } >&2
    exit 1
fi

# Canonicalize the meeting directory; realpath handles relative and absolute
# paths alike, so no separate branch is needed.
MEETING_DIR="$(realpath "$1")" || {
    echo "Error: cannot resolve meeting directory: $1" >&2
    exit 1
}

# Refuse to continue on a missing directory: otherwise the mkdir -p below
# would silently create an empty meeting folder for a mistyped path.
if [ ! -d "$MEETING_DIR" ]; then
    echo "Error: meeting directory not found: $MEETING_DIR" >&2
    exit 1
fi

WORK_DIR="$MEETING_DIR"
OUTPUT_DIR="$WORK_DIR/transcription"
mkdir -p "$OUTPUT_DIR"

# Function: convert WAV(s) to mono mp3
#
# Usage: convert_to_mp3 <output.mp3> <input> [<input> ...]
#   output.mp3  destination file; if it already exists nothing is done
#   input       one or more audio files; several inputs are concatenated in
#               the given order via ffmpeg's concat demuxer
#
# The result is encoded as mono, 16 kHz, 64 kbit/s. ffmpeg writes to a
# "<output>.partial.mp3" sibling that is renamed only on success, so an
# interrupted or failed run never leaves a file that a later run would
# mistake for a finished conversion.
# Returns 0 on success (or skip), non-zero if an input is missing or ffmpeg
# fails.
convert_to_mp3() {
    local output_mp3="$1"
    shift
    local inputs=("$@")

    if [ -f "$output_mp3" ]; then
        echo "  $output_mp3 already exists, skipping conversion"
        return
    fi

    if [ ${#inputs[@]} -eq 0 ]; then
        echo "Error: no input files given for $output_mp3" >&2
        return 1
    fi

    local f
    for f in "${inputs[@]}"; do
        if [ ! -f "$f" ]; then
            echo "Error: input file not found: $f" >&2
            return 1
        fi
    done

    # Keeps the .mp3 extension so ffmpeg still picks the muxer from the name.
    local partial_mp3="${output_mp3%.mp3}.partial.mp3"
    local rc=0

    if [ ${#inputs[@]} -eq 1 ]; then
        echo "  Converting ${inputs[0]} -> $output_mp3"
        # -loglevel error keeps the output quiet but still shows real failures.
        ffmpeg -nostdin -hide_banner -loglevel error -y -i "${inputs[0]}" \
            -ac 1 -ar 16000 -b:a 64k "$partial_mp3" || rc=$?
    else
        local listfile
        local sq="'" esc="'\\''"
        listfile=$(mktemp "${TMPDIR:-/tmp}/ffmpeg_concat_XXXXXX") || return 1
        for f in "${inputs[@]}"; do
            # Inside a single-quoted concat entry a quote must be written as '\''.
            printf "file '%s'\n" "${f//$sq/$esc}"
        done > "$listfile"
        echo "  Concatenating ${#inputs[@]} files -> $output_mp3"
        ffmpeg -nostdin -hide_banner -loglevel error -y -f concat -safe 0 -i "$listfile" \
            -ac 1 -ar 16000 -b:a 64k "$partial_mp3" || rc=$?
        # Removed on both success and failure.
        rm -f "$listfile"
    fi

    if [ "$rc" -ne 0 ]; then
        rm -f "$partial_mp3"
        echo "Error: ffmpeg failed (exit $rc) while creating $output_mp3" >&2
        return "$rc"
    fi
    mv -f "$partial_mp3" "$output_mp3" || return 1

    # The duration is informational only, so a probe failure is not fatal.
    local dur
    dur=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$output_mp3" | cut -d. -f1) || dur=""
    if [[ "$dur" =~ ^[0-9]+$ ]]; then
        echo "  Duration: ${dur}s ($(( dur / 60 ))m$(( dur % 60 ))s)"
    else
        echo "  Warning: could not determine duration of $output_mp3" >&2
    fi
}

# Function: transcribe using local faster-whisper (with chunking if needed)
#
# Usage: transcribe_file <mp3_file> <name>
#   mp3_file  audio file to transcribe
#   name      base name of the outputs written to $OUTPUT_DIR
#
# Reads the globals OUTPUT_DIR, SCRIPT_DIR, WHISPER_MODEL and HOTWORDS.
# Does nothing when $OUTPUT_DIR/<name>.json already exists. Audio longer than
# 30 minutes is handed to transcribe_chunked.sh; otherwise local_whisper.py
# writes <name>.json, from which <name>.txt (timestamped) and
# <name>_plain.txt are derived.
# Returns non-zero if the audio file is missing or a transcription step fails.
transcribe_file() {
    local mp3_file="$1"
    local name="$2"
    local json_file="$OUTPUT_DIR/${name}.json"

    if [ -f "$json_file" ]; then
        echo "  $name already transcribed, skipping"
        return
    fi

    if [ ! -f "$mp3_file" ]; then
        echo "Error: audio file not found: $mp3_file" >&2
        return 1
    fi

    # Check duration of mp3. Declared separately so `local` does not mask the
    # pipeline status; a non-numeric result falls back to single-pass mode.
    local duration
    duration=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$mp3_file" | cut -d. -f1) || duration=""
    if [[ "$duration" =~ ^[0-9]+$ ]]; then
        if [ "$duration" -gt 1800 ]; then  # > 30 minutes
            echo "  Audio is ${duration}s long (>30 min), using chunked transcription..."
            bash "$SCRIPT_DIR/transcribe_chunked.sh" "$mp3_file" "$name" "$OUTPUT_DIR"
            return
        fi
    else
        echo "  Warning: could not determine duration of $mp3_file, transcribing in a single pass" >&2
    fi

    echo "  Transcribing $name (local faster-whisper)..."
    local started
    started=$(date +%s)

    local rc=0
    MKL_SERVICE_FORCE_INTEL=1 OMP_NUM_THREADS=2 python3 "$SCRIPT_DIR/local_whisper.py" "$mp3_file" "$json_file" "$WHISPER_MODEL" "$HOTWORDS" || rc=$?
    if [ "$rc" -ne 0 ]; then
        # A leftover JSON would make the next run skip this file.
        rm -f "$json_file"
        echo "Error: transcription of $name failed (exit $rc)" >&2
        return "$rc"
    fi

    local elapsed=$(( $(date +%s) - started ))
    echo "    Done in ${elapsed}s"

    # Extract plain text and timestamped text
    python3 - "$json_file" "$OUTPUT_DIR" "$name" <<'PYEOF'
import json, sys, os

json_path, output_dir, name = sys.argv[1:4]

# Explicit UTF-8 so non-ASCII transcripts do not depend on the process locale.
# Assumes local_whisper.py writes UTF-8 (or ASCII-escaped) JSON - TODO confirm.
with open(json_path, encoding="utf-8") as f:
    data = json.load(f)

segs = data.get("segments", [])
# Tolerate segments without a "text" field, as is already done for "start".
texts = [(seg.get("text") or "").strip() for seg in segs]

# Timestamped text: one "[HH:MM:SS] text" line per segment
lines = []
for seg, text in zip(segs, texts):
    start = seg.get("start") or 0
    h, m, s = int(start // 3600), int((start % 3600) // 60), int(start % 60)
    lines.append(f"[{h:02d}:{m:02d}:{s:02d}] {text}")

txt_path = os.path.join(output_dir, f"{name}.txt")
with open(txt_path, "w", encoding="utf-8") as f:
    f.write("\n".join(lines))

# Plain text: all segments joined by single spaces
plain = " ".join(texts)
plain_path = os.path.join(output_dir, f"{name}_plain.txt")
with open(plain_path, "w", encoding="utf-8") as f:
    f.write(plain)

print(f"    {len(segs)} segments, {len(plain)} chars")
PYEOF
}

# Manual mode: specific file
# Triggered by three or more arguments: <meeting_dir> <file.WAV> <output_name>.
# The file name is taken relative to the meeting directory.
if [ $# -ge 3 ]; then
    WAV_FILE="$WORK_DIR/$2"
    NAME="$3"
    MP3_FILE="$OUTPUT_DIR/${NAME}.mp3"

    # Fail early with a clear message instead of relying on ffmpeg to report a
    # missing input. Only checked when no mp3 exists yet, because an existing
    # mp3 is reused and the WAV is then not needed.
    if [ ! -f "$MP3_FILE" ] && [ ! -f "$WAV_FILE" ]; then
        echo "Error: audio file not found: $WAV_FILE" >&2
        exit 1
    fi

    echo "=== Transcribing $2 as '$NAME' ==="
    convert_to_mp3 "$MP3_FILE" "$WAV_FILE"
    transcribe_file "$MP3_FILE" "$NAME"
    echo "=== Done ==="
    exit 0
fi

# Auto mode: detect and transcribe all sources
echo "=== Auto-detecting audio sources in $WORK_DIR ==="

# Detect H2n files (SR*XY.WAV, SR*MS.WAV)
# All matches are read and sorted so the chosen file is deterministic (find's
# own order is unspecified). Reading the full list also avoids `find | head`,
# where head may close the pipe early and fail the pipeline under pipefail.
mapfile -t H2N_XY_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "SR*XY.WAV" | sort)
mapfile -t H2N_MS_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "SR*MS.WAV" | sort)
H2N_XY="${H2N_XY_FILES[0]:-}"
H2N_MS="${H2N_MS_FILES[0]:-}"

# Detect Saramonic / other timestamped WAV files (not SR*)
mapfile -t SARAMONIC_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "*.WAV" ! -name "SR*" | sort)

# Each entry is "<name>:<path>", with several paths joined by '|'.
SOURCES=()

if [ -n "$H2N_XY" ]; then
    echo "  Found H2n XY: $(basename "$H2N_XY")"
    if [ ${#H2N_XY_FILES[@]} -gt 1 ]; then
        echo "  Warning: ${#H2N_XY_FILES[@]} H2n XY files found, only $(basename "$H2N_XY") will be transcribed" >&2
    fi
    SOURCES+=("h2n_xy:$H2N_XY")
fi
if [ -n "$H2N_MS" ]; then
    echo "  Found H2n MS: $(basename "$H2N_MS")"
    if [ ${#H2N_MS_FILES[@]} -gt 1 ]; then
        echo "  Warning: ${#H2N_MS_FILES[@]} H2n MS files found, only $(basename "$H2N_MS") will be transcribed" >&2
    fi
    SOURCES+=("h2n_ms:$H2N_MS")
fi
if [ ${#SARAMONIC_FILES[@]} -gt 0 ]; then
    echo "  Found Saramonic files: ${SARAMONIC_FILES[*]##*/}"
    # Build "path1|path2|..." and drop the leading separator.
    joined=$(printf "|%s" "${SARAMONIC_FILES[@]}")
    joined="${joined:1}"
    SOURCES+=("saramonic:$joined")
fi

if [ ${#SOURCES[@]} -eq 0 ]; then
    echo "Error: No WAV files found in $WORK_DIR" >&2
    exit 1
fi

printf '\n'
printf '%s\n' "=== Step 1: Converting to mp3 ==="
# Every SOURCES entry has the form "<name>:<path>[|<path>...]": the part before
# the first ':' names the output, the rest lists the input files.
for src in "${SOURCES[@]}"; do
    IFS='|' read -ra wav_list <<< "${src#*:}"
    convert_to_mp3 "$OUTPUT_DIR/${src%%:*}.mp3" "${wav_list[@]}"
done

printf '\n'
printf '%s\n' "=== Step 2: Transcribing ==="
# Transcribe the mp3 produced for each source; the source name is the part of
# the SOURCES entry before the first ':'.
for src in "${SOURCES[@]}"; do
    label="${src%%:*}"
    transcribe_file "$OUTPUT_DIR/${label}.mp3" "$label"
done

# Final summary: list the result files expected for every source.
printf '\n'
printf '%s\n' "=== Done! ===" "Results in: $OUTPUT_DIR/"
for src in "${SOURCES[@]}"; do
    label="${src%%:*}"
    printf '%s\n' \
        "  ${label}.json        - whisper JSON with segments" \
        "  ${label}.txt         - timestamped transcription" \
        "  ${label}_plain.txt   - plain text"
done