#!/bin/bash
# Transcribe audio recordings using local faster-whisper.
# Supports multiple sources: Zoom H2n (4ch WAV), Saramonic (mono WAV), etc.
#
# Usage:
#   ./transcribe.sh /absolute/path/to/meeting_folder
#   ./transcribe.sh /absolute/path/to/meeting_folder specific.WAV output_name
#
# Examples:
#   ./transcribe.sh /app/hermes_data/meetings/2026-02-18
#   ./transcribe.sh /app/hermes_data/meetings/2026-02-18 SR003XY.WAV h2n_xy
#
# Modes:
#   - Manual mode (3+ arguments): convert and transcribe one named WAV file.
#   - Auto mode (otherwise): detect H2n (SR*XY.WAV / SR*MS.WAV) and other *.WAV
#     files in the meeting folder and transcribe each detected source.
#
# Outputs are written to <meeting_folder>/transcription/.
# Requires: ffmpeg, ffprobe, python3, plus local_whisper.py and
# transcribe_chunked.sh located next to this script.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
WHISPER_MODEL="base"
# NOTE(review): LANGUAGE is not exported and is not passed to any command in
# this script; kept as-is in case another tool reads it -- confirm or remove.
# shellcheck disable=SC2034
LANGUAGE="ru"

# Recordings longer than this many seconds go through transcribe_chunked.sh.
readonly CHUNK_THRESHOLD_SECONDS=1800

# Print an error message to stderr and abort the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Abort with a clear message when a required external command is missing.
# (ffmpeg/ffprobe/python3 failures would otherwise be hard to diagnose.)
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "required command not found: $1"
}

# Print the duration of an audio file in whole seconds (fraction truncated).
# Arguments: $1 - path to the audio file
# Returns:   0 and prints the integer on success; 1 if ffprobe fails or does
#            not report a numeric duration (e.g. "N/A" or empty output).
audio_duration_seconds() {
  local raw
  raw=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$1") || return 1
  raw="${raw%%.*}"
  [[ "$raw" =~ ^[0-9]+$ ]] || return 1
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  printf '%d\n' "$((10#$raw))"
}

# Convert one WAV, or concatenate several WAVs, into a mono 16 kHz 64k mp3.
# Arguments: $1    - output mp3 path
#            $2... - one or more input audio files
# Behavior:  skips the conversion when the output already exists. The result
#            is first written to "<output>.part" and renamed on success, so an
#            interrupted or failed ffmpeg run never leaves a partial mp3 that a
#            later run would mistake for a finished one.
# Returns:   0 on success/skip, non-zero if inputs are missing or ffmpeg fails.
convert_to_mp3() {
  local output_mp3="$1"
  shift
  local inputs=("$@")

  if [[ -f "$output_mp3" ]]; then
    echo " $output_mp3 already exists, skipping conversion"
    return
  fi

  if (( ${#inputs[@]} == 0 )); then
    echo "Error: no input files given for $output_mp3" >&2
    return 1
  fi

  local f
  for f in "${inputs[@]}"; do
    if [[ ! -f "$f" ]]; then
      echo "Error: input file not found: $f" >&2
      return 1
    fi
  done

  local partial="${output_mp3}.part"
  local listfile=""
  local -a input_args
  if (( ${#inputs[@]} == 1 )); then
    echo " Converting ${inputs[0]} -> $output_mp3"
    input_args=(-i "${inputs[0]}")
  else
    listfile=$(mktemp "${TMPDIR:-/tmp}/ffmpeg_concat_XXXXXX.txt")
    # Inside the concat list a single quote must be written as '\'' ,
    # otherwise paths containing an apostrophe break the list file.
    local sq="'"
    local escaped_sq="'\\''"
    for f in "${inputs[@]}"; do
      printf "file '%s'\n" "${f//$sq/$escaped_sq}" >> "$listfile"
    done
    echo " Concatenating ${#inputs[@]} files -> $output_mp3"
    input_args=(-f concat -safe 0 -i "$listfile")
  fi

  # -loglevel error keeps the output quiet but still shows real failures;
  # -nostdin stops ffmpeg from consuming the script's stdin;
  # -f mp3 is needed because the temporary name does not end in .mp3.
  local status=0
  ffmpeg -nostdin -hide_banner -loglevel error -y "${input_args[@]}" \
    -ac 1 -ar 16000 -b:a 64k -f mp3 "$partial" || status=$?

  if [[ -n "$listfile" ]]; then
    rm -f "$listfile"
  fi

  if (( status != 0 )); then
    rm -f "$partial"
    echo "Error: ffmpeg failed (exit $status) while creating $output_mp3" >&2
    return "$status"
  fi
  mv -f "$partial" "$output_mp3"

  # The duration is informational only, so a probe failure is not fatal.
  local dur
  if dur=$(audio_duration_seconds "$output_mp3"); then
    echo " Duration: ${dur}s ($(( dur / 60 ))m$(( dur % 60 ))s)"
  else
    echo " Warning: could not determine duration of $output_mp3" >&2
  fi
}

# Transcribe an mp3 using local faster-whisper (with chunking if needed).
# Arguments: $1 - mp3 file to transcribe
#            $2 - base name for the output files
# Globals:   OUTPUT_DIR, SCRIPT_DIR, WHISPER_MODEL, HOTWORDS (read)
# Outputs:   $OUTPUT_DIR/<name>.json, and for the non-chunked path also
#            <name>.txt (timestamped) and <name>_plain.txt.
# Behavior:  skips when <name>.json already exists; audio longer than
#            CHUNK_THRESHOLD_SECONDS is delegated to transcribe_chunked.sh.
transcribe_file() {
  local mp3_file="$1"
  local name="$2"
  local json_file="$OUTPUT_DIR/${name}.json"

  if [[ -f "$json_file" ]]; then
    echo " $name already transcribed, skipping"
    return
  fi

  local duration
  if duration=$(audio_duration_seconds "$mp3_file"); then
    if (( duration > CHUNK_THRESHOLD_SECONDS )); then
      echo " Audio is ${duration}s long (>30 min), using chunked transcription..."
      bash "$SCRIPT_DIR/transcribe_chunked.sh" "$mp3_file" "$name" "$OUTPUT_DIR"
      return
    fi
  else
    echo " Warning: could not determine duration of $mp3_file; transcribing without chunking" >&2
  fi

  echo " Transcribing $name (local faster-whisper)..."
  local started
  started=$(date +%s)

  if ! MKL_SERVICE_FORCE_INTEL=1 OMP_NUM_THREADS=2 \
    python3 "$SCRIPT_DIR/local_whisper.py" "$mp3_file" "$json_file" "$WHISPER_MODEL" "$HOTWORDS"; then
    # The JSON did not exist before this run, so anything left behind is the
    # product of the failed run; remove it so a re-run does not skip this file.
    rm -f "$json_file"
    echo "Error: transcription of $name failed" >&2
    return 1
  fi

  local elapsed=$(( $(date +%s) - started ))
  echo " Done in ${elapsed}s"

  # Extract plain text and timestamped text. Files are opened explicitly as
  # UTF-8 so non-ASCII transcripts do not depend on the locale's encoding.
  python3 - "$json_file" "$OUTPUT_DIR" "$name" <<'PYEOF'
import json, sys, os

json_path = sys.argv[1]
output_dir = sys.argv[2]
name = sys.argv[3]

with open(json_path, encoding="utf-8") as f:
    data = json.load(f)

segs = data.get("segments", [])

# Timestamped text
lines = []
for seg in segs:
    start = seg.get("start", 0)
    h, m, s = int(start // 3600), int((start % 3600) // 60), int(start % 60)
    lines.append(f"[{h:02d}:{m:02d}:{s:02d}] {seg['text'].strip()}")

txt_path = os.path.join(output_dir, f"{name}.txt")
with open(txt_path, "w", encoding="utf-8") as f:
    f.write("\n".join(lines))

# Plain text
plain = " ".join(seg["text"].strip() for seg in segs)
plain_path = os.path.join(output_dir, f"{name}_plain.txt")
with open(plain_path, "w", encoding="utf-8") as f:
    f.write(plain)

print(f" {len(segs)} segments, {len(plain)} chars")
PYEOF
}

# Print the first file (in sorted order) directly inside WORK_DIR whose name
# matches the given glob, or nothing when there is no match.
# Arguments: $1 - find -name pattern
first_match() {
  local pattern="$1"
  local -a matches=()
  mapfile -t matches < <(find "$WORK_DIR" -maxdepth 1 -name "$pattern" | sort)
  if (( ${#matches[@]} > 1 )); then
    echo " Warning: ${#matches[@]} files match $pattern; using ${matches[0]##*/}" >&2
  fi
  printf '%s' "${matches[0]:-}"
}

# --- Argument handling -------------------------------------------------------

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <meeting_folder> [<wav_file> <output_name>]" >&2
  echo "Example: $0 /app/hermes_data/meetings/2026-02-18" >&2
  exit 1
fi

require_cmd ffmpeg
require_cmd ffprobe
require_cmd python3

# --- Load hotwords -----------------------------------------------------------
# Comment lines (starting with '#') and empty lines are dropped; the remaining
# lines are joined with commas. sed is used for the filtering because it exits
# 0 even when every line is removed, whereas `grep -v` would exit 1 and abort
# the script under `set -eo pipefail` for an empty or comments-only file.
HOTWORDS_FILE="$SCRIPT_DIR/hotwords.txt"
if [[ -f "$HOTWORDS_FILE" ]]; then
  HOTWORDS=$(sed -e '/^#/d' -e '/^$/d' "$HOTWORDS_FILE" \
    | tr '\n' ',' \
    | sed 's/,,*/,/g; s/^,//; s/,$//')
  echo "Loaded hotwords from $HOTWORDS_FILE"
else
  HOTWORDS=""
  echo "Warning: hotwords.txt not found, proceeding without hotwords" >&2
fi

# --- Resolve directories -----------------------------------------------------

MEETING_DIR="$(realpath "$1")"
[[ -d "$MEETING_DIR" ]] || die "meeting folder not found: $MEETING_DIR"

WORK_DIR="$MEETING_DIR"
OUTPUT_DIR="$WORK_DIR/transcription"
mkdir -p "$OUTPUT_DIR"

# --- Manual mode: specific file ----------------------------------------------

if [[ $# -ge 3 ]]; then
  WAV_FILE="$WORK_DIR/$2"
  NAME="$3"
  MP3_FILE="$OUTPUT_DIR/${NAME}.mp3"

  echo "=== Transcribing $2 as '$NAME' ==="
  convert_to_mp3 "$MP3_FILE" "$WAV_FILE"
  transcribe_file "$MP3_FILE" "$NAME"
  echo "=== Done ==="
  exit 0
fi

# --- Auto mode: detect and transcribe all sources ----------------------------

echo "=== Auto-detecting audio sources in $WORK_DIR ==="

# Detect H2n files (SR*XY.WAV, SR*MS.WAV)
H2N_XY=$(first_match "SR*XY.WAV")
H2N_MS=$(first_match "SR*MS.WAV")

# Detect Saramonic / other timestamped WAV files (not SR*)
mapfile -t SARAMONIC_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "*.WAV" ! -name "SR*" | sort)

# Names of the detected sources; the matching input files are looked up from
# H2N_XY / H2N_MS / SARAMONIC_FILES so paths never have to be packed into and
# re-split from a delimiter-separated string.
SOURCE_NAMES=()

if [[ -n "$H2N_XY" ]]; then
  echo " Found H2n XY: $(basename "$H2N_XY")"
  SOURCE_NAMES+=("h2n_xy")
fi

if [[ -n "$H2N_MS" ]]; then
  echo " Found H2n MS: $(basename "$H2N_MS")"
  SOURCE_NAMES+=("h2n_ms")
fi

if [[ ${#SARAMONIC_FILES[@]} -gt 0 ]]; then
  echo " Found Saramonic files: ${SARAMONIC_FILES[*]##*/}"
  SOURCE_NAMES+=("saramonic")
fi

if [[ ${#SOURCE_NAMES[@]} -eq 0 ]]; then
  die "No WAV files found in $WORK_DIR"
fi

echo ""
echo "=== Step 1: Converting to mp3 ==="
for name in "${SOURCE_NAMES[@]}"; do
  case "$name" in
    h2n_xy) files=("$H2N_XY") ;;
    h2n_ms) files=("$H2N_MS") ;;
    saramonic) files=("${SARAMONIC_FILES[@]}") ;;
    *) die "unknown source: $name" ;;
  esac
  convert_to_mp3 "$OUTPUT_DIR/${name}.mp3" "${files[@]}"
done

echo ""
echo "=== Step 2: Transcribing ==="
for name in "${SOURCE_NAMES[@]}"; do
  transcribe_file "$OUTPUT_DIR/${name}.mp3" "$name"
done

echo ""
echo "=== Done! ==="
echo "Results in: $OUTPUT_DIR/"
for name in "${SOURCE_NAMES[@]}"; do
  echo " ${name}.json - whisper JSON with segments"
  echo " ${name}.txt - timestamped transcription"
  echo " ${name}_plain.txt - plain text"
done