#!/bin/bash
# Transcribe long audio by splitting at silence boundaries.
# Uses Whisper API with authentication.
#
# Usage: script.sh <mp3_file> <name> <output_dir>
# Required env: WHISPER_URL, WHISPER_API_KEY
# Optional env: HOTWORDS_PATH (file with one hotword per line; '#' comments allowed)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Fail fast with a clear message when required configuration is missing.
: "${WHISPER_URL:?ERROR: WHISPER_URL not set}"
: "${WHISPER_API_KEY:?ERROR: WHISPER_API_KEY not set}"

MODEL="whisper-1"   # generic model name rather than a specific deployment
LANGUAGE="ru"
TARGET_CHUNK=600    # target chunk length, seconds

# Hotwords file: HOTWORDS_PATH is the intended variable name; the original
# misspelling HOTWORKS_PATH is still honored for backward compatibility.
HOTWORDS_FILE="${HOTWORDS_PATH:-${HOTWORKS_PATH:-$SCRIPT_DIR/hotwords.txt}}"
HOTWORDS=""
if [ -f "$HOTWORDS_FILE" ]; then
  # Strip comment/blank lines, join with commas, squeeze/trim separators.
  HOTWORDS=$(grep -v '^#' "$HOTWORDS_FILE" | grep -v '^$' | tr '\n' ',' \
    | sed 's/,,*/,/g; s/^,//; s/,$//')
fi

# Positional arguments; give a usage message instead of set -u's
# cryptic "unbound variable" error.
if [ $# -lt 3 ]; then
  echo "Usage: $0 <mp3_file> <name> <output_dir>" >&2
  exit 2
fi
MP3_FILE="$1"
NAME="$2"
OUTPUT_DIR="$3"
CHUNKS_DIR="$OUTPUT_DIR/chunks_${NAME}"

# Idempotency: skip files that were already fully transcribed.
if [ -f "$OUTPUT_DIR/${NAME}.json" ]; then
  echo "$NAME already transcribed, skipping"
  exit 0
fi

mkdir -p "$CHUNKS_DIR"

# Whole-file duration, truncated to integer seconds.
DURATION=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$MP3_FILE" | cut -d. -f1)
echo "=== Chunked transcription: $NAME ($DURATION s / $((DURATION/60))m) ==="

# Find silence gaps (cached in the chunks dir so reruns skip this ffmpeg pass).
SILENCES_FILE="$CHUNKS_DIR/silences.txt"
if [ ! -f "$SILENCES_FILE" ]; then
  ffmpeg -i "$MP3_FILE" -af "silencedetect=noise=-35dB:d=0.5" -f null - 2>&1 \
    | grep "silence_end" \
    | sed 's/.*silence_end: \([0-9.]*\).*/\1/' \
    > "$SILENCES_FILE"
fi
echo " Found $(wc -l < "$SILENCES_FILE") silence gaps"

# Compute split points: pick the silence closest to each TARGET_CHUNK mark.
SPLIT_POINTS=$(python3 - "$SILENCES_FILE" "$TARGET_CHUNK" "$DURATION" <<'PYEOF'
import sys

silences_file = sys.argv[1]
target = float(sys.argv[2])
duration = float(sys.argv[3])

with open(silences_file) as f:
    silences = [float(line.strip()) for line in f if line.strip()]

if not silences:
    # No silences detected: fall back to equal-sized chunks.
    n = max(2, int(duration / target))
    splits = [duration * i / n for i in range(1, n)]
else:
    splits = []
    t = target
    while t < duration - 30:
        best = min(silences, key=lambda s: abs(s - t))
        # Keep splits at least 30 s apart so we never emit tiny chunks.
        if not splits or best > splits[-1] + 30:
            splits.append(best)
        t += target

print(" ".join(f"{s:.2f}" for s in splits))
PYEOF
)

IFS=' ' read -ra POINTS <<< "$SPLIT_POINTS"
N_CHUNKS=$((${#POINTS[@]} + 1))
echo " Will create $N_CHUNKS chunks"

# Split the audio at the computed points (stream copy: fast, no re-encode;
# NOTE(review): -ss after -i with -c copy cuts on packet boundaries, so chunk
# edges are approximate — acceptable since cuts land inside silences).
PREV=0
for i in $(seq 0 $((N_CHUNKS - 1))); do
  CHUNK_FILE="$CHUNKS_DIR/chunk_$(printf '%03d' "$i").mp3"
  # Skip chunks that already exist and look non-trivial (> 1 KB).
  if [ -f "$CHUNK_FILE" ] && [ "$(stat -c%s "$CHUNK_FILE")" -gt 1000 ]; then
    if [ "$i" -lt "${#POINTS[@]}" ]; then
      PREV="${POINTS[$i]}"   # keep PREV in sync even when skipping
    fi
    echo " chunk_$(printf '%03d' "$i"): exists, skipping"
    continue
  fi
  if [ "$i" -lt "${#POINTS[@]}" ]; then
    END="${POINTS[$i]}"
    DUR=$(python3 -c "print(f'{$END - $PREV:.2f}')")
    ffmpeg -y -i "$MP3_FILE" -ss "$PREV" -t "$DUR" -c copy "$CHUNK_FILE" 2>/dev/null
    PREV="$END"
  else
    # Last chunk: everything from PREV to end of file.
    ffmpeg -y -i "$MP3_FILE" -ss "$PREV" -c copy "$CHUNK_FILE" 2>/dev/null
  fi
  CHUNK_DUR=$(ffprobe -v error -show_entries format=duration -of csv=p=0 "$CHUNK_FILE" | cut -d. -f1)
  echo " chunk_$(printf '%03d' "$i"): ${CHUNK_DUR}s"
done

# Transcribe each chunk
echo "Transcribing chunks..."
# Transcribe each chunk via the Whisper API, caching per-chunk JSON so a
# rerun resumes where it left off.
for i in $(seq 0 $((N_CHUNKS - 1))); do
  CHUNK_FILE="$CHUNKS_DIR/chunk_$(printf '%03d' "$i").mp3"
  CHUNK_JSON="$CHUNKS_DIR/chunk_$(printf '%03d' "$i").json"
  if [ -f "$CHUNK_JSON" ]; then
    echo " chunk_$(printf '%03d' "$i"): already transcribed"
    continue
  fi
  echo -n " chunk_$(printf '%03d' "$i"): transcribing... "
  STARTED=$(date +%s)
  full_url="${WHISPER_URL}/audio/transcriptions"
  # NOTE(review): the bearer token is visible in `ps` while curl runs;
  # consider `-H @headerfile` if other users share this host.
  CURL_ARGS=(
    -s -w "%{http_code}" -o "$CHUNK_JSON"
    -X POST "$full_url"
    -H "Authorization: Bearer $WHISPER_API_KEY"
    -F "file=@${CHUNK_FILE}"
    -F "model=${MODEL}"
    -F "language=${LANGUAGE}"
    -F "response_format=verbose_json"
    -F "temperature=0.0"
    --max-time 600
  )
  [ -n "$HOTWORDS" ] && CURL_ARGS+=(-F "hotwords=${HOTWORDS}")
  HTTP_CODE=$(curl "${CURL_ARGS[@]}")
  ELAPSED=$(( $(date +%s) - STARTED ))
  if [ "$HTTP_CODE" != "200" ]; then
    echo "ERROR (HTTP $HTTP_CODE)"
    # Show the API error body (if any), then drop the bad cache file so the
    # next run retries this chunk instead of treating it as done.
    if [ -f "$CHUNK_JSON" ]; then
      cat "$CHUNK_JSON"
    fi
    rm -f "$CHUNK_JSON"
    exit 1
  fi
  echo "done in ${ELAPSED}s"
done

# Merge per-chunk transcripts into one JSON + two text renderings,
# shifting each chunk's timestamps by its start offset in the full file.
echo "Merging chunks..."
python3 - "$CHUNKS_DIR" "$OUTPUT_DIR" "$NAME" "$SPLIT_POINTS" <<'PYEOF'
import json, sys, os, glob

chunks_dir = sys.argv[1]
output_dir = sys.argv[2]
name = sys.argv[3]
split_points_str = sys.argv[4] if len(sys.argv) > 4 else ""

if split_points_str.strip():
    split_points = [float(x) for x in split_points_str.strip().split()]
else:
    split_points = []
# Chunk i starts at offsets[i]; chunk 0 starts at the beginning of the file.
offsets = [0.0] + split_points

chunk_files = sorted(glob.glob(os.path.join(chunks_dir, "chunk_*.json")))
all_segments = []
total_duration = 0
for idx, cf in enumerate(chunk_files):
    with open(cf) as f:
        data = json.load(f)
    offset = offsets[idx] if idx < len(offsets) else offsets[-1]
    for seg in data.get("segments", []):
        all_segments.append({
            "start": round(seg.get("start", 0) + offset, 2),
            "end": round(seg.get("end", 0) + offset, 2),
            "text": seg.get("text", "").strip(),
        })
    chunk_dur = data.get("duration", 0)
    total_duration = max(total_duration, offset + chunk_dur)

all_segments.sort(key=lambda s: s["start"])
merged = {"segments": all_segments, "duration": total_duration}

json_path = os.path.join(output_dir, f"{name}.json")
with open(json_path, "w") as f:
    json.dump(merged, f, ensure_ascii=False, indent=2)

# Timestamped transcript: "[HH:MM:SS] text" per segment.
txt_path = os.path.join(output_dir, f"{name}.txt")
with open(txt_path, "w") as f:
    for seg in all_segments:
        start = seg["start"]
        h, m, s = int(start // 3600), int((start % 3600) // 60), int(start % 60)
        f.write(f"[{h:02d}:{m:02d}:{s:02d}] {seg['text']}\n")

# Plain text: all segments joined with spaces, no timestamps.
plain_path = os.path.join(output_dir, f"{name}_plain.txt")
with open(plain_path, "w") as f:
    f.write(" ".join(seg["text"] for seg in all_segments))

print(f" {len(all_segments)} segments total")
print(f" Written: {json_path}, {txt_path}, {plain_path}")
PYEOF

# Fix: original printed this completion banner twice.
echo "=== Done: $NAME ==="