#!/bin/bash
#
# Transcribe a long audio recording by splitting it into fixed-length chunks.
#
# Usage: <this script> [MEETING_DIR]
#   MEETING_DIR  directory holding exactly one .wav (or .WAV) file; default "."
#
# Files written under MEETING_DIR/transcription:
#   chunk_N.wav     consecutive slices of the input (16 kHz, pcm_s16le)
#   chunk_N.txt     stdout of local_whisper.py for the matching slice
#   merged_raw.txt  all chunk_N.txt concatenated in numeric chunk order
#
# Requires ffprobe, ffmpeg, bc and python3 on PATH, plus local_whisper.py in
# the same directory as this script.

set -euo pipefail

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Extract one slice of audio, retrying on failure.
# Arguments: $1 source file, $2 start offset (s), $3 length (s), $4 output file
# Returns:   0 once ffmpeg succeeds, 1 after max_attempts failed tries
extract_chunk() {
  local src=$1 start=$2 length=$3 dest=$4
  local -r max_attempts=3
  local attempt

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # -ss is given before -i so ffmpeg seeks in the input instead of decoding
    # and discarding everything from the start of the file for every chunk.
    # -nostdin keeps ffmpeg from touching the script's stdin.
    if ffmpeg -nostdin -hide_banner -loglevel error -y \
      -ss "$start" -i "$src" -t "$length" \
      -acodec pcm_s16le -ar 16000 "$dest"; then
      return 0
    fi
    if ((attempt < max_attempts)); then
      sleep 1
    fi
  done
  return 1
}

MEETING_DIR="${1:-.}"
CHUNKS_DIR="$MEETING_DIR/transcription"

for tool in ffprobe ffmpeg bc python3; do
  command -v "$tool" >/dev/null || die "Error: Required command not found: $tool"
done

[[ -d "$MEETING_DIR" ]] || die "Error: Not a directory: $MEETING_DIR"
mkdir -p "$CHUNKS_DIR"

# Locate the audio file: prefer *.wav, fall back to *.WAV. Globbing into an
# array (instead of parsing ls) survives spaces in names and lets us tell
# "none" apart from "several".
shopt -s nullglob
wav_candidates=("$MEETING_DIR"/*.wav)
if ((${#wav_candidates[@]} == 0)); then
  wav_candidates=("$MEETING_DIR"/*.WAV)
fi
shopt -u nullglob

if ((${#wav_candidates[@]} == 0)); then
  die "Error: No WAV file found"
fi
if ((${#wav_candidates[@]} > 1)); then
  die "Error: Multiple WAV files found in $MEETING_DIR; expected exactly one"
fi
WAV_FILE=${wav_candidates[0]}
[[ -f "$WAV_FILE" ]] || die "Error: No WAV file found"

# Total duration in seconds (may be fractional).
DURATION=$(ffprobe -i "$WAV_FILE" -show_entries format=duration \
  -v quiet -of csv="p=0") || die "Error: ffprobe failed on $WAV_FILE"
# Guard against empty / "N/A" output before handing the value to bc.
[[ "$DURATION" =~ ^[0-9]+([.][0-9]+)?$ ]] \
  || die "Error: Could not determine audio duration (got '$DURATION')"
echo "Audio duration: $DURATION seconds"

# Chunk settings
chunk_duration=600
offset=0
chunk_num=0

echo "Extracting chunks..."
# DURATION can be fractional, so the comparison is delegated to bc, which
# prints 1 while there is audio left past the current offset.
while (($(bc -l <<<"$offset < $DURATION"))); do
  chunk_file="$CHUNKS_DIR/chunk_${chunk_num}.wav"
  echo "Extracting chunk $chunk_num at offset $offset..."

  extract_chunk "$WAV_FILE" "$offset" "$chunk_duration" "$chunk_file" \
    || die "Error: Failed to extract chunk $chunk_num"

  offset=$((offset + chunk_duration))
  # Plain assignment on purpose: ((chunk_num++)) returns status 1 when the
  # old value is 0, which would abort the script under `set -e`.
  chunk_num=$((chunk_num + 1))
done

((chunk_num > 0)) || die "Error: No chunks extracted (audio duration: $DURATION)"

echo "Transcribing $chunk_num chunks..."

# Resolve the script's own directory so local_whisper.py is found even when
# the script is started as `bash script.sh` (no slash in BASH_SOURCE).
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)

# Transcribe each chunk, remembering the outputs in chunk order.
transcripts=()
for ((i = 0; i < chunk_num; i++)); do
  chunk_file="$CHUNKS_DIR/chunk_${i}.wav"
  output_file="$CHUNKS_DIR/chunk_${i}.txt"
  echo "Transcribing chunk $i..."
  MKL_SERVICE_FORCE_INTEL=1 OMP_NUM_THREADS=2 \
    python3 "$script_dir/local_whisper.py" "$chunk_file" >"$output_file"
  transcripts+=("$output_file")
done

# Merge by explicit index rather than a chunk_*.txt glob: the glob sorts
# lexically (chunk_10 before chunk_2) and would also pick up stale files
# left behind by an earlier, longer run.
echo "Merging transcriptions..."
cat -- "${transcripts[@]}" >"$CHUNKS_DIR/merged_raw.txt"
echo "Done. Output: $CHUNKS_DIR/merged_raw.txt"