Add scripts
This commit is contained in:
parent
992a748c51
commit
ba56147e95
4 changed files with 329 additions and 0 deletions
64
scripts/transcribe_chunked.sh
Normal file
64
scripts/transcribe_chunked.sh
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
#!/bin/bash
# Chunked transcription for long audio recordings.
#
# Usage: transcribe_chunked.sh [MEETING_DIR]
#   MEETING_DIR  directory holding the WAV recording to transcribe
#                (defaults to the current directory).
# Chunks and transcripts are written to MEETING_DIR/transcription.

set -euo pipefail

MEETING_DIR="${1:-.}"
CHUNKS_DIR="$MEETING_DIR/transcription"

mkdir -p "$CHUNKS_DIR"

# Locate the input recording by globbing instead of parsing `ls`: under
# `set -e` a failing `ls` inside the assignment would abort the script
# before the error message below could be printed.
# Lower-case *.wav takes precedence; *.WAV is only a fallback.
wav_candidates=()
for candidate in "$MEETING_DIR"/*.wav; do
  if [[ -f "$candidate" ]]; then
    wav_candidates+=("$candidate")
  fi
done
if (( ${#wav_candidates[@]} == 0 )); then
  for candidate in "$MEETING_DIR"/*.WAV; do
    if [[ -f "$candidate" ]]; then
      wav_candidates+=("$candidate")
    fi
  done
fi

if (( ${#wav_candidates[@]} == 0 )); then
  echo "Error: No WAV file found" >&2
  exit 1
elif (( ${#wav_candidates[@]} > 1 )); then
  # An ambiguous input is rejected rather than guessing which file to use.
  echo "Error: Expected exactly one WAV file in $MEETING_DIR, found ${#wav_candidates[@]}" >&2
  exit 1
fi

WAV_FILE="${wav_candidates[0]}"
|
||||
|
||||
# Duration of the recording in seconds, as printed by ffprobe
# (csv "p=0" prints the bare value without a section prefix).
# `-v error` keeps ffprobe's own diagnostics visible when it fails.
if ! DURATION=$(ffprobe -v error -i "$WAV_FILE" -show_entries format=duration -of csv="p=0"); then
  echo "Error: ffprobe could not read $WAV_FILE" >&2
  exit 1
fi

# Reject an empty or non-numeric value (e.g. "N/A"); otherwise the chunking
# loop further down would be skipped silently and nothing would be produced.
if [[ ! "$DURATION" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
  echo "Error: Could not determine audio duration (got '$DURATION')" >&2
  exit 1
fi

echo "Audio duration: $DURATION seconds"
|
||||
|
||||
# Chunk settings: length of each chunk in seconds.
chunk_duration=600
chunk_num=0

#######################################
# Cut an audio file into consecutive fixed-length 16 kHz PCM chunks named
# chunk_<n>.wav. Each chunk is attempted up to three times.
# Globals:   chunk_num (written) - number of chunks produced
# Arguments: $1 - source audio file
#            $2 - total duration in seconds (may be fractional)
#            $3 - output directory for the chunk files
#            $4 - chunk length in whole seconds (default 600)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success; exits the script with status 1 if a chunk
#            cannot be extracted after three attempts
#######################################
extract_chunks() {
  local src=$1 total=$2 out_dir=$3 step=${4:-600}
  local offset=0 chunk_file attempt
  chunk_num=0

  # awk performs the integer-vs-float comparison (offset < total) without
  # requiring bc; it exits 0 while there is still audio left to cut.
  while LC_ALL=C awk -v o="$offset" -v d="$total" 'BEGIN { exit !(o < d) }'; do
    chunk_file="$out_dir/chunk_${chunk_num}.wav"
    echo "Extracting chunk $chunk_num at offset $offset..."

    # Retry logic
    for attempt in 1 2 3; do
      # -nostdin keeps ffmpeg from consuming the script's standard input.
      if ffmpeg -nostdin -y -i "$src" -ss "$offset" -t "$step" \
        -acodec pcm_s16le -ar 16000 "$chunk_file" 2>/dev/null; then
        break
      elif (( attempt == 3 )); then
        echo "Error: Failed to extract chunk $chunk_num" >&2
        exit 1
      fi
      sleep 1
    done

    offset=$((offset + step))
    # Plain assignment on purpose: ((chunk_num++)) returns status 1 when the
    # old value is 0, which aborts the script under `set -e`.
    chunk_num=$((chunk_num + 1))
  done
}

echo "Extracting chunks..."

extract_chunks "$WAV_FILE" "$DURATION" "$CHUNKS_DIR" "$chunk_duration"
|
||||
|
||||
echo "Transcribing $chunk_num chunks..."

# Absolute directory of this script, used to find local_whisper.py next to it.
# dirname + cd/pwd also works when the script is started without any slash in
# its path (e.g. `bash transcribe_chunked.sh`), where `${BASH_SOURCE[0]%/*}`
# would expand to the script's own file name instead of a directory.
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)

# Transcribe each chunk. A C-style loop is used instead of `seq`, which counts
# downwards on some platforms when there are zero chunks (`seq 0 -1`).
for ((i = 0; i < chunk_num; i++)); do
  chunk_file="$CHUNKS_DIR/chunk_${i}.wav"
  output_file="$CHUNKS_DIR/chunk_${i}.txt"

  echo "Transcribing chunk $i..."
  # The transcript is whatever local_whisper.py writes to stdout.
  MKL_SERVICE_FORCE_INTEL=1 OMP_NUM_THREADS=2 \
    python3 "$script_dir/local_whisper.py" "$chunk_file" > "$output_file"
done
|
||||
|
||||
#######################################
# Concatenate chunk_0.txt .. chunk_<count-1>.txt into merged_raw.txt.
# Files are addressed by index so they are merged in numeric order (a
# `chunk_*.txt` glob sorts chunk_10 before chunk_2) and so that transcripts
# with a higher index left over in the directory are not included.
# Arguments: $1 - directory holding the chunk transcripts
#            $2 - number of chunks to merge
# Outputs:   writes <dir>/merged_raw.txt; errors on stderr
# Returns:   0 on success, 1 if the directory or any transcript is missing
#            or there is nothing to merge
#######################################
merge_chunks() {
  local dir=$1 count=$2
  local merged="$dir/merged_raw.txt"
  local i

  if [[ ! -d "$dir" ]]; then
    echo "Error: Chunk directory not found: $dir" >&2
    return 1
  fi
  if (( count <= 0 )); then
    echo "Error: No chunk transcripts to merge" >&2
    return 1
  fi

  : > "$merged" || return 1
  for ((i = 0; i < count; i++)); do
    cat -- "$dir/chunk_${i}.txt" >> "$merged" || return 1
  done
}

# Merge
echo "Merging transcriptions..."
# A failure here stops the script through the `set -e` enabled at the top.
merge_chunks "$CHUNKS_DIR" "$chunk_num"

echo "Done. Output: $CHUNKS_DIR/merged_raw.txt"
|
||||
Loading…
Add table
Add a link
Reference in a new issue