200 lines
No EOL
5.4 KiB
Bash
200 lines
No EOL
5.4 KiB
Bash
#!/bin/bash
# Transcribe audio recordings using local whisper server (with API key)
# Usage: ./transcribe.sh <meeting_dir> [<file.WAV> <output_name>]
#
# Required environment:
#   WHISPER_URL     - base URL of the API (e.g., https://llm.lambda.coredump.ru/v1)
#   WHISPER_API_KEY - bearer token sent in the Authorization header

set -euo pipefail

# Directory containing this script; meeting directories are resolved against it.
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"

# Abort early with a clear message when required settings are missing.
# The expansions are quoted so the values are never word-split or globbed.
: "${WHISPER_URL:?ERROR: WHISPER_URL not set (e.g., https://llm.lambda.coredump.ru/v1)}"
: "${WHISPER_API_KEY:?ERROR: WHISPER_API_KEY not set}"

# Drop one trailing slash so "<url>/audio/transcriptions" never contains "//".
WHISPER_URL="${WHISPER_URL%/}"

MODEL="whisper-1" # Changed from a specific model to the generic name
LANGUAGE="ru"

# Hotwords

#######################################
# Print the hotwords stored in a file as one comma-separated line.
# Lines starting with '#' and empty lines are ignored, a trailing CR
# (Windows line ending) is stripped, runs of commas are collapsed and
# leading/trailing commas are removed.
# Arguments:
#   $1 - path to the hotwords file
# Outputs:
#   the joined list on stdout, without a trailing newline (may be empty)
# Returns:
#   awk's exit status (0 even when the file yields no hotwords)
#######################################
load_hotwords() {
  local file="$1"
  # A single awk pass replaces the grep|grep|tr|sed pipeline: `grep -v`
  # exits 1 when it selects nothing, which aborts the whole script under
  # `set -e -o pipefail` for a file containing only comments.
  awk '
    { sub(/\r$/, "") }
    /^#/ || /^$/ { next }
    { out = out sep $0; sep = "," }
    END {
      gsub(/,+/, ",", out)
      sub(/^,/, "", out)
      sub(/,$/, "", out)
      printf "%s", out
    }
  ' "$file"
}

# HOTWORDS_PATH overrides the default location. The historical misspelling
# HOTWORKS_PATH is still honoured so existing setups keep working.
HOTWORDS_FILE="${HOTWORDS_PATH:-${HOTWORKS_PATH:-$SCRIPT_DIR/hotwords.txt}}"
HOTWORDS=""
if [[ -f "$HOTWORDS_FILE" ]]; then
  HOTWORDS="$(load_hotwords "$HOTWORDS_FILE")"
  echo "Loaded hotwords from $HOTWORDS_FILE"
fi

# ---------- argument parsing ----------
# Accepted forms:
#   <meeting_dir>                          auto mode
#   <meeting_dir> <file.WAV> <output_name> manual mode
# Exactly two arguments is rejected: it would otherwise fall through to
# auto mode and silently ignore the file the caller asked for.
if [[ $# -lt 1 || $# -eq 2 ]]; then
  echo "Usage: $0 <meeting_dir> [<file.WAV> <output_name>]" >&2
  exit 1
fi

MEETING_DIR="$1"

# The meeting directory is resolved relative to the script's own location.
if [[ ! -d "$SCRIPT_DIR/$MEETING_DIR" ]]; then
  echo "Error: meeting directory not found: $SCRIPT_DIR/$MEETING_DIR" >&2
  exit 1
fi

WORK_DIR="$(cd -- "$SCRIPT_DIR/$MEETING_DIR" && pwd)"
OUTPUT_DIR="$WORK_DIR/transcription"
mkdir -p -- "$OUTPUT_DIR"

#######################################
# Convert one or more audio files into a single mono, 16 kHz, 64 kbit/s MP3.
# Several inputs are concatenated in the given order through ffmpeg's
# concat demuxer.
# Arguments:
#   $1   - path of the MP3 to create
#   $2.. - input audio file(s)
# Outputs:
#   progress messages on stdout, errors on stderr
# Returns:
#   0 on success or when $1 already exists; non-zero on any failure
#######################################
convert_to_mp3() {
  local output_mp3="$1"
  shift
  local inputs=("$@")

  if [[ -f "$output_mp3" ]]; then
    echo " $output_mp3 already exists, skipping conversion"
    return 0
  fi

  if (( ${#inputs[@]} == 0 )); then
    echo " ERROR: no input files given for $output_mp3" >&2
    return 1
  fi

  local f
  for f in "${inputs[@]}"; do
    if [[ ! -f "$f" ]]; then
      echo " ERROR: input file not found: $f" >&2
      return 1
    fi
  done

  # Encode into a sibling ".part" file and rename only on success, so a
  # failed or interrupted run never leaves a truncated MP3 that a later run
  # would treat as "already exists". "-f mp3" is required because the
  # temporary name no longer ends in ".mp3".
  local tmp_mp3="${output_mp3}.part"
  local encode_opts=(-ac 1 -ar 16000 -b:a 64k -f mp3)
  local status=0

  if (( ${#inputs[@]} == 1 )); then
    echo " Converting ${inputs[0]} -> $output_mp3"
    ffmpeg -nostdin -y -loglevel error -i "${inputs[0]}" \
      "${encode_opts[@]}" "$tmp_mp3" || status=$?
  else
    local listfile
    # Template ends in the X's: a suffix after them is not portable.
    listfile=$(mktemp "${TMPDIR:-/tmp}/ffmpeg_concat_XXXXXX") || return 1

    local sq="'" rest escaped
    for f in "${inputs[@]}"; do
      # Inside the list file's single quotes, a literal ' is written '\''.
      rest=$f
      escaped=""
      while [[ "$rest" == *"$sq"* ]]; do
        escaped+=${rest%%"$sq"*}${sq}\\${sq}${sq}
        rest=${rest#*"$sq"}
      done
      escaped+=$rest
      printf "file '%s'\n" "$escaped" >> "$listfile"
    done

    echo " Concatenating ${#inputs[@]} files -> $output_mp3"
    ffmpeg -nostdin -y -loglevel error -f concat -safe 0 -i "$listfile" \
      "${encode_opts[@]}" "$tmp_mp3" || status=$?
    # Removed on both success and failure.
    rm -f -- "$listfile"
  fi

  if (( status != 0 )); then
    rm -f -- "$tmp_mp3"
    echo " ERROR: ffmpeg failed (exit $status) while creating $output_mp3" >&2
    return "$status"
  fi

  mv -f -- "$tmp_mp3" "$output_mp3"
}

#######################################
# Derive the text outputs from a stored transcription response.
# Writes <name>.txt (one "[HH:MM:SS] text" line per segment) and
# <name>_plain.txt (all segment texts joined by spaces) into OUTPUT_DIR.
# Globals:
#   OUTPUT_DIR (read)
# Arguments:
#   $1 - path to the JSON response
#   $2 - base name for the output files
# Returns:
#   python3's exit status
#######################################
_extract_transcript_text() {
  local json_file="$1"
  local name="$2"

  python3 - "$json_file" "$OUTPUT_DIR" "$name" <<'PYEOF'
import json
import os
import sys

json_path, output_dir, name = sys.argv[1:4]

# Explicit UTF-8: the locale default (e.g. ASCII under LANG=C) cannot
# represent Cyrillic transcripts.
with open(json_path, encoding="utf-8") as f:
    data = json.load(f)

segs = data.get("segments") or []


def seg_text(seg):
    return (seg.get("text") or "").strip()


# Timestamped text
lines = []
for seg in segs:
    start = seg.get("start") or 0
    h, m, s = int(start // 3600), int((start % 3600) // 60), int(start % 60)
    lines.append(f"[{h:02d}:{m:02d}:{s:02d}] {seg_text(seg)}")

txt_path = os.path.join(output_dir, f"{name}.txt")
with open(txt_path, "w", encoding="utf-8") as f:
    f.write("\n".join(lines))

# Plain text; fall back to the top-level "text" field when the response
# carries no segments.
plain = " ".join(seg_text(seg) for seg in segs)
if not segs:
    plain = (data.get("text") or "").strip()

plain_path = os.path.join(output_dir, f"{name}_plain.txt")
with open(plain_path, "w", encoding="utf-8") as f:
    f.write(plain)

print(f" {len(segs)} segments, {len(plain)} chars")
PYEOF
}

#######################################
# Upload an MP3 to "$WHISPER_URL/audio/transcriptions" and store the result.
# Writes <name>.json (raw verbose_json response) into OUTPUT_DIR and then
# derives <name>.txt and <name>_plain.txt from it.
# Globals:
#   OUTPUT_DIR, WHISPER_URL, WHISPER_API_KEY, MODEL, LANGUAGE, HOTWORDS (read)
# Arguments:
#   $1 - path to the MP3 file to upload
#   $2 - base name for the output files
# Outputs:
#   progress on stdout; errors (including the server's error body) on stderr
# Returns:
#   0 on success or when already transcribed; non-zero on failure
#######################################
transcribe_file() {
  local mp3_file="$1"
  local name="$2"
  local json_file="$OUTPUT_DIR/${name}.json"

  if [[ -f "$json_file" ]]; then
    if [[ -f "$OUTPUT_DIR/${name}.txt" && -f "$OUTPUT_DIR/${name}_plain.txt" ]]; then
      echo " $name already transcribed, skipping"
      return 0
    fi
    # The response was saved but text extraction did not complete earlier;
    # redo only the cheap local step instead of skipping forever.
    echo " $name: JSON present but text outputs missing, extracting again"
    _extract_transcript_text "$json_file" "$name"
    return
  fi

  echo " Transcribing $name..."
  local started
  started=$(date +%s)

  # Form the full URL for transcription
  local full_url="${WHISPER_URL}/audio/transcriptions"

  # The response goes to a ".part" file and is renamed only after HTTP 200,
  # so a failed transfer never leaves a JSON that later runs would skip.
  local tmp_json="${json_file}.part"

  # NOTE(review): the bearer token is passed on curl's command line and is
  # therefore visible in the process list while the upload runs.
  local curl_args=(
    -sS -w "%{http_code}" -o "$tmp_json"
    -X POST "$full_url"
    -H "Authorization: Bearer $WHISPER_API_KEY"
    -F "file=@${mp3_file}"
    -F "model=${MODEL}"
    -F "language=${LANGUAGE}"
    -F "response_format=verbose_json"
    -F "temperature=0.0"
    --max-time 3600
  )
  if [[ -n "$HOTWORDS" ]]; then
    # --form-string sends the value literally; with -F a leading '@' or '<'
    # would be interpreted as a file reference.
    curl_args+=(--form-string "hotwords=${HOTWORDS}")
  fi

  local http_code
  local curl_status=0
  # "|| curl_status=$?" keeps `set -e` from aborting before cleanup.
  http_code=$(curl "${curl_args[@]}") || curl_status=$?
  local elapsed=$(( $(date +%s) - started ))

  if (( curl_status != 0 )); then
    echo " ERROR: curl failed (exit $curl_status) after ${elapsed}s" >&2
    rm -f -- "$tmp_json"
    return 1
  fi

  if [[ "$http_code" != "200" ]]; then
    echo " ERROR: HTTP $http_code" >&2
    # Display error response body for debugging
    if [[ -f "$tmp_json" ]]; then
      cat "$tmp_json" >&2
      echo >&2
    fi
    rm -f -- "$tmp_json"
    return 1
  fi

  mv -f -- "$tmp_json" "$json_file"
  echo " Done in ${elapsed}s"

  # Extract plain text and timestamped text
  _extract_transcript_text "$json_file" "$name"
}

# ---------- manual mode (specific file) ----------
# With three or more arguments only the named recording is handled:
# $2 is the file inside the meeting directory, $3 the base name of the outputs.
if (( $# >= 3 )); then
  NAME="$3"
  WAV_FILE="$WORK_DIR/$2"
  MP3_FILE="$OUTPUT_DIR/${NAME}.mp3"

  echo "=== Transcribing $2 as '$NAME' ==="
  convert_to_mp3 "$MP3_FILE" "$WAV_FILE"
  transcribe_file "$MP3_FILE" "$NAME"
  echo "=== Done ==="

  # Manual mode never continues into auto-detection.
  exit 0
fi

# ---------- auto mode ----------
# Detect recordings in the meeting directory by file name:
#   SR*XY.WAV -> source "h2n_xy"   (first match in sort order)
#   SR*MS.WAV -> source "h2n_ms"   (first match in sort order)
#   other *.WAV (not SR*) -> source "saramonic", concatenated in sort order
# Every source is converted to MP3 first, then each MP3 is transcribed.
echo "=== Auto-detecting audio sources in $WORK_DIR ==="

# Reading sorted matches into arrays makes the "first match" deterministic
# and avoids `find | head`, where find can be killed by SIGPIPE and fail the
# pipeline under `set -o pipefail`.
mapfile -t H2N_XY_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "SR*XY.WAV" | sort)
mapfile -t H2N_MS_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "SR*MS.WAV" | sort)
mapfile -t SARAMONIC_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "*.WAV" ! -name "SR*" | sort)

H2N_XY="${H2N_XY_FILES[0]:-}"
H2N_MS="${H2N_MS_FILES[0]:-}"

# Each entry is "<name>:<paths>", with multiple paths separated by newlines.
# Newline is the separator because the lists above are already split on it,
# so it adds no new restriction on file names (unlike '|').
SOURCES=()

if [[ -n "$H2N_XY" ]]; then
  echo " Found H2n XY: $(basename "$H2N_XY")"
  SOURCES+=("h2n_xy:$H2N_XY")
fi

if [[ -n "$H2N_MS" ]]; then
  echo " Found H2n MS: $(basename "$H2N_MS")"
  SOURCES+=("h2n_ms:$H2N_MS")
fi

if (( ${#SARAMONIC_FILES[@]} > 0 )); then
  echo " Found Saramonic files: ${SARAMONIC_FILES[*]##*/}"
  # Command substitution strips the trailing newline printf emits.
  joined=$(printf '%s\n' "${SARAMONIC_FILES[@]}")
  SOURCES+=("saramonic:$joined")
fi

if (( ${#SOURCES[@]} == 0 )); then
  echo "Error: No WAV files found in $WORK_DIR" >&2
  exit 1
fi

echo ""
echo "=== Step 1: Converting to mp3 ==="
for entry in "${SOURCES[@]}"; do
  name="${entry%%:*}"
  paths="${entry#*:}"
  mp3="$OUTPUT_DIR/${name}.mp3"

  mapfile -t files <<< "$paths"
  convert_to_mp3 "$mp3" "${files[@]}"
done

echo ""
echo "=== Step 2: Transcribing ==="
for entry in "${SOURCES[@]}"; do
  name="${entry%%:*}"
  mp3="$OUTPUT_DIR/${name}.mp3"
  transcribe_file "$mp3" "$name"
done

echo ""
echo "=== Done! ==="
echo "Results in: $OUTPUT_DIR/"