auto-report-skill/scripts/transcribe.sh

200 lines
No EOL
5.4 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Transcribe audio recordings using a local whisper server (with API key).
#
# Usage: ./transcribe.sh <meeting_dir> [<file.WAV> <output_name>]
#
# <meeting_dir> is resolved relative to the directory containing this script.
#
# Environment:
#   WHISPER_URL      (required) API base URL; "/audio/transcriptions" is appended
#   WHISPER_API_KEY  (required) bearer token sent in the Authorization header
#   HOTWORDS_PATH    (optional) hotwords file; defaults to hotwords.txt next to
#                    this script. The historical misspelling HOTWORKS_PATH is
#                    still honoured as a fallback.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Quoted so the values are never word-split or glob-expanded by the ':' no-op.
: "${WHISPER_URL:?ERROR: WHISPER_URL not set (e.g., https://llm.lambda.coredump.ru/v1)}"
: "${WHISPER_API_KEY:?ERROR: WHISPER_API_KEY not set}"
MODEL="whisper-1" # Changed from a specific model to the generic name
LANGUAGE="ru"

# Hotwords: one per line; lines starting with '#' and empty lines are ignored.
# The remaining lines are joined into a single comma-separated string.
HOTWORDS_FILE="${HOTWORDS_PATH:-${HOTWORKS_PATH:-$SCRIPT_DIR/hotwords.txt}}"
HOTWORDS=""
if [ -f "$HOTWORDS_FILE" ]; then
  # sed (unlike 'grep -v') exits 0 even when it filters out every line, so an
  # empty or comment-only hotwords file does not abort the script under
  # 'set -e -o pipefail'.
  HOTWORDS=$(sed -e '/^#/d' -e '/^$/d' "$HOTWORDS_FILE" \
    | tr '\n' ',' \
    | sed 's/,,*/,/g; s/^,//; s/,$//')
  echo "Loaded hotwords from $HOTWORDS_FILE"
fi

# ---------- argument parsing ----------
if [ $# -lt 1 ]; then
  echo "Usage: $0 <meeting_dir> [<file.WAV> <output_name>]" >&2
  exit 1
fi
MEETING_DIR="$1"
if ! WORK_DIR="$(cd "$SCRIPT_DIR/$MEETING_DIR" 2>/dev/null && pwd)"; then
  echo "ERROR: meeting directory not found: $SCRIPT_DIR/$MEETING_DIR" >&2
  exit 1
fi
OUTPUT_DIR="$WORK_DIR/transcription"
mkdir -p "$OUTPUT_DIR"
#######################################
# Convert one audio file, or concatenate several, into a mono 16 kHz
# 64 kbit/s MP3. Does nothing when the output file already exists.
# Arguments:
#   $1    - output MP3 path
#   $2... - one or more input audio files
# Outputs:
#   Progress messages on stdout; ffmpeg errors and failures on stderr.
# Returns:
#   0 on success or skip, 1 when there are no inputs or ffmpeg/mktemp fails.
#######################################
convert_to_mp3() {
  local output_mp3="$1"
  shift
  local inputs=("$@")
  if [ -f "$output_mp3" ]; then
    echo " $output_mp3 already exists, skipping conversion"
    return
  fi
  if [ ${#inputs[@]} -eq 0 ]; then
    echo " ERROR: no input files given for $output_mp3" >&2
    return 1
  fi

  # Encode into a scratch file and rename only on success, so a failed or
  # interrupted ffmpeg run never leaves a truncated MP3 that the
  # "already exists" check above would accept on the next run.
  # The scratch name has no .mp3 extension, hence the explicit "-f mp3".
  local partial="${output_mp3}.part"
  # -loglevel error keeps ffmpeg quiet but still shows real errors;
  # -nostdin stops ffmpeg from consuming the caller's stdin.
  local ff_opts=(-nostdin -hide_banner -loglevel error -y)
  local enc_opts=(-ac 1 -ar 16000 -b:a 64k -f mp3)
  local listfile=""
  local status=0
  local f escaped

  if [ ${#inputs[@]} -eq 1 ]; then
    echo " Converting ${inputs[0]} -> $output_mp3"
    ffmpeg "${ff_opts[@]}" -i "${inputs[0]}" "${enc_opts[@]}" "$partial" \
      || status=$?
  else
    # Template ends in the X's: BSD mktemp does not randomise a template
    # that has a suffix after them.
    listfile=$(mktemp "${TMPDIR:-/tmp}/ffmpeg_concat_XXXXXX") || return 1
    for f in "${inputs[@]}"; do
      # Inside a single-quoted concat-list entry, a literal ' is written '\''.
      escaped=$(printf '%s' "$f" | sed "s/'/'\\\\''/g")
      printf "file '%s'\n" "$escaped" >> "$listfile"
    done
    echo " Concatenating ${#inputs[@]} files -> $output_mp3"
    ffmpeg "${ff_opts[@]}" -f concat -safe 0 -i "$listfile" \
      "${enc_opts[@]}" "$partial" || status=$?
    rm -f "$listfile"
  fi

  if [ "$status" -ne 0 ]; then
    echo " ERROR: ffmpeg failed (exit $status) while creating $output_mp3" >&2
    rm -f "$partial"
    return 1
  fi
  mv -f "$partial" "$output_mp3"
}
#######################################
# Upload an MP3 to the transcription endpoint and write the results.
# Globals (read):
#   OUTPUT_DIR, WHISPER_URL, WHISPER_API_KEY, MODEL, LANGUAGE, HOTWORDS
# Arguments:
#   $1 - path of the MP3 file to upload
#   $2 - base name used for the output files
# Outputs:
#   $OUTPUT_DIR/<name>.json       raw server response
#   $OUTPUT_DIR/<name>.txt        one "[HH:MM:SS] text" line per segment
#   $OUTPUT_DIR/<name>_plain.txt  segment texts joined by single spaces
#   Progress on stdout; errors (including the server's error body) on stderr.
# Returns:
#   0 on success or when <name>.json already exists;
#   1 when curl fails or the server answers with a status other than 200.
#######################################
transcribe_file() {
  local mp3_file="$1"
  local name="$2"
  local json_file="$OUTPUT_DIR/${name}.json"
  if [ -f "$json_file" ]; then
    echo " $name already transcribed, skipping"
    return
  fi
  echo " Transcribing $name..."
  local started
  started=$(date +%s)

  # Form the full URL for transcription; tolerate a trailing slash on the base.
  local full_url="${WHISPER_URL%/}/audio/transcriptions"
  # The response goes to a scratch file and is renamed only after an HTTP 200,
  # so a partial or error response is never taken for a finished transcription
  # by the "already transcribed" check above.
  local response_file="${json_file}.part"
  # NOTE(review): the API key is passed on curl's command line and is therefore
  # visible in the process list while the request runs.
  local curl_args=(
    -s -w "%{http_code}" -o "$response_file"
    -X POST "$full_url"
    -H "Authorization: Bearer $WHISPER_API_KEY"
    -F "file=@${mp3_file}"
    -F "model=${MODEL}"
    -F "language=${LANGUAGE}"
    -F "response_format=verbose_json"
    -F "temperature=0.0"
    --max-time 3600
  )
  if [ -n "${HOTWORDS:-}" ]; then
    # --form-string sends the value literally; -F would give a leading '@' or
    # '<' and any ';type=' sequence a special meaning.
    curl_args+=(--form-string "hotwords=${HOTWORDS}")
  fi

  local http_code
  local curl_status=0
  # Capture curl's exit status explicitly: under 'set -e' an unhandled failure
  # here would terminate the script without any message or cleanup.
  http_code=$(curl "${curl_args[@]}") || curl_status=$?
  local elapsed=$(( $(date +%s) - started ))

  if [ "$curl_status" -ne 0 ]; then
    echo " ERROR: curl failed (exit $curl_status) after ${elapsed}s" >&2
    rm -f "$response_file"
    return 1
  fi
  if [ "$http_code" != "200" ]; then
    echo " ERROR: HTTP $http_code" >&2
    # Display error response body for debugging
    if [ -f "$response_file" ]; then
      cat "$response_file" >&2
      echo >&2
    fi
    rm -f "$response_file"
    return 1
  fi
  mv -f "$response_file" "$json_file"
  echo " Done in ${elapsed}s"

  # Extract plain text and timestamped text
  python3 - "$json_file" "$OUTPUT_DIR" "$name" <<'PYEOF'
import json, os, sys

json_path, output_dir, name = sys.argv[1:4]

# Explicit UTF-8: the default encoding depends on the locale, and the
# transcripts contain non-ASCII text.
with open(json_path, encoding="utf-8") as f:
    data = json.load(f)

segs = data.get("segments") or []

# Timestamped text
lines = []
texts = []
for seg in segs:
    start = seg.get("start", 0)
    h, m, s = int(start // 3600), int((start % 3600) // 60), int(start % 60)
    text = (seg.get("text") or "").strip()
    texts.append(text)
    lines.append(f"[{h:02d}:{m:02d}:{s:02d}] {text}")

txt_path = os.path.join(output_dir, f"{name}.txt")
with open(txt_path, "w", encoding="utf-8") as f:
    f.write("\n".join(lines))

# Plain text; when the response has no segments, fall back to a top-level
# "text" field if the server provided one.
if segs:
    plain = " ".join(texts)
else:
    plain = (data.get("text") or "").strip()

plain_path = os.path.join(output_dir, f"{name}_plain.txt")
with open(plain_path, "w", encoding="utf-8") as f:
    f.write(plain)

print(f" {len(segs)} segments, {len(plain)} chars")
PYEOF
}
# ---------- manual mode (specific file) ----------
# When <file.WAV> and <output_name> are both given, convert and transcribe
# only that recording (its path is taken relative to the meeting directory)
# and exit without running auto-detection.
if [ $# -eq 2 ]; then
  # A lone <file.WAV> would otherwise be ignored and auto mode would run.
  echo "ERROR: <file.WAV> and <output_name> must be given together" >&2
  echo "Usage: $0 <meeting_dir> [<file.WAV> <output_name>]" >&2
  exit 1
fi
if [ $# -ge 3 ]; then
  WAV_FILE="$WORK_DIR/$2"
  NAME="$3"
  MP3_FILE="$OUTPUT_DIR/${NAME}.mp3"
  # The source file is only needed when the MP3 has not been produced yet,
  # because convert_to_mp3 skips conversion for an existing output.
  if [ ! -f "$MP3_FILE" ] && [ ! -f "$WAV_FILE" ]; then
    echo "ERROR: input file not found: $WAV_FILE" >&2
    exit 1
  fi
  echo "=== Transcribing $2 as '$NAME' ==="
  convert_to_mp3 "$MP3_FILE" "$WAV_FILE"
  transcribe_file "$MP3_FILE" "$NAME"
  echo "=== Done ==="
  exit 0
fi
# ---------- auto mode ----------
# Detect recordings directly inside WORK_DIR, convert each detected source to
# one MP3 and transcribe it:
#   SR*XY.WAV            -> h2n_xy    (first match in sort order)
#   SR*MS.WAV            -> h2n_ms    (first match in sort order)
#   other *.WAV files    -> saramonic (all of them, concatenated in sort order)
echo "=== Auto-detecting audio sources in $WORK_DIR ==="

# Print the first top-level file in WORK_DIR whose name matches glob $1, or
# nothing when there is no match. Sorting makes the choice deterministic, and
# 'sed -n 1p' reads all of its input, so no upstream command is killed by
# SIGPIPE (which would abort the script under 'set -o pipefail').
first_wav_matching() {
  find "$WORK_DIR" -maxdepth 1 -name "$1" | sort | sed -n '1p'
}

H2N_XY=$(first_wav_matching "SR*XY.WAV")
H2N_MS=$(first_wav_matching "SR*MS.WAV")
mapfile -t SARAMONIC_FILES < <(find "$WORK_DIR" -maxdepth 1 -name "*.WAV" ! -name "SR*" | sort)

# Names of the detected sources, in processing order. The input files stay in
# their own variables rather than being packed into delimiter-separated
# strings, so paths containing ':' or '|' are handled correctly.
SOURCE_NAMES=()
if [ -n "$H2N_XY" ]; then
  echo " Found H2n XY: $(basename "$H2N_XY")"
  SOURCE_NAMES+=("h2n_xy")
fi
if [ -n "$H2N_MS" ]; then
  echo " Found H2n MS: $(basename "$H2N_MS")"
  SOURCE_NAMES+=("h2n_ms")
fi
if [ ${#SARAMONIC_FILES[@]} -gt 0 ]; then
  echo " Found Saramonic files: ${SARAMONIC_FILES[*]##*/}"
  SOURCE_NAMES+=("saramonic")
fi
if [ ${#SOURCE_NAMES[@]} -eq 0 ]; then
  echo "Error: No WAV files found in $WORK_DIR" >&2
  exit 1
fi

echo ""
echo "=== Step 1: Converting to mp3 ==="
for name in "${SOURCE_NAMES[@]}"; do
  case "$name" in
    h2n_xy) files=("$H2N_XY") ;;
    h2n_ms) files=("$H2N_MS") ;;
    saramonic) files=("${SARAMONIC_FILES[@]}") ;;
  esac
  convert_to_mp3 "$OUTPUT_DIR/${name}.mp3" "${files[@]}"
done

echo ""
echo "=== Step 2: Transcribing ==="
for name in "${SOURCE_NAMES[@]}"; do
  transcribe_file "$OUTPUT_DIR/${name}.mp3" "$name"
done

echo ""
echo "=== Done! ==="
echo "Results in: $OUTPUT_DIR/"