Files
2026-03-21_why-manifest/video2srt.py

189 lines
7.2 KiB
Python

import os
import time
import math
import shutil
import subprocess
import sys
from pathlib import Path
from groq import Groq
from logger import get_system_logger, get_ai_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
# SECURITY: the API key is read from the GROQ_API_KEY environment variable
# instead of being hard-coded. A key that was ever committed to source
# control must be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
LANGUAGE = "zh"  # passed as `language` to the transcription request
KEEP_TEMP_AUDIO = False  # when False, the temp_audio directory is removed after transcription
MAX_FILE_SIZE_MB = 23  # with BITRATE_KBPS, determines the length of each audio segment
BITRATE_KBPS = 64  # MP3 bitrate (kbit/s) passed to FFmpeg
MODEL_NAME = "whisper-large-v3-turbo"  # passed as `model` to the transcription request
# Initialise logging
logger = get_system_logger('video2srt')
# ==========================================
# NOTE(review): when api_key is None the Groq SDK is expected to raise at
# construction time, so a missing key fails fast at import - confirm
# against the installed SDK version.
client = Groq(api_key=GROQ_API_KEY)
def format_srt_time(seconds):
    """Convert a time offset in seconds to an SRT timestamp (HH:MM:SS,mmm).

    The value is rounded to the nearest millisecond and then split into
    fields with integer arithmetic, so binary floating-point error cannot
    drop a millisecond (e.g. 1.001 -> "00:00:01,001") and a fraction that
    rounds up carries correctly into the seconds/minutes/hours fields.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero because SRT timestamps cannot be negative.

    Returns:
        The timestamp string, e.g. "01:01:01,500".
    """
    # Work in whole milliseconds; truncating the float fraction instead
    # would turn 0.9999999 ms into 0 ms.
    total_millis = max(0, int(round(float(seconds) * 1000)))
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return f"{hours:02}:{mins:02}:{secs:02},{millis:03}"
def transcribe_with_retry(audio_file, ai_log):
    """Transcribe one audio file through the Groq API, retrying on rate limits.

    A failure whose message contains "429" or "rate_limit" is treated as a
    rate limit: the function sleeps 25 seconds and tries again, with no
    upper bound on the number of attempts. Any other failure is logged and
    re-raised to the caller.

    Args:
        audio_file: Path-like object of the audio file; its ``.name`` is
            sent as the upload file name.
        ai_log: Logger that receives the per-task AI log lines.

    Returns:
        The ``segments`` attribute of the verbose_json transcription response.
    """
    ai_log.info(f"开始转录音频: {audio_file.name}")
    ai_log.info(f"模型: {MODEL_NAME}, 语言: {LANGUAGE}")

    attempts = 0
    while True:
        try:
            began = time.time()
            with open(audio_file, "rb") as audio_fh:
                upload = (audio_file.name, audio_fh.read())
                result = client.audio.transcriptions.create(
                    file=upload,
                    model=MODEL_NAME,
                    response_format="verbose_json",
                    language=LANGUAGE,
                    temperature=0.0,
                )
            duration = time.time() - began
            ai_log.info(f"转录成功,耗时: {duration:.2f}")
            ai_log.info(f"识别到 {len(result.segments)} 个语音片段")
            return result.segments
        except Exception as e:
            attempts += 1
            message = str(e)
            ai_log.error(f"转录失败 (尝试 {attempts}): {message}")

            rate_limited = "429" in message or "rate_limit" in message.lower()
            if not rate_limited:
                # Anything other than a rate limit is not retried.
                log_exception(ai_log, e, "Groq API 调用失败")
                raise e

            # Rate limited: pause for a fixed interval, then loop again.
            pause = 25
            ai_log.warning(f"触发 API 速率限制,等待 {pause} 秒后重试...")
            logger.warning(f"Groq API 速率限制,等待 {pause}")
            time.sleep(pause)
def process_single_video(raw_video_path, video_work_dir):
    """Move a video into its work directory and transcribe it to an SRT file.

    Steps:
      1. Move the video into ``video_work_dir`` (skipped when a file of the
         same name is already there).
      2. Extract the audio with FFmpeg as mono MP3, split into fixed-length
         segments under ``<video_work_dir>/temp_audio``.
      3. Transcribe every segment and write the cues to
         ``<video_work_dir>/<video stem>.srt`` as they arrive.
      4. Remove the temporary audio (unless KEEP_TEMP_AUDIO is set) and
         create ``transcribe_done.flag`` in the work directory.

    Args:
        raw_video_path: Path of the source video.
        video_work_dir: Work directory for this video; created if missing.

    Raises:
        Exception: Any error from FFmpeg or from transcription is logged
            and propagated; in that case the done-flag is not created.
    """
    raw_video_path = Path(raw_video_path)
    video_work_dir = Path(video_work_dir)
    video_work_dir.mkdir(parents=True, exist_ok=True)
    logger.info("="*50)
    logger.info(f"开始处理视频: {raw_video_path.name}")
    logger.info("="*50)
    # Create the per-task AI log
    ai_log, ai_log_file = get_ai_logger('groq', 'transcribe')
    ai_log.info("="*50)
    ai_log.info("Groq 转录任务开始")
    ai_log.info(f"视频文件: {raw_video_path.name}")
    ai_log.info(f"工作目录: {video_work_dir}")
    ai_log.info("="*50)
    # --- Move the video into the work directory ---
    target_video_path = video_work_dir / raw_video_path.name
    if not target_video_path.exists():
        logger.info(f"移动视频至工作区: {raw_video_path.name}")
        ai_log.info(f"移动视频: {raw_video_path} -> {target_video_path}")
        shutil.move(str(raw_video_path), str(target_video_path))
    else:
        logger.info(f"视频已在工作区中: {target_video_path.name}")
        ai_log.info(f"视频已存在于工作区: {target_video_path}")
    video_stem = target_video_path.stem
    audio_temp_dir = video_work_dir / "temp_audio"
    audio_temp_dir.mkdir(parents=True, exist_ok=True)
    # Remove segments left behind by an earlier run so the glob below cannot
    # pick them up and transcribe them as part of this video. FFmpeg
    # regenerates every segment anyway (-y), so nothing useful is lost.
    for stale_part in audio_temp_dir.glob("part_*.mp3"):
        stale_part.unlink()
    # 1. Segment length in seconds: size budget (MB -> kilobits) / bitrate
    seg_duration = math.floor((MAX_FILE_SIZE_MB * 8 * 1024) / BITRATE_KBPS)
    logger.info(f"音频分片时长: {seg_duration}")
    ai_log.info(f"音频分片参数: {seg_duration}秒/片, 比特率: {BITRATE_KBPS}kbps")
    # 2. Extract and segment the audio with FFmpeg (from the work-dir copy)
    logger.info("开始提取音频...")
    ai_log.info("开始 FFmpeg 音频提取")
    output_pattern = str(audio_temp_dir / "part_%03d.mp3")
    cmd = [
        'ffmpeg', '-y', '-i', str(target_video_path),
        '-vn', '-acodec', 'libmp3lame', '-b:a', f'{BITRATE_KBPS}k',
        '-ac', '1', '-ar', '22050',
        '-f', 'segment', '-segment_time', str(seg_duration),
        '-reset_timestamps', '1', output_pattern
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
        logger.info("音频提取完成")
        ai_log.info("FFmpeg 音频提取成功")
    except Exception as e:
        # capture_output=True swallows FFmpeg's stderr; surface its tail so
        # the actual cause of the failure ends up in the log. Exceptions
        # other than CalledProcessError (e.g. ffmpeg not installed) carry
        # no stderr attribute.
        ffmpeg_stderr = getattr(e, "stderr", None)
        if ffmpeg_stderr:
            if isinstance(ffmpeg_stderr, bytes):
                ffmpeg_stderr = ffmpeg_stderr.decode("utf-8", errors="replace")
            ai_log.error(f"FFmpeg stderr: {ffmpeg_stderr.strip()[-2000:]}")
        log_exception(logger, e, "FFmpeg 音频提取失败")
        log_exception(ai_log, e, "FFmpeg 执行失败")
        raise
    # Zero-padded names (part_000.mp3, ...) sort into playback order.
    segments = sorted(audio_temp_dir.glob("part_*.mp3"))
    logger.info(f"音频分片数量: {len(segments)}")
    ai_log.info(f"生成音频分片: {len(segments)}")
    # 3. Transcribe each segment and write the SRT as results arrive
    logger.info("开始分片转录...")
    ai_log.info("开始批量转录")
    srt_path = video_work_dir / f"{video_stem}.srt"
    global_idx = 1
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        for i, seg in enumerate(segments):
            # Timestamps restart at zero in every segment (-reset_timestamps),
            # so shift them by the segment's position in the full audio.
            offset = i * seg_duration
            logger.info(f"转录进度: {i+1}/{len(segments)}")
            ai_log.info(f"转录片段 {i+1}/{len(segments)}: {seg.name}")
            seg_data = transcribe_with_retry(seg, ai_log)
            for chunk in seg_data:
                start = format_srt_time(chunk['start'] + offset)
                end = format_srt_time(chunk['end'] + offset)
                text = chunk['text'].strip()
                srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
                global_idx += 1
    logger.info(f"字幕文件已生成: {srt_path.name}")
    ai_log.info(f"字幕生成完成: 共 {global_idx-1} 条字幕")
    # 4. Clean up the temporary audio
    if not KEEP_TEMP_AUDIO:
        logger.info("清理临时音频文件...")
        ai_log.info("清理临时音频目录")
        shutil.rmtree(audio_temp_dir, ignore_errors=True)
    # --- Create the transcription-done marker ---
    (video_work_dir / "transcribe_done.flag").touch()
    logger.info("生成转录完成标记")
    ai_log.info("生成 transcribe_done.flag")
    logger.info(f"处理完成。工作区目录: {video_work_dir}")
    ai_log.info("="*50)
    ai_log.info("Groq 转录任务完成")
    ai_log.info("="*50)
    logger.info(f"AI日志已保存: {ai_log_file}")
# Command-line entry point: python video2srt.py <video path> <work dir>
if __name__ == "__main__":
    if len(sys.argv) >= 3:
        # sys.argv[1]: source video path (usually under stage)
        # sys.argv[2]: target work directory (usually under session)
        logger.info(f"接收到转录任务: {sys.argv[1]}")
        try:
            process_single_video(sys.argv[1], sys.argv[2])
        except Exception as e:
            log_exception(logger, e, "视频转录失败")
            raise
    else:
        logger.error("缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")
        print("错误: 缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")
        # Exit non-zero so a calling script or pipeline can detect the
        # failure instead of seeing a successful (0) exit status.
        sys.exit(1)