"""Extract audio from a video, transcribe it with Groq Whisper, and write an SRT file.

Usage: python video2srt.py <video path> <work dir>
"""
import os
import time
import math
import shutil
import subprocess
import sys
from pathlib import Path

from groq import Groq

from logger import get_system_logger, get_ai_logger, log_exception

# ==========================================
# Interface configuration
# ==========================================
# SECURITY(review): an API key is committed in source below. It is kept only
# as a backward-compatible fallback so existing deployments keep working; the
# GROQ_API_KEY environment variable now takes precedence. Rotate this key and
# remove the literal once the environment variable is provisioned.
GROQ_API_KEY = (
    os.environ.get("GROQ_API_KEY")
    or "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"
)
LANGUAGE = "zh"
KEEP_TEMP_AUDIO = False
MAX_FILE_SIZE_MB = 23
BITRATE_KBPS = 64
MODEL_NAME = "whisper-large-v3-turbo"
# Seconds to sleep before retrying after a rate-limit response.
RATE_LIMIT_WAIT_SECONDS = 25

# Initialise logging
logger = get_system_logger('video2srt')
# ==========================================

client = Groq(api_key=GROQ_API_KEY)


def format_srt_time(seconds):
    """Convert a time offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to the nearest millisecond once, and the fields are
    then derived with integer arithmetic. Subtracting the integer part and
    truncating loses a millisecond to float error (``1.001`` would render as
    ``00:00:01,000``). Negative inputs are clamped to zero.
    """
    total_millis = max(0, int(round(seconds * 1000)))
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return f"{hours:02}:{mins:02}:{secs:02},{millis:03}"


def _is_rate_limit_error(exc):
    """Return True when *exc* looks like an HTTP 429 / rate-limit failure."""
    # Prefer the structured status code when the exception carries one; fall
    # back to matching the message text as the original implementation did.
    if getattr(exc, "status_code", None) == 429:
        return True
    message = str(exc)
    return "429" in message or "rate_limit" in message.lower()


def transcribe_with_retry(audio_file, ai_log, max_retries=None):
    """Transcribe one audio file with Groq, retrying on rate-limit errors.

    Args:
        audio_file: ``Path`` of the audio segment to upload.
        ai_log: Logger that receives the per-request log lines.
        max_retries: Maximum number of rate-limit retries before giving up.
            ``None`` (the default) retries indefinitely.

    Returns:
        The ``segments`` attribute of the verbose-JSON transcription response.

    Raises:
        Exception: Any non-rate-limit error from the API call is logged and
            re-raised, as is a rate-limit error once ``max_retries`` is
            exhausted.
    """
    ai_log.info(f"开始转录音频: {audio_file.name}")
    ai_log.info(f"模型: {MODEL_NAME}, 语言: {LANGUAGE}")

    attempt = 0
    while True:
        try:
            start_time = time.time()
            with open(audio_file, "rb") as file:
                response = client.audio.transcriptions.create(
                    file=(audio_file.name, file.read()),
                    model=MODEL_NAME,
                    response_format="verbose_json",
                    language=LANGUAGE,
                    temperature=0.0,
                )
            elapsed = time.time() - start_time
            ai_log.info(f"转录成功,耗时: {elapsed:.2f}秒")
            ai_log.info(f"识别到 {len(response.segments)} 个语音片段")
            return response.segments
        except Exception as e:
            attempt += 1
            err_str = str(e)
            ai_log.error(f"转录失败 (尝试 {attempt}): {err_str}")

            may_retry = max_retries is None or attempt <= max_retries
            if _is_rate_limit_error(e) and may_retry:
                wait_time = RATE_LIMIT_WAIT_SECONDS
                ai_log.warning(f"触发 API 速率限制,等待 {wait_time} 秒后重试...")
                logger.warning(f"Groq API 速率限制,等待 {wait_time} 秒")
                time.sleep(wait_time)
            else:
                log_exception(ai_log, e, "Groq API 调用失败")
                # Bare raise keeps the original traceback intact.
                raise


def _extract_audio_segments(video_path, audio_dir, seg_duration, ai_log):
    """Run FFmpeg to split the audio track of *video_path* into MP3 parts.

    Returns the sorted list of ``part_*.mp3`` paths written to *audio_dir*.
    Re-raises whatever ``subprocess.run`` raised after logging it.
    """
    # Remove parts left behind by an earlier run (kept temp audio or a crash);
    # otherwise the glob below would pick up stale segments with higher
    # indices and they would be transcribed into the new SRT.
    for stale_part in audio_dir.glob("part_*.mp3"):
        stale_part.unlink()

    output_pattern = str(audio_dir / "part_%03d.mp3")
    cmd = [
        'ffmpeg', '-y', '-i', str(video_path),
        '-vn',                                   # drop the video stream
        '-acodec', 'libmp3lame', '-b:a', f'{BITRATE_KBPS}k',
        '-ac', '1', '-ar', '22050',              # mono, 22.05 kHz
        '-f', 'segment', '-segment_time', str(seg_duration),
        '-reset_timestamps', '1',                # each part starts at t=0
        output_pattern,
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
        logger.info("音频提取完成")
        ai_log.info("FFmpeg 音频提取成功")
    except Exception as e:
        # capture_output hides FFmpeg's diagnostics; surface the tail of
        # stderr so the failure can be diagnosed from the log.
        stderr_bytes = getattr(e, "stderr", None)
        if stderr_bytes:
            stderr_text = stderr_bytes.decode("utf-8", errors="replace").strip()
            ai_log.error(f"FFmpeg stderr (tail):\n{stderr_text[-2000:]}")
        log_exception(logger, e, "FFmpeg 音频提取失败")
        log_exception(ai_log, e, "FFmpeg 执行失败")
        raise

    return sorted(audio_dir.glob("part_*.mp3"))


def process_single_video(raw_video_path, video_work_dir):
    """Move a video into its work directory and produce an SRT next to it.

    Steps:
        1. Move the video into the work directory (skipped if already there).
        2. Extract the audio track and split it into size-limited segments.
        3. Transcribe each segment and append the cues to ``<stem>.srt``.
        4. Remove the temporary audio (unless ``KEEP_TEMP_AUDIO``) and create
           ``transcribe_done.flag``.

    Args:
        raw_video_path: Path of the source video.
        video_work_dir: Directory that receives the video, the SRT file and
            the completion flag. Created if missing.

    Raises:
        Exception: Errors from the file move, FFmpeg, or the transcription
            call propagate to the caller; the completion flag is not written
            in that case.
    """
    raw_video_path = Path(raw_video_path)
    video_work_dir = Path(video_work_dir)
    video_work_dir.mkdir(parents=True, exist_ok=True)

    logger.info("="*50)
    logger.info(f"开始处理视频: {raw_video_path.name}")
    logger.info("="*50)

    # Create the per-task AI log
    ai_log, ai_log_file = get_ai_logger('groq', 'transcribe')
    ai_log.info("="*50)
    ai_log.info("Groq 转录任务开始")
    ai_log.info(f"视频文件: {raw_video_path.name}")
    ai_log.info(f"工作目录: {video_work_dir}")
    ai_log.info("="*50)

    # --- Move the video into the work directory ---
    target_video_path = video_work_dir / raw_video_path.name
    if not target_video_path.exists():
        logger.info(f"移动视频至工作区: {raw_video_path.name}")
        ai_log.info(f"移动视频: {raw_video_path} -> {target_video_path}")
        shutil.move(str(raw_video_path), str(target_video_path))
    else:
        logger.info(f"视频已在工作区中: {target_video_path.name}")
        ai_log.info(f"视频已存在于工作区: {target_video_path}")

    video_stem = target_video_path.stem
    audio_temp_dir = video_work_dir / "temp_audio"
    audio_temp_dir.mkdir(parents=True, exist_ok=True)

    # 1. Segment length: size budget in kilobits (MB * 1024 KB * 8 bits)
    #    divided by the bitrate in kbps gives seconds per segment.
    seg_duration = math.floor((MAX_FILE_SIZE_MB * 8 * 1024) / BITRATE_KBPS)
    logger.info(f"音频分片时长: {seg_duration} 秒")
    ai_log.info(f"音频分片参数: {seg_duration}秒/片, 比特率: {BITRATE_KBPS}kbps")

    # 2. FFmpeg extraction and segmentation (uses the in-workdir video path)
    logger.info("开始提取音频...")
    ai_log.info("开始 FFmpeg 音频提取")
    segments = _extract_audio_segments(
        target_video_path, audio_temp_dir, seg_duration, ai_log
    )
    logger.info(f"音频分片数量: {len(segments)}")
    ai_log.info(f"生成音频分片: {len(segments)} 个")

    # 3. Transcribe and write the SRT incrementally
    logger.info("开始分片转录...")
    ai_log.info("开始批量转录")

    srt_path = video_work_dir / f"{video_stem}.srt"
    global_idx = 1

    with open(srt_path, "w", encoding="utf-8") as srt_file:
        for i, seg in enumerate(segments):
            # NOTE(review): assumes every segment is exactly seg_duration
            # long; FFmpeg may cut slightly off that boundary, which would
            # accumulate as timestamp drift — confirm this is acceptable.
            offset = i * seg_duration
            logger.info(f"转录进度: {i+1}/{len(segments)}")
            ai_log.info(f"转录片段 {i+1}/{len(segments)}: {seg.name}")

            seg_data = transcribe_with_retry(seg, ai_log)
            for chunk in seg_data:
                text = chunk['text'].strip()
                if not text:
                    # An empty cue body would produce a malformed SRT block.
                    continue
                start = format_srt_time(chunk['start'] + offset)
                end = format_srt_time(chunk['end'] + offset)
                srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
                global_idx += 1

    logger.info(f"字幕文件已生成: {srt_path.name}")
    ai_log.info(f"字幕生成完成: 共 {global_idx-1} 条字幕")

    # 4. Clean up the temporary audio
    if not KEEP_TEMP_AUDIO:
        logger.info("清理临时音频文件...")
        ai_log.info("清理临时音频目录")
        shutil.rmtree(audio_temp_dir, ignore_errors=True)

    # --- Write the transcription-complete marker ---
    (video_work_dir / "transcribe_done.flag").touch()
    logger.info("生成转录完成标记")
    ai_log.info("生成 transcribe_done.flag")

    logger.info(f"处理完成。工作区目录: {video_work_dir}")
    ai_log.info("="*50)
    ai_log.info("Groq 转录任务完成")
    ai_log.info("="*50)
    logger.info(f"AI日志已保存: {ai_log_file}")


if __name__ == "__main__":
    if len(sys.argv) < 3:
        logger.error("缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")
        print("错误: 缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")
        # Exit non-zero so a calling pipeline can detect the usage error.
        sys.exit(1)

    # sys.argv[1]: source video path (usually under "stage")
    # sys.argv[2]: target work directory (usually under "session")
    logger.info(f"接收到转录任务: {sys.argv[1]}")
    try:
        process_single_video(sys.argv[1], sys.argv[2])
    except Exception as e:
        log_exception(logger, e, "视频转录失败")
        raise