commit 3925cb508f761a501ef73be8c7083abe7b90ed38 Author: theshy Date: Sat Mar 21 01:36:28 2026 +0800 Initial commit: sanitize repository for remote push diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d36b3e3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +*.json +*.log +*.log.* +*.part +*.flv +data/ +session/ +logs/ +stage/ +__pycache__/ +qrcode.png +.venv/ +test/add_2_collection.py diff --git a/add_to_collection.py b/add_to_collection.py new file mode 100755 index 0000000..597c04e --- /dev/null +++ b/add_to_collection.py @@ -0,0 +1,172 @@ +import json +import time +import requests +import re +import shutil +import subprocess +import random +from pathlib import Path +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +from logger import get_system_logger, log_exception + +# ================= 配置区域 ================= +SESSION_DIR = Path("./session") +COOKIE_FILE = Path("./cookies.json") +CHECK_INTERVAL = 5 + +# 合集 ID 配置 +SEASON_ID_A = 7196643 # 合集 A (同名视频) +SEASON_ID_B = 7196624 # 合集 B (Upload切片) + +# 自动寻找 biliup +BILIUP_PATH = shutil.which("biliup") or "biliup" +# 初始化日志 +logger = get_system_logger("add_to_collection.py") +# =========================================== + +class BiliCollectionClient: + def __init__(self): + self.load_cookies() + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Referer": "https://member.bilibili.com/platform/upload-manager/distribution" + }) + + def load_cookies(self): + if not COOKIE_FILE.exists(): + raise FileNotFoundError(f"Cookies 文件不存在: {COOKIE_FILE}") + with open(COOKIE_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])} if "cookie_info" in data else data + self.csrf = self.cookies.get("bili_jct") + + def get_video_info(self, bvid): + url = 
"https://api.bilibili.com/x/web-interface/view" + try: + self.session.cookies.update(self.cookies) + res = self.session.get(url, params={"bvid": bvid}, timeout=10).json() + if res["code"] == 0: + d = res["data"] + return {"aid": d["aid"], "cid": d["cid"], "title": d["title"], "charging_pay": 0} + except Exception as e: + logger.error(f"获取视频信息失败: {e}") + return None + + def resolve_section_id(self, sid): + url = "https://member.bilibili.com/x2/creative/web/seasons" + try: + self.session.cookies.update(self.cookies) + res = self.session.get(url, params={"pn": 1, "ps": 50}).json() + for s in res.get("data", {}).get("seasons", []): + if s.get("season", {}).get("id") == sid: + return s.get("sections", {}).get("sections", [])[0]["id"] + except: pass + return None + + def add_videos_batch(self, section_id, episodes): + if not episodes: return True + # 频率控制 + wait = random.uniform(5.0, 10.0) + logger.info(f"☕ 模拟人工操作,等待 {wait:.2f}s 后提交到合集...") + time.sleep(wait) + + url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add" + params = {"csrf": self.csrf} + try: + res = self.session.post(url, params=params, json={"sectionId": section_id, "episodes": episodes}).json() + return res["code"] == 0 + except Exception as e: + log_exception(logger, e, "批量添加合集异常") + return False + +class CollectionHandler(FileSystemEventHandler): + def __init__(self, client, sid_a, sid_b): + self.client = client + self.sid_a = sid_a + self.sid_b = sid_b + self.ansi_escape = re.compile(r"\x1b\[[0-9;]*[A-Za-z]") + + def on_created(self, event): + # 监听文件夹创建或 bvid.txt 创建 + if event.is_directory or event.src_path.endswith("bvid.txt"): + self.process_all() + + def process_all(self): + recent = self.fetch_biliup_list() + pending_a, pending_b = [], [] + + for folder in SESSION_DIR.iterdir(): + if not folder.is_dir(): continue + + # 任务 A: 同名视频 -> 合集 A + flag_a = folder / "collection_a_done.flag" + if self.sid_a and not flag_a.exists(): + bvid = self.match_bvid(folder.name, recent) + if 
bvid: + info = self.client.get_video_info(bvid) + if info: pending_a.append((folder, info)) + + # 任务 B: 切片视频 -> 合集 B + flag_b = folder / "collection_b_done.flag" + txt = folder / "bvid.txt" + if self.sid_b and not flag_b.exists() and txt.exists(): + try: + bvid = txt.read_text(encoding='utf-8').strip() + if bvid.startswith("BV"): + info = self.client.get_video_info(bvid) + if info: pending_b.append((folder, info)) + except: pass + + # 批量执行提交 + if pending_a: + if self.client.add_videos_batch(self.sid_a, [i[1] for i in pending_a]): + for f, _ in pending_a: (f / "collection_a_done.flag").touch() + logger.info(f"合集 A 更新完成: {len(pending_a)}个任务") + + if pending_b: + if self.client.add_videos_batch(self.sid_b, [i[1] for i in pending_b]): + for f, _ in pending_b: (f / "collection_b_done.flag").touch() + logger.info(f"合集 B 更新完成: {len(pending_b)}个任务") + + def fetch_biliup_list(self): + try: + res = subprocess.run([BILIUP_PATH, "list"], capture_output=True, text=True, encoding='utf-8') + clean_out = self.ansi_escape.sub("", res.stdout) + return [{"bvid": l.split()[0], "title": "".join(l.split()[1:])} for l in clean_out.splitlines() if l.startswith("BV")] + except: return [] + + def match_bvid(self, name, vlist): + n = lambda x: re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', x).lower() + target = n(name) + for v in vlist: + vn = n(v['title']) + if target in vn or vn in target: return v['bvid'] + return None + +def main(): + logger.info("="*50) + logger.info("合集监控模块启动") + logger.info("="*50) + + client = BiliCollectionClient() + sid_a = client.resolve_section_id(SEASON_ID_A) if SEASON_ID_A > 0 else None + sid_b = client.resolve_section_id(SEASON_ID_B) if SEASON_ID_B > 0 else None + + handler = CollectionHandler(client, sid_a, sid_b) + handler.process_all() # 初始扫描 + + observer = Observer() + observer.schedule(handler, str(SESSION_DIR), recursive=False) + observer.start() + + try: + while True: + time.sleep(CHECK_INTERVAL) + except KeyboardInterrupt: + observer.stop() + observer.join() 
+ +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/archive_scripts/add_to_collection-2026-01-28-20-40-29.py b/archive_scripts/add_to_collection-2026-01-28-20-40-29.py new file mode 100755 index 0000000..de0b174 --- /dev/null +++ b/archive_scripts/add_to_collection-2026-01-28-20-40-29.py @@ -0,0 +1,235 @@ +import json +import time +import requests +import re +import shutil +import subprocess +from pathlib import Path +from logger import get_system_logger, log_exception + +# ================= 配置区域 ================= +SESSION_DIR = Path("./session") +COOKIE_FILE = Path("./cookies.json") + +# 【这里填你 B 站网页上看到的合集 ID】 +# 脚本会自动根据这两个 ID 去查找对应的 Section ID (小节ID) +SEASON_ID_A = 7196643 # 合集 A (同名视频) +SEASON_ID_B = 7196624 # 合集 B (Upload切片) + +# 自动寻找 biliup +BILIUP_PATH = shutil.which("biliup") or "biliup" +# =========================================== + +logger = get_system_logger("collection_manager") + +class BiliCollectionClient: + def __init__(self): + if not COOKIE_FILE.exists(): + raise FileNotFoundError(f"Cookies 文件不存在: {COOKIE_FILE}") + + with open(COOKIE_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + + # 兼容处理 cookie 格式 + if "cookie_info" in data: + self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])} + else: + self.cookies = data + + self.csrf = self.cookies.get("bili_jct") + if not self.csrf: + raise ValueError("Cookie 中缺少 bili_jct (CSRF Token)") + + self.session = requests.Session() + self.session.cookies.update(self.cookies) + + # 使用你测试成功的 Headers + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Referer": "https://member.bilibili.com/platform/upload-manager/distribution" + }) + + def get_video_info(self, bvid): + """通过 BVID 获取 AID, CID 和 Title""" + url = "https://api.bilibili.com/x/web-interface/view" + try: + res = self.session.get(url, params={"bvid": bvid}, 
timeout=10).json() + if res["code"] != 0: + logger.error(f"查询视频信息失败 [{bvid}]: {res['message']}") + return None + + data = res["data"] + return { + "aid": data["aid"], + "cid": data["cid"], + "title": data["title"] + } + except Exception as e: + logger.error(f"获取视频信息异常: {e}") + return None + + def resolve_section_id(self, target_season_id): + """ + 【关键逻辑】通过 Season ID (合集ID) 查找 Section ID (小节ID) + """ + url = "https://member.bilibili.com/x2/creative/web/seasons" + params = {"pn": 1, "ps": 50} # 获取前50个合集 + + try: + res = self.session.get(url, params=params, timeout=10).json() + if res.get("code") != 0: + logger.error(f"获取合集列表失败: {res.get('message')}") + return None + + seasons = res.get("data", {}).get("seasons", []) + + for s in seasons: + current_sid = s.get("season", {}).get("id") + + # 找到目标合集 + if current_sid == target_season_id: + title = s.get("season", {}).get("title", "未知标题") + sections = s.get("sections", {}).get("sections", []) + + if sections: + # 默认取第一个小节 + first_section_id = sections[0]["id"] + logger.info(f"✅ ID解析成功: 合集[{title}]({target_season_id}) -> 小节ID: {first_section_id}") + return first_section_id + else: + logger.error(f"❌ 合集[{title}]({target_season_id}) 存在,但没有创建任何小节!") + return None + + logger.error(f"❌ 未找到 Season ID 为 {target_season_id} 的合集,请检查 ID 是否正确。") + return None + + except Exception as e: + logger.error(f"解析 Section ID 异常: {e}") + return None + + def add_video_to_section(self, section_id, video_info): + """正式添加视频到合集""" + url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add" + + # 参数必须包含 csrf + params = {"csrf": self.csrf} + + payload = { + "sectionId": section_id, + "episodes": [{ + "aid": video_info["aid"], + "cid": video_info["cid"], + "title": video_info["title"], + "charging_pay": 0 + }] + } + + try: + res = self.session.post(url, params=params, json=payload, timeout=15).json() + if res["code"] == 0: + logger.info(f"🎉 成功添加: {video_info['title']}") + return True + else: + logger.error(f"添加失败: {res['message']} 
(Code: {res['code']})") + return False + except Exception as e: + logger.error(f"添加请求异常: {e}") + return False + +class CollectionWorker: + def __init__(self, client, section_id_a, section_id_b): + self.client = client + self.section_id_a = section_id_a + self.section_id_b = section_id_b + self.ansi_escape = re.compile(r"\x1b\[[0-9;]*[A-Za-z]") + + def fetch_recent_videos(self): + """获取最近投稿""" + try: + cmd = [str(BILIUP_PATH), "list", "--max-pages", "2"] + res = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8') + videos = [] + for line in self.ansi_escape.sub("", res.stdout).splitlines(): + parts = line.split() + if len(parts) >= 2 and parts[0].startswith("BV"): + raw_title = " ".join(parts[1:]) + title = re.sub(r"(开放浏览|直播回放|审核中|-)$", "", raw_title).strip() + videos.append({"bvid": parts[0], "title": title}) + return videos + except Exception: + logger.warning("biliup list 执行失败,跳过同名视频匹配。") + return [] + + def normalize(self, text): + return re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', text).replace('点', '时').lower() + + def find_bvid_by_title(self, target_title, video_list): + target_norm = self.normalize(target_title) + for v in video_list: + v_norm = self.normalize(v['title']) + if target_norm in v_norm or v_norm in target_norm: + return v['bvid'] + return None + + def process_folder(self, folder: Path, video_list): + flag_a = folder / "collection_a_done.flag" + flag_b = folder / "collection_b_done.flag" + + # 任务 A: 同名视频 -> 合集 A + if self.section_id_a and not flag_a.exists(): + matched_bvid = self.find_bvid_by_title(folder.name, video_list) + if matched_bvid: + logger.info(f"任务A (同名): 匹配到 {matched_bvid},尝试添加...") + info = self.client.get_video_info(matched_bvid) + if info and self.client.add_video_to_section(self.section_id_a, info): + flag_a.touch() + + # 任务 B: Upload切片 -> 合集 B + if self.section_id_b and not flag_b.exists(): + bvid_file = folder / "bvid.txt" + if bvid_file.exists(): + bvid = bvid_file.read_text(encoding='utf-8').strip() + 
logger.info(f"任务B (切片): 读取到 {bvid},尝试添加...") + info = self.client.get_video_info(bvid) + if info and self.client.add_video_to_section(self.section_id_b, info): + flag_b.touch() + +def main(): + logger.info("启动合集管理模块 (基于成功测试版)...") + + try: + client = BiliCollectionClient() + except Exception as e: + logger.error(f"客户端初始化失败: {e}") + return + + # 1. 解析 ID (这是最关键的一步) + logger.info("正在解析合集 ID...") + real_section_a = None + real_section_b = None + + if SEASON_ID_A > 0: + real_section_a = client.resolve_section_id(SEASON_ID_A) + if SEASON_ID_B > 0: + real_section_b = client.resolve_section_id(SEASON_ID_B) + + if not real_section_a and not real_section_b: + logger.error("没有解析到任何有效的 Section ID,脚本停止。") + return + + # 2. 初始化 Worker + worker = CollectionWorker(client, real_section_a, real_section_b) + + # 3. 扫描逻辑 + logger.info("开始扫描目录...") + recent_videos = worker.fetch_recent_videos() + + if SESSION_DIR.exists(): + for folder in SESSION_DIR.iterdir(): + if folder.is_dir(): + worker.process_folder(folder, recent_videos) + + logger.info("扫描完成。") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/archive_scripts/monitorSrt-old.py b/archive_scripts/monitorSrt-old.py new file mode 100644 index 0000000..491cf8d --- /dev/null +++ b/archive_scripts/monitorSrt-old.py @@ -0,0 +1,249 @@ +import os +import time +import subprocess +import json +import shutil +from pathlib import Path +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +from logger import get_system_logger, get_ai_logger, log_exception + +# ========================================== +# 接口配置 (Interface Configuration) +# ========================================== +SESSION_DIR = r'./session' # 监控的工作区目录 +CHECK_INTERVAL = 2 # 轮询频率 +CODEX_CMD = "codex" # 如果报错,可以尝试改为 "codex.cmd" +DONE_FLAG = "transcribe_done.flag" # 监听这个标记 + +# 初始化日志 +logger = get_system_logger('monitorSrt') +# ========================================== +# 定义输出数据的 JSON Schema +SONG_SCHEMA = { 
+ "type": "object", + "properties": { + "songs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "start": {"type": "string"}, + "end": {"type": "string"}, + "title": {"type": "string"}, + "artist": {"type": "string"}, + "confidence": {"type": "number"}, + "evidence": {"type": "string"} + }, + "required": ["start", "end", "title", "artist", "confidence", "evidence"], + "additionalProperties": False + } + } + }, + "required": ["songs"], + "additionalProperties": False +} + +TASK_PROMPT = """你是音乐片段识别助手。当前目录下有一个字幕文件。 +任务: +1. 结合字幕内容并允许联网搜索进行纠错(识别同音字、唱错等)。 +2. 识别出直播中唱过的所有歌曲,给出精确的开始和结束时间。 +3. 同一首歌间隔 ≤30s 合并,>30s 分开。 +4. 忽略纯聊天片段。 +5. 无法确认的歌曲用 UNKNOWN 标注并在 evidence 说明。 +最后请严格按照 Schema 生成 JSON 数据。""" + +# ========================================== + +class SrtHandler(FileSystemEventHandler): + def on_created(self, event): + # 修改:不再看 .srt,改为看 .flag + if not event.is_directory and event.src_path.endswith(DONE_FLAG): + logger.debug(f"检测到转录完成标记: {event.src_path}") + self.process_with_codex(Path(event.src_path)) + + # if not event.is_directory and event.src_path.lower().endswith('.srt'): + # self.process_with_codex(Path(event.src_path)) + + def on_moved(self, event): + # 针对有些程序是先生成临时文件再重命名的情况 + if not event.is_directory and event.dest_path.lower().endswith('.srt'): + logger.debug(f"检测到字幕文件移动: {event.dest_path}") + self.process_with_codex(Path(event.dest_path)) + + def process_with_codex(self, srt_path): + work_dir = srt_path.parent + # 避免对同一目录重复调用 + if (work_dir / "songs.json").exists(): + logger.info(f"songs.json 已存在,跳过: {work_dir.name}") + return + + logger.info(f"发现新字幕,准备识别歌曲: {work_dir.name}") + + # 创建AI日志 + ai_log, ai_log_file = get_ai_logger('codex', 'songs') + ai_log.info("="*50) + ai_log.info("Codex 歌曲识别任务开始") + ai_log.info(f"工作目录: {work_dir}") + ai_log.info("="*50) + + logger.debug("准备 Schema 文件...") + ai_log.info("生成 JSON Schema") + + # 在当前目录下生成临时 Schema 文件供 Codex 参考 + schema_file = work_dir / "song_schema.json" + with open(schema_file, "w", 
encoding="utf-8") as f: + json.dump(SONG_SCHEMA, f, ensure_ascii=False, indent=2) + ai_log.info(f"Schema 文件: {schema_file.name}") + + logger.info("调用 Codex (Non-interactive mode)...") + ai_log.info("开始 Codex 执行") + ai_log.info(f"命令: {CODEX_CMD} exec") + ai_log.info(f"任务提示: {TASK_PROMPT[:100]}...") + + # 构建命令行参数 + # 注意:Windows 下为了防止 shell 解析错误,提示词尽量保持在一行 + cmd = [ + CODEX_CMD, "exec", + TASK_PROMPT.replace('\n', ' '), + "--full-auto", + "--sandbox", "workspace-write", + "--output-schema", "./song_schema.json", + "-o", "songs.json", + "--skip-git-repo-check", + "--json" # 启用 JSON 输出以获取详细日志 + ] + ai_log.info(f"完整命令: {subprocess.list2cmdline(cmd)}") + + try: + # 使用 shell=True 解决 Windows 下找不到 .cmd 脚本的问题 + # 使用 subprocess.list2cmdline 将列表安全转为字符串 +# process_cmd = subprocess.list2cmdline(cmd) + +# start_time = time.time() +# result = subprocess.run( +# process_cmd, +# cwd=str(work_dir), +# shell=False, +# capture_output=True, +# text=True, +# encoding='utf-8' +# ) +# 2. 修改调用逻辑(去掉 list2cmdline) + try: + start_time = time.time() + result = subprocess.run( + cmd, # 直接传列表,不要传字符串 + cwd=str(work_dir), + shell=False, # 在 Linux 上,传列表时 shell 必须为 False 或不设置 + capture_output=True, + text=True, + encoding='utf-8' + ) + elapsed = time.time() - start_time + + ai_log.info(f"Codex 执行完成,耗时: {elapsed:.2f}秒") + ai_log.info(f"返回码: {result.returncode}") + + # 解析并记录 Codex 的 JSON 输出 + if result.stdout: + ai_log.info("=== Codex 执行日志 ===") + for line in result.stdout.strip().split('\n'): + if line.strip(): + try: + # 尝试解析 JSONL 格式的事件 + event = json.loads(line) + event_type = event.get('type', 'unknown') + + # 根据事件类型记录不同级别的日志 + if event_type == 'error': + ai_log.error(f"Codex Error: {json.dumps(event, ensure_ascii=False)}") + elif event_type in ['tool_use', 'command_execution', 'file_operation']: + ai_log.info(f"Codex Action: {json.dumps(event, ensure_ascii=False)}") + else: + ai_log.debug(f"Codex Event: {json.dumps(event, ensure_ascii=False)}") + except json.JSONDecodeError: + # 如果不是 JSON 
格式,直接记录原始行 + ai_log.info(line) + + if result.stderr: + ai_log.warning("=== STDERR ===") + for line in result.stderr.strip().split('\n'): + if line.strip(): + ai_log.warning(line) + + if result.returncode == 0: + logger.info(f"Codex 执行成功: {work_dir.name}") + ai_log.info("Codex 执行成功") + self.generate_txt_fallback(work_dir, ai_log) + else: + logger.error(f"Codex 返回错误码 {result.returncode}") + logger.error(f"错误详情: {result.stderr.strip() or result.stdout.strip()}") + ai_log.error(f"Codex 执行失败,错误码: {result.returncode}") + + except Exception as e: + log_exception(logger, e, "Codex 调用异常") + log_exception(ai_log, e, "Codex 执行异常") + + ai_log.info("="*50) + ai_log.info("Codex 歌曲识别任务完成") + ai_log.info("="*50) + logger.info(f"AI日志已保存: {ai_log_file}") + + def generate_txt_fallback(self, work_dir, ai_log): + """解析生成的 JSON 并同步创建 B 站评论格式的 txt""" + json_path = work_dir / "songs.json" + txt_path = work_dir / "songs.txt" + + try: + if json_path.exists(): + with open(json_path, 'r', encoding='utf-8') as f: + data = json.load(f) + songs = data.get('songs', []) + + logger.info(f"识别到 {len(songs)} 首歌曲") + ai_log.info(f"解析结果: {len(songs)} 首歌曲") + + with open(txt_path, 'w', encoding='utf-8') as t: + for s in songs: + # 将 SRT 时间格式 (00:00:00,360) 转为 B 站格式 (00:00:00) + start_time = s['start'].split(',')[0] # 去掉毫秒部分 + line = f"{start_time} {s['title']} — {s['artist']}\n" + t.write(line) + ai_log.debug(f" {s['title']} — {s['artist']} ({start_time})") + + logger.info(f"成功生成: {txt_path.name}") + ai_log.info(f"生成 songs.txt 成功") + except Exception as e: + log_exception(logger, e, "生成 txt 失败") + log_exception(ai_log, e, "生成 songs.txt 失败") + +def main(): + path = Path(SESSION_DIR) + if not path.exists(): + path.mkdir(parents=True) + + logger.info("="*50) + logger.info("字幕监控模块启动 (Codex 歌曲识别)") + logger.info("="*50) + logger.info(f"监控目录: {SESSION_DIR}") + logger.info(f"Codex 命令: {CODEX_CMD}") + + event_handler = SrtHandler() + observer = Observer() + observer.schedule(event_handler, str(path), 
recursive=True) + + observer.start() + logger.info("文件监控已启动") + + try: + while True: + time.sleep(CHECK_INTERVAL) + except KeyboardInterrupt: + logger.info("接收到停止信号,正在关闭...") + observer.stop() + observer.join() + logger.info("字幕监控模块已停止") + +if __name__ == "__main__": + main() diff --git a/archive_scripts/temp-add-2-collection-full.py b/archive_scripts/temp-add-2-collection-full.py new file mode 100755 index 0000000..081a02c --- /dev/null +++ b/archive_scripts/temp-add-2-collection-full.py @@ -0,0 +1,155 @@ +import json +import time +import requests +import subprocess +import re +import shutil +import random +from pathlib import Path + +# ================= 配置区域 ================= +COOKIE_FILE = Path("./cookies.json") +TARGET_SEASON_ID = 7196643 +# 必须包含的关键词 +MUST_KEYWORDS = [] +# 必须排除的关键词 +EXCLUDE_KEYWORD = "纯享" + +BILIUP_PATH = shutil.which("biliup") or "biliup" +# =========================================== + +class BiliCollectionBatchTool: + def __init__(self): + self.load_cookies() + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Referer": "https://member.bilibili.com/platform/upload-manager/distribution" + }) + + def load_cookies(self): + if not COOKIE_FILE.exists(): + raise FileNotFoundError(f"找不到 Cookies 文件: {COOKIE_FILE}") + with open(COOKIE_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])} if "cookie_info" in data else data + self.csrf = self.cookies.get("bili_jct") + + def get_existing_bvids(self, season_id): + """拉取合集内所有已存在的 BVID,确保去重 100% 准确""" + print(f"📡 正在拉取合集 {season_id} 的现有视频数据...") + self.session.cookies.update(self.cookies) + try: + # 1. 
获取 Section ID + list_url = "https://member.bilibili.com/x2/creative/web/seasons" + res_list = self.session.get(list_url, params={"pn": 1, "ps": 50}).json() + section_id = None + for s in res_list.get("data", {}).get("seasons", []): + if s.get("season", {}).get("id") == season_id: + sections = s.get("sections", {}).get("sections", []) + if sections: section_id = sections[0]["id"] + break + + if not section_id: return None, set() + + # 2. 获取该小节详细列表 + detail_url = "https://member.bilibili.com/x2/creative/web/season/section" + res_detail = self.session.get(detail_url, params={"id": section_id}).json() + + existing = set() + if res_detail.get("code") == 0: + for ep in res_detail.get("data", {}).get("episodes", []): + existing.add(ep.get("bvid")) + + print(f"📊 查重参考:合集内已有 {len(existing)} 个视频。") + return section_id, existing + except Exception as e: + print(f"❌ 查重逻辑失败: {e}") + return None, set() + + def fetch_filtered_videos(self, existing_set): + """ + 核心逻辑修改: + 1. 包含 王海颖, 唱歌, 录播 + 2. 不包含 纯享 + 3. 
不在合集 existing_set 中 + """ + print(f"🔍 扫描符合条件且不含“{EXCLUDE_KEYWORD}”的视频...") + try: + res = subprocess.run([BILIUP_PATH, "list", "--max-pages", "20"], capture_output=True, text=True, encoding='utf-8') + output = re.sub(r"\x1b\[[0-9;]*[A-Za-z]", "", res.stdout) + + to_add_bvids = [] + for line in output.splitlines(): + if line.startswith("BV"): + parts = line.split() + bvid = parts[0] + title = " ".join(parts[1:]) + + # 判断逻辑 + is_match = all(kw in title for kw in MUST_KEYWORDS) + is_excluded = EXCLUDE_KEYWORD in title + + if is_match and not is_excluded: + if bvid in existing_set: + continue + to_add_bvids.append(bvid) + return to_add_bvids + except Exception as e: + print(f"❌ biliup 调用失败: {e}") + return [] + + def get_metadata(self, bv_list): + episodes = [] + for bvid in bv_list: + url = "https://api.bilibili.com/x/web-interface/view" + try: + res = self.session.get(url, params={"bvid": bvid}).json() + if res["code"] == 0: + d = res["data"] + episodes.append({ + "aid": d["aid"], "cid": d["cid"], + "title": d["title"], "charging_pay": 0 + }) + time.sleep(0.3) + except: pass + return episodes + + def run(self): + # 1. 深度查重 + section_id, existing_set = self.get_existing_bvids(TARGET_SEASON_ID) + if not section_id: + print("❌ 无法解析合集,任务终止。") + return + + # 2. 条件过滤 + 查重剔除 + target_bvids = self.fetch_filtered_videos(existing_set) + + if not target_bvids: + print("✨ 扫描完毕:没有符合条件的新视频。") + return + + print(f"💡 过滤后,确认有 {len(target_bvids)} 个视频待加入合集。") + + # 3. 解析元数据 + final_list = self.get_metadata(target_bvids) + + # 4. 
一次性全量提交 + if final_list: + print(f"🚀 正在发送合并添加请求...") + add_url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add" + res = self.session.post(add_url, params={"csrf": self.csrf}, json={ + "sectionId": section_id, + "episodes": final_list + }).json() + + if res["code"] == 0: + print(f"🎉 成功!已补齐 {len(final_list)} 个不含“纯享”的录播视频。") + else: + print(f"❌ 批量失败: {res['message']} (Code: {res['code']})") + else: + print("❌ 未能获取有效的视频详情。") + +if __name__ == "__main__": + tool = BiliCollectionBatchTool() + tool.run() \ No newline at end of file diff --git a/archive_scripts/temp_fromA_2_B.py b/archive_scripts/temp_fromA_2_B.py new file mode 100755 index 0000000..c2a05b1 --- /dev/null +++ b/archive_scripts/temp_fromA_2_B.py @@ -0,0 +1,147 @@ +import json +import time +import requests +import shutil +import random +from pathlib import Path + +# ================= 配置区域 ================= +COOKIE_FILE = Path("./cookies.json") +SOURCE_SEASON_ID = 7196643 # 源合集 (大合集) +TARGET_SEASON_ID = 7288568 # 目标合集 (短视频合集) +MAX_DURATION_SEC = 20 * 60 # 阈值:20分钟 (1200秒) +# =========================================== + +class BiliCollectionTransferTool: + def __init__(self): + self.load_cookies() + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Referer": "https://member.bilibili.com/platform/upload-manager/distribution" + }) + + def load_cookies(self): + if not COOKIE_FILE.exists(): + raise FileNotFoundError(f"找不到 Cookies 文件: {COOKIE_FILE}") + with open(COOKIE_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])} if "cookie_info" in data else data + self.csrf = self.cookies.get("bili_jct") + + def get_season_info(self, season_id): + """获取合集的 Section ID 和 视频列表""" + self.session.cookies.update(self.cookies) + try: + # 1. 
获取 Section ID + list_url = "https://member.bilibili.com/x2/creative/web/seasons" + res_list = self.session.get(list_url, params={"pn": 1, "ps": 50}).json() + section_id = None + for s in res_list.get("data", {}).get("seasons", []): + if s.get("season", {}).get("id") == season_id: + sections = s.get("sections", {}).get("sections", []) + if sections: section_id = sections[0]["id"] + break + + if not section_id: return None, [] + + # 2. 获取该小节详细视频列表 + detail_url = "https://member.bilibili.com/x2/creative/web/season/section" + res_detail = self.session.get(detail_url, params={"id": section_id}).json() + + # 兼容性修复:确保返回的是列表而非 None + episodes = res_detail.get("data", {}).get("episodes", []) + if episodes is None: episodes = [] + + return section_id, episodes + except Exception as e: + print(f"❌ 获取合集 {season_id} 失败: {e}") + return None, [] + + def get_video_duration(self, bvid): + """获取视频准确时长(秒)""" + url = "https://api.bilibili.com/x/web-interface/view" + try: + res = self.session.get(url, params={"bvid": bvid}).json() + if res["code"] == 0: + return res["data"]["duration"] + except: pass + return 999999 + + def run(self): + # 1. 
获取合集信息 + src_section_id, src_episodes = self.get_season_info(SOURCE_SEASON_ID) + dst_section_id, dst_episodes = self.get_season_info(TARGET_SEASON_ID) + + if not src_section_id or not dst_section_id: + print("❌ 无法获取合集信息,请检查 ID 是否正确。") + return + + # 修复 NoneType 报错:确保 dst_episodes 是列表 + dst_bvids = {ep['bvid'] for ep in dst_episodes if ep and 'bvid' in ep} + + print(f"📡 源合集共有 {len(src_episodes)} 个视频,开始检查时长...") + + to_move = [] + for idx, ep in enumerate(src_episodes): + bvid = ep['bvid'] + duration = self.get_video_duration(bvid) + + # 进度提示 + if (idx + 1) % 10 == 0: + print(f" 已检查 {idx + 1}/{len(src_episodes)}...") + + if duration < MAX_DURATION_SEC: + if bvid not in dst_bvids: + to_move.append({ + "aid": ep["aid"], + "cid": ep["cid"], + "title": ep["title"], + "bvid": bvid, + "charging_pay": 0 + }) + time.sleep(0.4) + + if not to_move: + print("✨ 未发现需要迁移的短视频。") + return + + print(f"\n💡 共发现 {len(to_move)} 个短视频需要迁移。") + + # 2. 分批迁移 (每 30 个一组) + batch_size = 30 + for i in range(0, len(to_move), batch_size): + batch = to_move[i:i+batch_size] + batch_aids = [m["aid"] for m in batch] + + print(f"🚀 正在处理第 {i//batch_size + 1} 组迁移 ({len(batch)} 个)...") + + # 先加入目标合集 + add_url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add" + res_add = self.session.post(add_url, params={"csrf": self.csrf}, json={ + "sectionId": dst_section_id, + "episodes": batch + }).json() + + if res_add["code"] == 0: + # 后从源合集移除 + del_url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/delete" + res_del = self.session.post(del_url, params={"csrf": self.csrf}, json={ + "sectionId": src_section_id, + "aids": batch_aids + }).json() + + if res_del["code"] == 0: + print(f" ✅ 成功移动 {len(batch)} 个。") + else: + print(f" ⚠️ 移除失败: {res_del.get('message')}") + else: + print(f" ❌ 加入目标合集失败: {res_add.get('message')}") + + time.sleep(random.uniform(3, 6)) + + print("\n🎉 迁移任务执行完毕。") + +if __name__ == "__main__": + tool = BiliCollectionTransferTool() + tool.run() \ No 
newline at end of file diff --git a/archive_scripts/temp_get-10.py b/archive_scripts/temp_get-10.py new file mode 100755 index 0000000..60ad3d5 --- /dev/null +++ b/archive_scripts/temp_get-10.py @@ -0,0 +1,89 @@ + + + + +import json +import time +import requests +from pathlib import Path + +# ================= 配置区域 ================= +COOKIE_FILE = Path("./cookies.json") +TARGET_SEASON_ID = 7196643 # 要检查的合集 ID +# =========================================== + +class BiliCollectionChecker: + def __init__(self): + self.load_cookies() + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Referer": "https://member.bilibili.com/platform/upload-manager/distribution" + }) + + def load_cookies(self): + if not COOKIE_FILE.exists(): + raise FileNotFoundError(f"找不到 Cookies 文件: {COOKIE_FILE}") + with open(COOKIE_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])} if "cookie_info" in data else data + + def get_video_pubdate(self, bvid): + """反查视频详细发布时间""" + url = "https://api.bilibili.com/x/web-interface/view" + try: + res = self.session.get(url, params={"bvid": bvid}).json() + if res["code"] == 0: + return res["data"]["pubdate"] + except: pass + return 0 + + def check_top_10(self): + print(f"📡 正在拉取合集 {TARGET_SEASON_ID} 当前的实时排位...") + self.session.cookies.update(self.cookies) + + try: + # 1. 先定位小节 ID + list_res = self.session.get("https://member.bilibili.com/x2/creative/web/seasons", params={"pn": 1, "ps": 50}).json() + section_id = None + for s in list_res.get("data", {}).get("seasons", []): + if s.get("season", {}).get("id") == TARGET_SEASON_ID: + section_id = s.get("sections", {}).get("sections", [])[0]['id'] + break + + if not section_id: + print("❌ 未找到合集信息") + return + + # 2. 
获取该小节当前的前 10 个视频 + detail_url = "https://member.bilibili.com/x2/creative/web/season/section" + res_detail = self.session.get(detail_url, params={"id": section_id}).json() + + if res_detail.get("code") == 0: + episodes = res_detail.get("data", {}).get("episodes", []) + top_10 = episodes[:10] # 截取前 10 个 + + print("\n" + "="*60) + print(f"{'排位':<4} | {'发布时间':<20} | {'BVID':<12} | {'视频标题'}") + print("-" * 60) + + for idx, ep in enumerate(top_10): + bvid = ep['bvid'] + # 为了验证排序字段,这里再次请求真实发布时间 + pubtime = self.get_video_pubdate(bvid) + time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(pubtime)) if pubtime > 0 else "未知" + + print(f"#{idx+1:<3} | {time_str:<20} | {bvid:<12} | {ep['title']}") + + print("="*60) + print(f"\n💡 如果看到的发布时间是从 2025 年开始递增的,说明是【正序】。") + print(f"💡 如果是从 2026 年开始递减的,说明是【逆序】。") + else: + print(f"❌ 获取详情失败: {res_detail.get('message')}") + + except Exception as e: + print(f"❌ 运行异常: {e}") + +if __name__ == "__main__": + checker = BiliCollectionChecker() + checker.check_top_10() \ No newline at end of file diff --git a/archive_scripts/temp_sort.py b/archive_scripts/temp_sort.py new file mode 100755 index 0000000..bc03bf8 --- /dev/null +++ b/archive_scripts/temp_sort.py @@ -0,0 +1,218 @@ +import requests +import time +import json +import random +from pathlib import Path + +# ================= 配置区域 ================= +COOKIE_FILE = Path("./cookies.json") +TARGET_SEASON_ID = 7196643 # 目标合集 ID +ASCENDING_ORDER = True # True: 最早发布的在前面 (1, 2, 3...) +# =========================================== + +def extract_cookie_from_list(cookie_list): + """从列表结构中提取 SESSDATA 和 bili_jct""" + sessdata = "" + bili_jct = "" + for item in cookie_list: + if item.get("name") == "SESSDATA": + sessdata = item.get("value") + elif item.get("name") == "bili_jct": + bili_jct = item.get("value") + return sessdata, bili_jct + +def load_cookies(file_path): + """智能从 json 文件加载 cookies""" + if not file_path.exists(): + print(f"[!] 
# Load the credentials once at import time. load_cookies() calls exit(1)
# itself when the file is missing, unparsable, or lacks either value.
SESSDATA, BILI_JCT = load_cookies(COOKIE_FILE)

# Echo only the first and last four characters of each secret.
print(f"[*] SESSDATA 读取成功: {SESSDATA[:4]}...{SESSDATA[-4:]}")
print(f"[*] bili_jct 读取成功: {BILI_JCT[:4]}...{BILI_JCT[-4:]}")

# Headers passed to every requests call in this script; the Cookie header
# carries both credentials.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Cookie": f"SESSDATA={SESSDATA}; bili_jct={BILI_JCT}",
    "Content-Type": "application/json"
}
def get_video_publish_time(bvid):
    """Fetch the publish timestamp and title of a video.

    The request is sent with the shared ``HEADERS``; the original author's
    note says Bilibili blocks this endpoint when they are omitted.

    Args:
        bvid: BV identifier of the video.

    Returns:
        ``(pubdate, title)`` on success, or ``(0, "Unknown")`` when the API
        reports an error or the request raises.
    """
    url = "https://api.bilibili.com/x/web-interface/view"
    params = {"bvid": bvid}
    try:
        # Bounded wait: this runs once per episode, so a single stalled
        # connection must not freeze the whole sort.
        resp = requests.get(url, params=params, headers=HEADERS, timeout=10)
        data = resp.json()

        if data["code"] == 0:
            return data["data"]["pubdate"], data["data"]["title"]

        # Report the API's own error; .get() keeps a missing "message" from
        # being misreported as a request exception below.
        print(f"\n[!] 获取视频 {bvid} 失败: code={data['code']}, msg={data.get('message')}")
        return 0, "Unknown"
    except Exception as e:
        print(f"\n[!] 请求异常: {e}")
        return 0, "Unknown"
合集内无视频。") + return + + total = len(episodes) + print(f"[*] 获取到 {total} 个视频,开始查询发布时间...") + + video_list = [] + success_count = 0 + + for idx, ep in enumerate(episodes): + # 随机延迟 0.2 ~ 0.5 秒,比固定延迟更安全 + time.sleep(random.uniform(0.2, 0.5)) + + bvid = ep["bvid"] + pubdate, title = get_video_publish_time(bvid) + + # 简单的进度显示 + date_str = "Fail/Unknown" + if pubdate != 0: + date_str = time.strftime('%Y-%m-%d', time.localtime(pubdate)) + success_count += 1 + + print(f" [{idx+1}/{total}] {title[:15]:<15} -> {date_str}") + + video_list.append({ + "id": ep["id"], + "title": ep["title"] if title == "Unknown" else title, # 优先使用 API 查到的全名 + "pubdate": pubdate + }) + + if success_count == 0: + print("[!] 错误: 所有视频时间查询均失败,终止排序以免数据混乱。") + return + + # 2. 排序 + print("[*] 正在计算排序顺序...") + video_list.sort(key=lambda x: x['pubdate'], reverse=not ASCENDING_ORDER) + + # 3. 提交 + print("[*] 正在提交新的排序列表...") + sorts_payload = [{"id": v["id"], "sort": i+1} for i, v in enumerate(video_list)] + + payload = { + "section": { + "id": section_info["id"], + "seasonId": section_info["seasonId"], + "title": section_info["title"], + "type": section_info["type"] + }, + "sorts": sorts_payload + } + + url_edit = f"https://member.bilibili.com/x2/creative/web/season/section/edit?csrf={BILI_JCT}" + try: + resp_submit = requests.post(url_edit, json=payload, headers=HEADERS) + result = resp_submit.json() + + if result["code"] == 0: + print(f"\n[SUCCESS] 合集《{section_info['title']}》排序更新成功!") + else: + print(f"\n[FAIL] 更新失败: {result['message']}") + except Exception as e: + print(f"\n[!] 
提交时发生网络错误: {e}") + +if __name__ == "__main__": + print("--- Bilibili 合集自动排序工具 (v2.0 fixed) ---") + + target_section_id = get_section_id_by_season(TARGET_SEASON_ID) + + if target_section_id: + sort_videos(target_section_id) \ No newline at end of file diff --git a/archive_scripts/upload-1.py b/archive_scripts/upload-1.py new file mode 100644 index 0000000..ea922e4 --- /dev/null +++ b/archive_scripts/upload-1.py @@ -0,0 +1,351 @@ +import os +import time +import subprocess +import json +import re +import random +import shutil +from pathlib import Path +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +from logger import get_system_logger, log_exception + +# ========================================== +# 接口配置 +# ========================================== +SESSION_DIR = r'./session' # 监控的工作区目录 +CHECK_INTERVAL = 5 # 检查频率 +BILIUP_PATH = "./biliup" # biliup 命令 +CONFIG_FILE = "upload_config.json" # 配置文件路径 +DONE_FLAG = "split_done.flag" # monitorSongs.py 生成的标记 +UPLOAD_FLAG = "upload_done.flag" # 本脚本生成的完成标记 + +# 初始化日志 +logger = get_system_logger('upload') +# ========================================== + +class UploadConfig: + """上传配置管理器""" + def __init__(self, config_path): + self.config_path = Path(config_path) + self.config = self.load_config() + + def load_config(self): + """加载配置文件""" + try: + if not self.config_path.exists(): + logger.error(f"配置文件不存在: {self.config_path}") + return self.get_default_config() + + with open(self.config_path, 'r', encoding='utf-8') as f: + config = json.load(f) + logger.info(f"成功加载配置文件: {self.config_path}") + return config + except Exception as e: + log_exception(logger, e, "加载配置文件失败") + return self.get_default_config() + + def get_default_config(self): + """默认配置""" + logger.warning("使用默认配置") + return { + "upload_settings": { + "tid": 31, + "copyright": 2, + "source": "直播回放", + "cover": "" + }, + "template": { + "title": "{streamer}_{date}", + "description": "录制剪辑\n\n{songs_list}", + "tag": 
"翻唱,直播切片,唱歌,音乐", + "dynamic": "" + }, + "streamers": {}, + "quotes": [], + "filename_patterns": { + "patterns": [] + } + } + + def parse_filename(self, filename): + """从文件名解析主播名和日期""" + patterns = self.config.get("filename_patterns", {}).get("patterns", []) + + for pattern_config in patterns: + regex = pattern_config.get("regex") + if not regex: + continue + + match = re.match(regex, filename) + if match: + data = match.groupdict() + date_format = pattern_config.get("date_format", "{date}") + + # 格式化日期 + try: + formatted_date = date_format.format(**data) + data['date'] = formatted_date + except KeyError: + pass + + logger.debug(f"文件名匹配成功: {pattern_config.get('name')} -> {data}") + return data + + # 默认返回原始文件名 + logger.warning(f"文件名未匹配任何模式: {filename}") + return {"streamer": filename, "date": ""} + + def get_random_quote(self): + """随机获取一句名言""" + quotes = self.config.get("quotes", []) + if not quotes: + return {"text": "", "author": ""} + return random.choice(quotes) + +class UploadHandler(FileSystemEventHandler): + def __init__(self, config): + self.processing_sets = set() + self.config = config + + def on_created(self, event): + # 兼容处理 watchdog 路径编码问题 + src_path = event.src_path + if isinstance(src_path, bytes): + src_path = src_path.decode('utf-8') + + # 监听 split_done.flag 文件的生成 + if not event.is_directory and src_path.lower().endswith(DONE_FLAG): + logger.debug(f"检测到切割完成标记: {src_path}") + self.handle_upload(Path(src_path)) + + def on_moved(self, event): + dest_path = event.dest_path + if isinstance(dest_path, bytes): + dest_path = dest_path.decode('utf-8') + + if not event.is_directory and dest_path.lower().endswith(DONE_FLAG): + logger.debug(f"检测到切割完成标记移动: {dest_path}") + self.handle_upload(Path(dest_path)) + + def handle_upload(self, flag_path): + work_dir = flag_path.parent + video_stem = work_dir.name + upload_done = work_dir / UPLOAD_FLAG + split_dir = work_dir / "split_video" + + # 防重复检查 + if upload_done.exists() or video_stem in self.processing_sets: + 
logger.debug(f"上传已完成或正在处理,跳过: {video_stem}") + return + + logger.info("="*50) + logger.info(f"准备上传: {video_stem}") + logger.info("="*50) + self.processing_sets.add(video_stem) + + try: + # 1. 解析文件名 + parsed = self.config.parse_filename(video_stem) + streamer = parsed.get('streamer', video_stem) + date = parsed.get('date', '') + + logger.info(f"主播: {streamer}, 日期: {date}") + + # 2. 读取歌曲信息 + songs_json = work_dir / "songs.json" + songs_txt = work_dir / "songs.txt" + songs = [] + song_count = 0 + songs_list = "" + + if songs_json.exists(): + try: + with open(songs_json, 'r', encoding='utf-8') as f: + data = json.load(f) + songs = data.get('songs', []) + song_count = len(songs) + logger.info(f"读取到 {song_count} 首歌曲") + except Exception as e: + log_exception(logger, e, "读取 songs.json 失败") + + if songs_txt.exists(): + songs_list = songs_txt.read_text(encoding='utf-8').strip() + logger.info("已读取歌单文本") + + # 3. 获取随机名言 + quote = self.config.get_random_quote() + daily_quote = quote.get('text', '') + quote_author = quote.get('author', '') + + # 4. 构建模板变量 + template_vars = { + 'streamer': streamer, + 'date': date, + 'song_count': song_count, + 'songs_list': songs_list, + 'daily_quote': daily_quote, + 'quote_author': quote_author + } + + # 5. 渲染标题和简介 + template = self.config.config.get('template', {}) + title = template.get('title', '{streamer}_{date}').format(**template_vars) + description = template.get('description', '{songs_list}').format(**template_vars) + dynamic = template.get('dynamic', '').format(**template_vars) + + # 6. 获取标签(优先使用主播专属标签) + streamers_config = self.config.config.get('streamers', {}) + if streamer in streamers_config: + tags = streamers_config[streamer].get('tags', template.get('tag', '')) + logger.info(f"使用主播专属标签: {streamer}") + else: + tags = template.get('tag', '翻唱,唱歌,音乐').format(**template_vars) + + logger.info(f"标题: {title}") + logger.info(f"标签: {tags}") + logger.debug(f"简介预览: {description[:100]}...") + + # 7. 
获取所有切片视频 + video_files = sorted([str(v) for v in split_dir.glob("*") if v.suffix.lower() in {'.mp4', '.mkv', '.mov', '.flv'}]) + + if not video_files: + logger.error(f"切片目录 {split_dir} 内没找到视频") + return + + logger.info(f"找到 {len(video_files)} 个视频分片") + + # 8. 读取上传设置 + upload_settings = self.config.config.get('upload_settings', {}) + tid = upload_settings.get('tid', 31) + copyright_val = upload_settings.get('copyright', 2) + source = upload_settings.get('source', '直播回放') + cover = upload_settings.get('cover', '') + + # 8. 刷新 biliup 登录信息 + renew_cmd = [BILIUP_PATH, "renew"] + logger.info("尝试刷新 biliup 登录信息") + renew_result = subprocess.run(renew_cmd, shell=False, capture_output=True, text=True, encoding='utf-8') + if renew_result.returncode != 0: + logger.warning(f"biliup renew 返回非 0: {renew_result.returncode}") + else: + logger.info("biliup renew 成功") + + # 9. 执行分批上传 + logger.info(f"启动分批投稿 (每批 5 个)...") + + # 第一批:使用 upload 创建稿件 + first_batch = video_files[:5] + remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)] + + # 构建初始上传命令 + upload_cmd = [ + BILIUP_PATH, "upload", + *first_batch, + "--title", title, + "--tid", str(tid), + "--tag", tags, + "--copyright", str(copyright_val), + "--source", source, + "--desc", description + ] + + if dynamic: + upload_cmd.extend(["--dynamic", dynamic]) + if cover and Path(cover).exists(): + upload_cmd.extend(["--cover", cover]) + + # 执行初始上传 + logger.info(f"正在上传第一批 ({len(first_batch)} 个文件)...") + result = subprocess.run(upload_cmd, shell=False, capture_output=True, text=True, encoding='utf-8') + + if result.returncode == 0: + # 从 stdout 提取 BV 号 + bv_match = re.search(r'"bvid":"(BV[A-Za-z0-9]+)"', result.stdout) + if not bv_match: + bv_match = re.search(r'(BV[A-Za-z0-9]+)', result.stdout) + + if bv_match: + bvid = bv_match.group(1) + logger.info(f"第一批投稿成功,获得 BV 号: {bvid}") + + # 追加后续批次 + for idx, batch in enumerate(remaining_batches, 2): + logger.info(f"正在追加第 {idx} 批 ({len(batch)} 个文件) 到 {bvid}...") + 
def main():
    """Start the upload watcher.

    Creates the session directory, schedules a recursive watchdog observer on
    it, hands every sub-directory that already has a split-done flag but no
    upload-done flag to the handler, then starts the observer and sleeps in a
    loop until interrupted from the keyboard.
    """
    session_root = Path(SESSION_DIR)
    session_root.mkdir(parents=True, exist_ok=True)

    banner = "="*50
    logger.info(banner)
    logger.info("上传模块启动 (Biliup 自动分批投稿)")
    logger.info(banner)

    # Load configuration and wire the handler to the observer.
    handler = UploadHandler(UploadConfig(CONFIG_FILE))
    watcher = Observer()
    watcher.schedule(handler, str(session_root), recursive=True)

    # Catch up on work left over from before this process started.
    logger.info("扫描待上传任务...")
    handled = 0
    for entry in session_root.iterdir():
        if not entry.is_dir():
            continue
        done_marker = entry / DONE_FLAG
        uploaded_marker = entry / UPLOAD_FLAG
        if not done_marker.exists():
            continue
        if uploaded_marker.exists():
            continue
        logger.info(f"发现待上传任务: {entry.name}")
        handler.handle_upload(done_marker)
        handled += 1
    logger.info(f"扫描完成,处理 {handled} 个待上传任务")

    watcher.start()
    try:
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        watcher.stop()
    watcher.join()
a/archive_scripts/upload-old.py b/archive_scripts/upload-old.py new file mode 100644 index 0000000..89d7e13 --- /dev/null +++ b/archive_scripts/upload-old.py @@ -0,0 +1,325 @@ +import os +import time +import subprocess +import json +import re +import random +import shutil +from pathlib import Path +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +from logger import get_system_logger, log_exception + +# ========================================== +# 接口配置 +# ========================================== +SESSION_DIR = r'./session' # 监控的工作区目录 +CHECK_INTERVAL = 5 # 检查频率 +BILIUP_PATH = "./biliup" # biliup 命令 +CONFIG_FILE = "upload_config.json" # 配置文件路径 +DONE_FLAG = "split_done.flag" # monitorSongs.py 生成的标记 +UPLOAD_FLAG = "upload_done.flag" # 本脚本生成的完成标记 + +# 初始化日志 +logger = get_system_logger('upload') +# ========================================== + +class UploadConfig: + """上传配置管理器""" + def __init__(self, config_path): + self.config_path = Path(config_path) + self.config = self.load_config() + + def load_config(self): + """加载配置文件""" + try: + if not self.config_path.exists(): + logger.error(f"配置文件不存在: {self.config_path}") + return self.get_default_config() + + with open(self.config_path, 'r', encoding='utf-8') as f: + config = json.load(f) + logger.info(f"成功加载配置文件: {self.config_path}") + return config + except Exception as e: + log_exception(logger, e, "加载配置文件失败") + return self.get_default_config() + + def get_default_config(self): + """默认配置""" + logger.warning("使用默认配置") + return { + "upload_settings": { + "tid": 31, + "copyright": 2, + "source": "直播回放", + "cover": "" + }, + "template": { + "title": "{streamer}_{date}", + "description": "自动录制剪辑\n\n{songs_list}", + "tag": "翻唱,直播切片,唱歌,音乐", + "dynamic": "" + }, + "streamers": {}, + "quotes": [], + "filename_patterns": { + "patterns": [] + } + } + + def parse_filename(self, filename): + """从文件名解析主播名和日期""" + patterns = self.config.get("filename_patterns", {}).get("patterns", []) + + for 
pattern_config in patterns: + regex = pattern_config.get("regex") + if not regex: + continue + + match = re.match(regex, filename) + if match: + data = match.groupdict() + date_format = pattern_config.get("date_format", "{date}") + + # 格式化日期 + try: + formatted_date = date_format.format(**data) + data['date'] = formatted_date + except KeyError: + pass + + logger.debug(f"文件名匹配成功: {pattern_config.get('name')} -> {data}") + return data + + # 默认返回原始文件名 + logger.warning(f"文件名未匹配任何模式: {filename}") + return {"streamer": filename, "date": ""} + + def get_random_quote(self): + """随机获取一句名言""" + quotes = self.config.get("quotes", []) + if not quotes: + return {"text": "", "author": ""} + return random.choice(quotes) + +class UploadHandler(FileSystemEventHandler): + def __init__(self, config): + self.processing_sets = set() + self.config = config + + def on_created(self, event): + # 监听 split_done.flag 文件的生成 + if not event.is_directory and event.src_path.lower().endswith(DONE_FLAG): + logger.debug(f"检测到切割完成标记: {event.src_path}") + self.handle_upload(Path(event.src_path)) + + def on_moved(self, event): + if not event.is_directory and event.dest_path.lower().endswith(DONE_FLAG): + logger.debug(f"检测到切割完成标记移动: {event.dest_path}") + self.handle_upload(Path(event.dest_path)) + + def handle_upload(self, flag_path): + work_dir = flag_path.parent + video_stem = work_dir.name + upload_done = work_dir / UPLOAD_FLAG + split_dir = work_dir / "split_video" + + # 防重复检查 + if upload_done.exists() or video_stem in self.processing_sets: + logger.debug(f"上传已完成或正在处理,跳过: {video_stem}") + return + + logger.info("="*50) + logger.info(f"准备上传: {video_stem}") + logger.info("="*50) + self.processing_sets.add(video_stem) + + try: + # 1. 解析文件名 + parsed = self.config.parse_filename(video_stem) + streamer = parsed.get('streamer', video_stem) + date = parsed.get('date', '') + + logger.info(f"主播: {streamer}, 日期: {date}") + + # 2. 
读取歌曲信息 + songs_json = work_dir / "songs.json" + songs_txt = work_dir / "songs.txt" + songs = [] + song_count = 0 + songs_list = "" + + if songs_json.exists(): + try: + with open(songs_json, 'r', encoding='utf-8') as f: + data = json.load(f) + songs = data.get('songs', []) + song_count = len(songs) + logger.info(f"读取到 {song_count} 首歌曲") + except Exception as e: + log_exception(logger, e, "读取 songs.json 失败") + + if songs_txt.exists(): + songs_list = songs_txt.read_text(encoding='utf-8').strip() + logger.info("已读取歌单文本") + + # 3. 获取随机名言 + quote = self.config.get_random_quote() + daily_quote = quote.get('text', '') + quote_author = quote.get('author', '') + + # 4. 构建模板变量 + template_vars = { + 'streamer': streamer, + 'date': date, + 'song_count': song_count, + 'songs_list': songs_list, + 'daily_quote': daily_quote, + 'quote_author': quote_author + } + + # 5. 渲染标题和简介 + template = self.config.config.get('template', {}) + title = template.get('title', '{streamer}_{date}').format(**template_vars) + description = template.get('description', '{songs_list}').format(**template_vars) + dynamic = template.get('dynamic', '').format(**template_vars) + + # 6. 获取标签(优先使用主播专属标签) + streamers_config = self.config.config.get('streamers', {}) + if streamer in streamers_config: + tags = streamers_config[streamer].get('tags', template.get('tag', '')) + logger.info(f"使用主播专属标签: {streamer}") + else: + tags = template.get('tag', '翻唱,唱歌,音乐').format(**template_vars) + + logger.info(f"标题: {title}") + logger.info(f"标签: {tags}") + logger.debug(f"简介预览: {description[:100]}...") + + # 7. 获取所有切片视频 + video_files = sorted([str(v) for v in split_dir.glob("*") if v.suffix.lower() in {'.mp4', '.mkv', '.mov', '.flv'}]) + + if not video_files: + logger.error(f"切片目录 {split_dir} 内没找到视频") + return + + logger.info(f"找到 {len(video_files)} 个视频分片") + + # 8. 
读取上传设置 + upload_settings = self.config.config.get('upload_settings', {}) + tid = upload_settings.get('tid', 31) + copyright_val = upload_settings.get('copyright', 2) + source = upload_settings.get('source', '直播回放') + cover = upload_settings.get('cover', '') + + # 8. 刷新 biliup 登录信息 + renew_cmd = [BILIUP_PATH, "renew"] + logger.info("尝试刷新 biliup 登录信息") + renew_result = subprocess.run(renew_cmd, shell=False, capture_output=True, text=True, encoding='utf-8') + if renew_result.returncode != 0: + logger.warning(f"biliup renew 返回非 0: {renew_result.returncode}") + logger.debug(f"renew stderr: {renew_result.stderr.strip()}") + else: + logger.info("biliup renew 成功") + + # 9. 执行上传 + logger.info(f"启动 biliup 投稿...") + cmd = [ + BILIUP_PATH, "upload", + *video_files, + "--title", title, + "--tid", str(tid), + "--tag", tags, + "--copyright", str(copyright_val), + "--source", source, + "--desc", description + ] + + if dynamic: + cmd.extend(["--dynamic", dynamic]) + + if cover and Path(cover).exists(): + cmd.extend(["--cover", cover]) + + logger.debug(f"biliup 命令: {' '.join(cmd[:5])}... (共 {len(video_files)} 个文件)") + + # shell=True 确保在 Windows 下调用正常 + result = subprocess.run(cmd, shell=False, capture_output=True, text=True, encoding='utf-8') + + if result.returncode == 0: + logger.info(f"投稿成功: {video_stem}") + logger.info(f"标题: {title}") + upload_done.touch() # 盖上"上传完成"戳 + logger.info("生成上传完成标记") + + # 上传成功后清理空间 + try: + # 1. 删除 split_video 目录 + if split_dir.exists(): + shutil.rmtree(split_dir) + logger.info(f"已删除切片目录: {split_dir}") + + # 2. 
def main():
    """Run the upload watcher until interrupted.

    Ensures the session directory exists, logs the effective settings,
    schedules a recursive observer, uploads any directory that is split but
    not yet uploaded, then starts the observer and idles until Ctrl-C.
    """
    root = Path(SESSION_DIR)
    root.mkdir(parents=True, exist_ok=True)

    for line in (
        "="*50,
        "上传模块启动 (Biliup 自动投稿)",
        "="*50,
        f"监控目录: {SESSION_DIR}",
        f"Biliup 路径: {BILIUP_PATH}",
        f"配置文件: {CONFIG_FILE}",
    ):
        logger.info(line)

    # Load configuration and attach the handler.
    uploader = UploadHandler(UploadConfig(CONFIG_FILE))
    observer = Observer()
    observer.schedule(uploader, str(root), recursive=True)

    # Startup scan: a split_done flag without an upload_done flag means the
    # upload still has to happen.
    logger.info("扫描待上传任务...")
    pending_seen = 0
    for candidate in root.iterdir():
        if candidate.is_dir():
            split_marker = candidate / DONE_FLAG
            if split_marker.exists() and not (candidate / UPLOAD_FLAG).exists():
                logger.info(f"发现待上传任务: {candidate.name}")
                uploader.handle_upload(split_marker)
                pending_seen += 1
    logger.info(f"扫描完成,处理 {pending_seen} 个待上传任务")

    observer.start()
    logger.info("文件监控已启动")

    try:
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        logger.info("接收到停止信号,正在关闭...")
        observer.stop()
    observer.join()
    logger.info("上传模块已停止")
def get_system_logger(module_name):
    """Return the logger named ``system.<module_name>``, configuring it once.

    On first use the logger gets a rotating file handler at DEBUG level
    (``<module_name>.log`` under the system log directory) and a console
    handler at INFO level, both with the shared format. Later calls find the
    handlers already attached and return the logger unchanged.

    :param module_name: module name, e.g. 'monitor' or 'upload'
    :return: the configured logger
    """
    setup_directories()

    log = logging.getLogger(f"system.{module_name}")
    log.setLevel(logging.DEBUG)

    # Only attach handlers the first time, otherwise every line is duplicated.
    if not log.handlers:
        to_file = RotatingFileHandler(
            os.path.join(SYSTEM_LOG_DIR, f"{module_name}.log"),
            maxBytes=MAX_BYTES,
            backupCount=BACKUP_COUNT,
            encoding='utf-8'
        )
        to_file.setLevel(logging.DEBUG)

        to_console = logging.StreamHandler()
        to_console.setLevel(logging.INFO)

        layout = logging.Formatter(LOG_FORMAT, DATE_FORMAT)
        for sink in (to_file, to_console):
            sink.setFormatter(layout)
        for sink in (to_file, to_console):
            log.addHandler(sink)

    return log
def log_exception(logger, exception, context=""):
    """Log an exception, with its traceback, at ERROR level.

    The message is ``"<context>: <ExceptionType>: <text>"``, or just
    ``"<ExceptionType>: <text>"`` when no context is given.

    :param logger: logger object to write to
    :param exception: the exception being reported
    :param context: optional description of what was being attempted
    """
    summary = f"{type(exception).__name__}: {str(exception)}"
    message = f"{context}: {summary}" if context else summary

    # Attach the traceback of the exception that was passed in.
    # exc_info=True would log whatever exception is currently being handled,
    # which is nothing when called outside an except block and may be a
    # different exception inside nested handlers.
    exc_info = exception if isinstance(exception, BaseException) else True
    logger.error(message, exc_info=exc_info)
def get_video_duration(file_path):
    """Return the duration of a video in seconds, as reported by ffprobe.

    Args:
        file_path: Path object of the video file.

    Returns:
        The duration as a float, or 0 when ffprobe cannot be run or its
        output is not a number (the failure is logged).
    """
    try:
        cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', str(file_path)]
        # Keep stderr separate from stdout. Merging them lets any ffprobe
        # diagnostic end up in the text handed to float(), turning a readable
        # file into a "duration 0" result.
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        duration = float(result.stdout.decode('utf-8', errors='ignore').strip())
        logger.debug(f"获取视频时长: {file_path.name} = {duration}秒")
        return duration
    except Exception as e:
        log_exception(logger, e, f"获取视频时长失败: {file_path}")
        return 0
logger.info(f"派发转录任务: {file_path.name}") + # 改进:使用 sys.executable 保证环境一致性 + process = subprocess.Popen([ + sys.executable, 'video2srt.py', + str(file_path), + str(session_folder) + ]) + logger.info(f"转录进程已启动 (PID: {process.pid})") + + except Exception as e: + log_exception(logger, e, "监控处理异常") + +if __name__ == "__main__": + logger.info("="*50) + logger.info("视频监控模块启动") + logger.info("="*50) + + for d in [STAGE_DIR, BACKUP_DIR, SESSION_DIR]: + Path(d).mkdir(parents=True, exist_ok=True) + logger.info(f"监控目录: {STAGE_DIR}") + logger.info(f"备份目录: {BACKUP_DIR}") + logger.info(f"工作目录: {SESSION_DIR}") + + handler = VideoHandler() + + # 启动时扫描已有文件 + logger.info("正在扫描 stage 目录下的存量视频...") + for f in Path(STAGE_DIR).iterdir(): + if f.is_file(): + handler.handle_file(f) + + observer = Observer() + observer.schedule(handler, STAGE_DIR, recursive=False) + observer.start() + logger.info("文件监控已启动") + + try: + while True: time.sleep(1) + except KeyboardInterrupt: + logger.info("接收到停止信号,正在关闭...") + observer.stop() + observer.join() + logger.info("视频监控模块已停止") diff --git a/monitorSongs.py b/monitorSongs.py new file mode 100644 index 0000000..873b997 --- /dev/null +++ b/monitorSongs.py @@ -0,0 +1,161 @@ +import os +import time +import json +import subprocess +from pathlib import Path +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +from logger import get_system_logger, log_exception + +# ========================================== +# 接口配置 +# ========================================== +SESSION_DIR = r'./session' +CHECK_INTERVAL = 2 +OUTPUT_SUBDIR = "split_video" +DONE_FLAG = "split_done.flag" + +# 初始化日志 +logger = get_system_logger('monitorSongs') +# ========================================== + +class SongsJsonHandler(FileSystemEventHandler): + def on_created(self, event): + if not event.is_directory and event.src_path.lower().endswith('songs.json'): + logger.debug(f"检测到歌曲列表创建: {event.src_path}") + 
def process_video_splitting(self, json_path):
    """Cut the session's source video into one clip per entry of songs.json.

    The work directory is the parent of *json_path*.  Clips are written to
    ``<work_dir>/OUTPUT_SUBDIR`` using ffmpeg stream copy, and
    ``<work_dir>/DONE_FLAG`` is created at the end of the run.  A directory
    that already has the flag is skipped.

    Args:
        json_path: ``pathlib.Path`` of the ``songs.json`` file.
    """
    work_dir = json_path.parent
    split_dir = work_dir / OUTPUT_SUBDIR
    flag_file = work_dir / DONE_FLAG

    # 1. Nothing to do when this work directory was already split.
    if flag_file.exists():
        logger.debug(f"切割已完成,跳过: {work_dir.name}")
        return

    logger.info("="*50)
    logger.info(f"检测到新歌曲列表: {work_dir.name}")
    logger.info("="*50)

    # 2. Load the song list.
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        songs = data.get('songs', [])
        logger.info(f"读取到 {len(songs)} 首歌曲")
    except Exception as e:
        log_exception(logger, e, f"读取 JSON 失败: {json_path}")
        return

    # 3. Locate the source video.  Sorting makes the choice deterministic
    #    (iterdir() order is arbitrary) and is_file() skips directories.
    video_exts = {'.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv'}
    source_video = next(
        (p for p in sorted(work_dir.iterdir())
         if p.is_file() and p.suffix.lower() in video_exts),
        None,
    )
    if not source_video:
        logger.error("工作区内未找到源视频文件,跳过")
        return

    logger.info(f"源视频: {source_video.name}")
    split_dir.mkdir(parents=True, exist_ok=True)

    # 4. Cut every song.
    logger.info("开始切割视频片段 (Stream Copy)...")

    success_count = 0
    fail_count = 0

    for idx, song in enumerate(songs, 1):
        # SRT-style timestamps use a comma before the milliseconds;
        # ffmpeg expects a dot.
        start = song.get('start', '00:00:00.000').replace(',', '.')
        end = song.get('end', '00:00:00.000').replace(',', '.')

        # Path separators in a title would create unintended sub-paths.
        title = song.get('title', 'UNKNOWN').replace('/', '_').replace('\\', '_')
        artist = song.get('artist', 'UNKNOWN')
        output_path = split_dir / f"{idx:02d}_{title}{source_video.suffix}"

        if output_path.exists():
            logger.info(f"[{idx}] 已存在,跳过: {title}")
            continue

        # -ss / -to are placed before -i so they apply to the input (seek
        # without decoding); '-c copy' avoids re-encoding.
        cmd = [
            'ffmpeg', '-y',
            '-ss', start,
            '-to', end,
            '-i', str(source_video),
            '-c', 'copy',
            '-map_metadata', '0',
            str(output_path),
        ]

        try:
            subprocess.run(cmd, capture_output=True, check=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"[{idx}] ✗ {title} 切割失败")
            logger.error(f"FFmpeg 错误: {e.stderr.decode('utf-8', errors='ignore')[:200]}")
            fail_count += 1
        except OSError as e:
            # ffmpeg missing or not executable: count it as a failure
            # instead of letting the exception escape the event handler.
            logger.error(f"[{idx}] ✗ {title} 切割失败")
            logger.error(f"FFmpeg 无法启动: {e}")
            fail_count += 1
        else:
            logger.info(f"[{idx}] ✓ {title} - {artist}")
            success_count += 1

    # 5. Mark the directory as done (written regardless of failures, as before).
    flag_file.touch()
    logger.info("="*50)
    logger.info(f"切割任务完成: 成功 {success_count} / 失败 {fail_count}")
    logger.info(f"输出目录: {split_dir}")
    logger.info("="*50)
"object", + "properties": { + "songs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "start": {"type": "string"}, + "end": {"type": "string"}, + "title": {"type": "string"}, + "artist": {"type": "string"}, + "confidence": {"type": "number"}, + "evidence": {"type": "string"} + }, + "required": ["start", "end", "title", "artist", "confidence", "evidence"], + "additionalProperties": False + } + } + }, + "required": ["songs"], + "additionalProperties": False +} + +TASK_PROMPT = """你是音乐片段识别助手。当前目录下有一个字幕文件。 +任务: +1. 结合字幕内容并允许联网搜索进行纠错(识别同音字、唱错等)。 +2. 识别出直播中唱过的所有歌曲,给出精确的开始和结束时间。歌曲开始时间规则: + - 歌曲开始时间应使用“上一句字幕的结束时间”作为 start_time。 + - 这样可以尽量保留歌曲可能存在的前奏。 +3. 同一首歌间隔 ≤160s 合并,>160s 分开。若连续识别出相同歌曲,且中间只有短暂对白、空白、转场或无歌词段,应合并为同一首歌. +4. 忽略纯聊天片段。 +5. 无法确认的歌曲丢弃,宁缺毋滥:你的输出将直接面向最终用户。 +6. 忽略短片段:如果一段演唱持续时间总和少于 15 秒,视为随口哼唱,请直接忽略,不计入列表。 +7. 仔细分析每一句歌词,识别出相关歌曲后, 使用该歌曲歌词上下文对比字幕上下文,确定歌曲起始与停止时间 +8.歌曲标注规则: + - 可以在歌曲名称后使用括号 () 添加补充说明。 + - 常见标注示例: + - (片段):歌曲演唱时间较短,例如 < 60 秒 + - (清唱):无伴奏演唱 + - (副歌):只演唱副歌部分 + - 标注应简洁,仅在确有必要时使用。 +9. 通过歌曲起始和结束时间自检, 一般歌曲长度在5分钟以内, 1分钟以上, 可疑片段重新联网搜索检查. 
class SrtHandler(FileSystemEventHandler):
    """Run Codex song recognition when a transcription finishes.

    A run is triggered by the creation of a file whose name ends with
    ``DONE_FLAG`` or by a ``.srt`` file being moved into place.  The run
    happens in the directory that contains the triggering file.
    """

    def on_created(self, event):
        """Start recognition when a ``DONE_FLAG`` file is created."""
        if event.is_directory:
            return

        src_path = event.src_path
        if isinstance(src_path, bytes):
            src_path = src_path.decode('utf-8')

        if src_path.endswith(DONE_FLAG):
            logger.debug(f"检测到转录完成标记: {src_path}")
            self.process_with_codex(Path(src_path))

    def on_moved(self, event):
        """Start recognition when a ``.srt`` file is moved/renamed into place."""
        dest_path = event.dest_path
        if isinstance(dest_path, bytes):
            dest_path = dest_path.decode('utf-8')

        if not event.is_directory and dest_path.lower().endswith('.srt'):
            logger.debug(f"检测到字幕文件移动: {dest_path}")
            self.process_with_codex(Path(dest_path))

    def process_with_codex(self, srt_path):
        """Invoke ``codex exec`` in the directory containing *srt_path*.

        Writes ``song_schema.json`` into the work directory, runs Codex with
        ``TASK_PROMPT`` and, on success, derives ``songs.txt`` from the
        produced ``songs.json``.  Skipped when ``songs.json`` already exists.

        Args:
            srt_path: ``pathlib.Path`` of the triggering file (flag or .srt);
                only its parent directory is used.
        """
        work_dir = srt_path.parent
        # Avoid running twice for the same directory.
        if (work_dir / "songs.json").exists():
            logger.info(f"songs.json 已存在,跳过: {work_dir.name}")
            return

        logger.info(f"发现新任务,准备识别歌曲: {work_dir.name}")

        ai_log, _ = get_ai_logger('codex', 'songs')
        ai_log.info("="*50)
        ai_log.info("Codex 歌曲识别任务开始")
        ai_log.info(f"工作目录: {work_dir}")
        ai_log.info("="*50)

        # Argument list (no shell): the prompt is passed as a single argv
        # entry, newlines flattened to spaces.
        cmd = [
            CODEX_CMD, "exec",
            TASK_PROMPT.replace('\n', ' '),
            "--full-auto",
            "--sandbox", "workspace-write",
            "--output-schema", "./song_schema.json",
            "-o", "songs.json",
            "--skip-git-repo-check",
            "--json",
        ]

        try:
            # Written inside the try so that an I/O error is logged instead
            # of escaping into the observer thread.
            schema_file = work_dir / "song_schema.json"
            with open(schema_file, "w", encoding="utf-8") as f:
                json.dump(SONG_SCHEMA, f, ensure_ascii=False, indent=2)

            logger.info("调用 Codex...")
            ai_log.info(f"执行命令: {subprocess.list2cmdline(cmd)}")

            start_time = time.time()
            result = subprocess.run(
                cmd,
                cwd=str(work_dir),
                shell=False,
                capture_output=True,
                text=True,
                encoding='utf-8',
            )
            elapsed = time.time() - start_time

            ai_log.info(f"Codex 执行完成,耗时: {elapsed:.2f}秒")

            if result.stdout:
                ai_log.info("=== STDOUT ===")
                ai_log.info(result.stdout)
            if result.stderr:
                ai_log.warning("=== STDERR ===")
                ai_log.warning(result.stderr)

            if result.returncode != 0:
                logger.error(f"Codex 失败,返回码: {result.returncode}")
                ai_log.error(f"Codex 失败,返回码: {result.returncode}")
            elif not (work_dir / "songs.json").exists():
                # Exit code 0 alone does not prove the output was produced.
                logger.error(f"Codex 返回成功但未生成 songs.json: {work_dir.name}")
                ai_log.error("Codex 返回成功但未生成 songs.json")
            else:
                logger.info(f"Codex 执行成功: {work_dir.name}")
                self.generate_txt_fallback(work_dir, ai_log)

        except Exception as e:
            log_exception(logger, e, "Codex 调用异常")
            log_exception(ai_log, e, "Codex 执行异常")

        ai_log.info("="*50)
        ai_log.info("Codex 歌曲识别任务完成")
        ai_log.info("="*50)

    def generate_txt_fallback(self, work_dir, ai_log):
        """Write ``songs.txt`` (one ``HH:MM:SS title — artist`` line per song).

        Args:
            work_dir: directory holding ``songs.json``.
            ai_log: accepted for interface compatibility; not used here.
        """
        json_path = work_dir / "songs.json"
        txt_path = work_dir / "songs.txt"
        if not json_path.exists():
            logger.warning(f"songs.json 不存在,无法生成 songs.txt: {work_dir.name}")
            return

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                songs = json.load(f).get('songs', [])

            with open(txt_path, 'w', encoding='utf-8') as t:
                for s in songs:
                    # Drop the fractional part, whichever separator is used.
                    start_time = s['start'].split(',')[0].split('.')[0]
                    t.write(f"{start_time} {s['title']} — {s['artist']}\n")
            logger.info(f"成功生成: {txt_path.name}")
        except Exception as e:
            log_exception(logger, e, "生成 txt 失败")
# A CSI control sequence is ESC '[' followed by parameter bytes (0x30-0x3F,
# which includes '?', '<', '=' and '>'), optional intermediate bytes
# (0x20-0x2F) and one final byte (0x40-0x7E).  The previous pattern only
# allowed digits and ';' as parameters, so sequences such as "\x1b[?25l"
# (hide cursor) were left in the text.
ANSI_ESCAPE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")


def strip_ansi(text: str) -> str:
    """Return *text* with ANSI CSI escape sequences removed.

    ``None`` and the empty string both yield ``""``.
    """
    return ANSI_ESCAPE.sub("", text or "")
class CommentManager:
    """Queue of session folders waiting for their pinned song-list comment.

    A folder is queued once it contains ``songs.txt`` and no
    ``comment_done.flag``.  Failed attempts are retried with exponential
    back-off (``BASE_DELAY`` doubled per attempt) up to ``MAX_RETRIES``.
    """

    def __init__(self):
        self.client = TopCommentClient()
        # {folder_path: {'attempts': int, 'next_run': epoch seconds}}
        self.pending = {}
        # Result of the last successful `biliup list`, reused for 60 s.
        self._cache = {"time": 0, "videos": []}

    def fetch_videos(self):
        """Return recent uploads as ``[{"bvid": ..., "title": ...}]``.

        Parses the tab-separated output of ``biliup list --max-pages 1``.
        A successful result is cached for 60 seconds; on error an empty
        list is returned and nothing is cached.
        """
        if time.time() - self._cache["time"] < 60:
            return self._cache["videos"]
        try:
            res = subprocess.run(
                [str(BILIUP_PATH), "list", "--max-pages", "1"],
                capture_output=True, text=True, encoding='utf-8',
            )
            videos = []
            for line in strip_ansi(res.stdout).splitlines():
                if not line.startswith("BV"):
                    continue
                parts = line.split("\t")
                if len(parts) >= 2:
                    videos.append({"bvid": parts[0].strip(), "title": parts[1].strip()})
            self._cache = {"time": time.time(), "videos": videos}
            return videos
        except Exception as e:
            logger.error(f"biliup list 失败: {e}")
            return []

    @staticmethod
    def _match_bvid(folder_name, videos):
        """Pick the BV id that belongs to *folder_name*, or ``None``.

        A video matches when the folder name is contained in its title or
        its title is contained in the folder name.  Rows with an empty
        title are ignored: ``"" in folder_name`` is always true, so such a
        row would otherwise match every folder.  When no title matches, a
        ``[BV...]`` tag embedded in the folder name is used.
        """
        for v in videos:
            title = v['title']
            if title and (folder_name in title or title in folder_name):
                return v['bvid']

        bv_match = re.search(r"\[(BV[0-9A-Za-z]+)\]", folder_name)
        return bv_match.group(1) if bv_match else None

    def scan_and_add(self, folder: Path):
        """Queue *folder* if it has ``songs.txt`` and is not done or queued."""
        if not folder.is_dir() or (folder / "comment_done.flag").exists():
            return
        if (folder / "songs.txt").exists() and folder not in self.pending:
            logger.info(f"发现待处理任务: {folder.name}")
            self.pending[folder] = {'attempts': 0, 'next_run': 0}

    def process_queue(self):
        """Attempt every queued folder whose ``next_run`` time has passed."""
        now = time.time()
        # Iterate over a snapshot: entries are removed while looping.
        for folder in list(self.pending):
            task = self.pending[folder]
            if task['next_run'] > now:
                continue

            try:
                # 1. Find the uploaded video for this folder.
                matched_bvid = self._match_bvid(folder.name, self.fetch_videos())
                if not matched_bvid:
                    raise RuntimeError("未在最近上传列表中找到匹配视频")

                # 2. Read the comment body.
                content = (folder / "songs.txt").read_text(encoding="utf-8").strip()
                if not content:
                    logger.warning(f"songs.txt 内容为空,取消任务: {folder.name}")
                    self.pending.pop(folder)
                    continue

                # 3. Post and pin.
                aid = self.client.get_aid(matched_bvid)
                self.client.post_and_top(aid, content)

                # 4. Mark as done.
                (folder / "comment_done.flag").touch()
                logger.info(f"任务完成: {folder.name} -> {matched_bvid}")
                self.pending.pop(folder)

            except Exception as e:
                task['attempts'] += 1
                if task['attempts'] >= MAX_RETRIES:
                    logger.error(f"任务最终失败: {folder.name} - {e}")
                    self.pending.pop(folder)
                else:
                    delay = BASE_DELAY * (2 ** (task['attempts']-1))
                    task['next_run'] = now + delay
                    logger.warning(f"任务推迟({task['attempts']}/{MAX_RETRIES}): {folder.name} - {e}. {delay}s 后重试")
启动 Watchdog + class Handler(FileSystemEventHandler): + def on_created(self, event): + p = Path(event.src_path) + if p.name == "songs.txt": mgr.scan_and_add(p.parent) + + observer = Observer() + observer.schedule(Handler(), str(SESSION_DIR), recursive=True) + observer.start() + + logger.info(f"开始监控目录: {SESSION_DIR}") + try: + while True: + mgr.process_queue() + time.sleep(POLL_INTERVAL) + except KeyboardInterrupt: + observer.stop() + observer.join() + logger.info("置顶评论模块已停止") + +if __name__ == "__main__": + main() diff --git a/start_all.py b/start_all.py new file mode 100644 index 0000000..1654e53 --- /dev/null +++ b/start_all.py @@ -0,0 +1,96 @@ +import subprocess +import time +import sys +import os +from pathlib import Path +from logger import get_system_logger + +# ========================================== +# 配置区:确保脚本文件名与你本地一致 +# ========================================== +SCRIPTS = [ + "monitor.py", # 1. 监控 stage,触发视频转录 (调用 video2srt.py) + "monitorSrt.py", # 2. 监控 session,触发 Codex 歌词识别 + "monitorSongs.py", # 3. 监控 session,触发 FFmpeg 视频切片 + "upload.py", # 4. 监控 session,触发 biliup 自动投稿 + "session_top_comment.py", # 5. 监控 session,触发 B 站评论置顶 + "add_to_collection.py", # 5. 
def start_pipeline():
    """Launch every script in ``SCRIPTS`` as a child process and supervise them.

    Aborts without starting anything when one of the scripts is missing.
    After start-up the children are polled every 10 seconds; a child that
    has exited is reported once.  On Ctrl+C all children that are still
    running are terminated and reaped.
    """
    processes = []

    logger.info("="*50)
    logger.info("直播切片 & 自动投稿全自动流水线")
    logger.info("="*50)
    logger.info(f"启动时间: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info(f"当前路径: {os.getcwd()}")
    logger.info(f"Python: {PYTHON_EXE}")

    # Refuse to start a partial pipeline.
    for script in SCRIPTS:
        if not Path(script).exists():
            logger.error(f"找不到脚本 {script},请确保它们在同一目录下")
            print(f"[X] 错误: 找不到脚本 {script},请确保它们在同一目录下。")
            return

    for script in SCRIPTS:
        logger.info(f"正在启动模块: {script}")
        try:
            # On Windows each module gets its own console window; elsewhere
            # the flag is 0 and all children share this terminal.
            p = subprocess.Popen(
                [PYTHON_EXE, script],
                creationflags=subprocess.CREATE_NEW_CONSOLE if os.name == 'nt' else 0
            )
            processes.append((script, p))
            logger.info(f"模块已启动: {script} (PID: {p.pid})")
            time.sleep(1)  # stagger start-up
        except Exception as e:
            logger.error(f"启动 {script} 失败: {e}")
            print(f"[X] 启动 {script} 失败: {e}")

    logger.info("")
    logger.info("="*50)
    logger.info("所有监控模块已启动!")
    logger.info("请勿关闭此主窗口,除非你想停止整个流水线")
    logger.info("="*50)

    print("\n" + "="*50)
    print("[√] 所有监控模块已启动!")
    print("[!] 请勿关闭此主窗口,除非你想停止整个流水线。")
    print("[!] 详细日志请查看 ./logs/system/ 目录")
    print("="*50)

    # Names of children whose exit was already reported, so the same
    # warning is not repeated on every poll.
    reported = set()
    try:
        while True:
            for name, p in processes:
                exit_code = p.poll()
                if exit_code is not None and name not in reported:
                    reported.add(name)
                    logger.warning(f"模块 {name} 已意外停止 (Exit Code: {exit_code})")
                    print(f"\n[⚠️] 警告: 模块 {name} 已意外停止 (Exit Code: {exit_code})")
            time.sleep(10)
    except KeyboardInterrupt:
        logger.info("接收到停止信号,正在关闭所有监控模块...")
        print("\n[*] 正在关闭所有监控模块...")
        for name, p in processes:
            if p.poll() is None:
                p.terminate()
        for name, p in processes:
            # Reap the child; force-kill it if it ignores the request.
            try:
                p.wait(timeout=5)
            except subprocess.TimeoutExpired:
                p.kill()
                p.wait()
            logger.info(f"模块已终止: {name}")
        logger.info("已安全退出")
        print("[√] 已安全退出。")
Exception as e: + log_exception(logger, e, "加载配置文件失败") + return self.get_default_config() + + def get_default_config(self): + logger.warning("使用默认配置") + return { + "upload_settings": { + "tid": 31, + "copyright": 2, + "source": "直播回放", + "cover": "" + }, + "template": { + "title": "{streamer}_{date}", + "description": "自动录制剪辑\n\n{songs_list}", + "tag": "翻唱,直播切片,唱歌,音乐", + "dynamic": "" + }, + "streamers": {}, + "quotes": [], + "filename_patterns": {"patterns": []} + } + + def parse_filename(self, filename): + patterns = self.config.get("filename_patterns", {}).get("patterns", []) + for pattern_config in patterns: + regex = pattern_config.get("regex") + if not regex: continue + match = re.match(regex, filename) + if match: + data = match.groupdict() + date_format = pattern_config.get("date_format", "{date}") + try: + formatted_date = date_format.format(**data) + data['date'] = formatted_date + except KeyError: pass + logger.debug(f"文件名匹配成功: {pattern_config.get('name')} -> {data}") + return data + logger.warning(f"文件名未匹配任何模式: {filename}") + return {"streamer": filename, "date": ""} + + def get_random_quote(self): + quotes = self.config.get("quotes", []) + if not quotes: return {"text": "", "author": ""} + return random.choice(quotes) + +class UploadHandler(FileSystemEventHandler): + def __init__(self, config): + self.processing_sets = set() + self.config = config + + def on_created(self, event): + src_path = event.src_path + if isinstance(src_path, bytes): src_path = src_path.decode('utf-8') + if not event.is_directory and src_path.lower().endswith(DONE_FLAG): + logger.debug(f"检测到切割完成标记: {src_path}") + self.handle_upload(Path(src_path)) + + def on_moved(self, event): + dest_path = event.dest_path + if isinstance(dest_path, bytes): dest_path = dest_path.decode('utf-8') + if not event.is_directory and dest_path.lower().endswith(DONE_FLAG): + logger.debug(f"检测到切割完成标记移动: {dest_path}") + self.handle_upload(Path(dest_path)) + + def _wait_exponential(self, retry_count, 
base_wait=300, max_wait=3600): + """指数退避等待计算""" + # 计算等待时间:60, 120, 240... 最大 600秒 + wait_time = min(base_wait * (2 ** retry_count), max_wait) + return wait_time + + def handle_upload(self, flag_path): + work_dir = flag_path.parent + video_stem = work_dir.name + upload_done = work_dir / UPLOAD_FLAG + split_dir = work_dir / "split_video" + + if upload_done.exists() or video_stem in self.processing_sets: + logger.debug(f"上传已完成或正在处理,跳过: {video_stem}") + return + + logger.info("="*50) + logger.info(f"准备上传: {video_stem}") + logger.info("="*50) + self.processing_sets.add(video_stem) + + try: + parsed = self.config.parse_filename(video_stem) + streamer = parsed.get('streamer', video_stem) + date = parsed.get('date', '') + + songs_json = work_dir / "songs.json" + songs_txt = work_dir / "songs.txt" + songs_list = "" + song_count = 0 + + if songs_json.exists(): + try: + with open(songs_json, 'r', encoding='utf-8') as f: + data = json.load(f) + song_count = len(data.get('songs', [])) + except Exception: pass + + if songs_txt.exists(): + songs_list = songs_txt.read_text(encoding='utf-8').strip() + + quote = self.config.get_random_quote() + template_vars = { + 'streamer': streamer, 'date': date, 'song_count': song_count, + 'songs_list': songs_list, 'daily_quote': quote.get('text', ''), + 'quote_author': quote.get('author', '') + } + + template = self.config.config.get('template', {}) + title = template.get('title', '{streamer}_{date}').format(**template_vars) + description = template.get('description', '{songs_list}').format(**template_vars) + dynamic = template.get('dynamic', '').format(**template_vars) + + streamers_config = self.config.config.get('streamers', {}) + if streamer in streamers_config: + tags = streamers_config[streamer].get('tags', template.get('tag', '')) + else: + tags = template.get('tag', '翻唱,唱歌,音乐').format(**template_vars) + + video_files = sorted([str(v) for v in split_dir.glob("*") if v.suffix.lower() in {'.mp4', '.mkv', '.mov', '.flv', '.ts'}]) + + if 
not video_files: + logger.error(f"切片目录 {split_dir} 内没找到视频") + return + + upload_settings = self.config.config.get('upload_settings', {}) + tid = upload_settings.get('tid', 31) + + # 1. 刷新登录 + subprocess.run([BILIUP_PATH, "renew"], shell=False, capture_output=True) + + # 2. 准备分批 + BATCH_SIZE = 5 + logger.info(f"启动分批投稿 (总计 {len(video_files)} 个分片)...") + + first_batch = video_files[:BATCH_SIZE] + remaining_batches = [video_files[i:i + BATCH_SIZE] for i in range(BATCH_SIZE, len(video_files), BATCH_SIZE)] + + upload_cmd = [ + BILIUP_PATH, "upload", + *first_batch, + "--title", title, + "--tid", str(tid), + "--tag", tags, + "--copyright", str(upload_settings.get('copyright', 2)), + "--source", upload_settings.get('source', '直播回放'), + "--desc", description + ] + + if dynamic: upload_cmd.extend(["--dynamic", dynamic]) + cover = upload_settings.get('cover', '') + if cover and Path(cover).exists(): upload_cmd.extend(["--cover", cover]) + + bvid = None + MAX_ATTEMPTS = 5 # 定义最大尝试次数 + + # ========================== + # 阶段一:首批上传 (最多5次) + # ========================== + logger.info(f"正在上传第一批 ({len(first_batch)}个文件)...") + + for attempt in range(1, MAX_ATTEMPTS + 1): + logger.info(f"首批上传尝试 [{attempt}/{MAX_ATTEMPTS}]...") + + result = subprocess.run(upload_cmd, shell=False, capture_output=True, text=True, encoding='utf-8') + + if result.returncode == 0: + bv_match = re.search(r'"bvid":"(BV[A-Za-z0-9]+)"', result.stdout) + if not bv_match: bv_match = re.search(r'(BV[A-Za-z0-9]+)', result.stdout) + + if bv_match: + bvid = bv_match.group(1) + logger.info(f"第一批上传成功,BV 号: {bvid}") + (work_dir / "bvid.txt").write_text(bvid, encoding='utf-8') + break # 成功退出循环 + else: + logger.warning(f"上传命令返回成功但未找到BVID (尝试 {attempt}/{MAX_ATTEMPTS})") + + # 如果没有成功 (没有break) + if attempt < MAX_ATTEMPTS: + err_msg = result.stderr.strip()[-100:] if result.stderr else "无标准错误输出" + wait_time = self._wait_exponential(attempt - 1) + logger.error(f"第一批上传失败或未获取BVID,等待 {wait_time}秒后重试。错误片段: {err_msg}") + 
time.sleep(wait_time) + else: + logger.error("第一批上传已达到最大重试次数 (5次),中止本次任务。") + return # 彻底结束函数,不进行后续操作 + + # ========================== + # 阶段二:追加上传 (每批最多5次) + # ========================== + if bvid: + for idx, batch in enumerate(remaining_batches, 2): + logger.info(f"等待 45 秒冷却时间,准备上传第 {idx} 批...") + time.sleep(45) + + batch_success = False + for attempt in range(1, MAX_ATTEMPTS + 1): + logger.info(f"正在追加第 {idx} 批 ({len(batch)}个) - 尝试 [{attempt}/{MAX_ATTEMPTS}]...") + + append_cmd = [BILIUP_PATH, "append", "--vid", bvid, *batch] + res = subprocess.run(append_cmd, shell=False, capture_output=True, text=True, encoding='utf-8') + + if res.returncode == 0: + logger.info(f"第 {idx} 批追加成功") + batch_success = True + break # 成功退出内层循环,进入下一批 + + # 如果失败 + if attempt < MAX_ATTEMPTS: + err_msg = res.stderr.strip()[-100:] if res.stderr else "无标准错误输出" + wait_time = self._wait_exponential(attempt - 1) + logger.error(f"第 {idx} 批追加失败,等待 {wait_time}秒后重试。错误片段: {err_msg}") + time.sleep(wait_time) + + if not batch_success: + logger.error(f"第 {idx} 批追加已达到最大重试次数 (5次)。为防止顺序错乱,中止后续上传。") + return # 某一批次彻底失败,停止整个流程 + + # 只有当所有循环都正常走完没有 return,才会执行到这里 + logger.info(f"所有分片上传完成: {bvid}") + upload_done.touch() + + # 清理 + try: + if split_dir.exists(): shutil.rmtree(split_dir) + for ext in ['.mp4', '.mkv', '.mov', '.flv', '.ts']: + orig = work_dir / f"{video_stem}{ext}" + if orig.exists(): orig.unlink() + except Exception as e: + logger.error(f"清理空间失败: {e}") + else: + # 逻辑上如果第一阶段return了,这里不会执行; + # 但如果第一阶段break了但没bvid(理论上不可能,除非正则漏了),做个保险 + logger.error("逻辑错误:流程继续但无BVID,上传中止") + + except Exception as e: + log_exception(logger, e, "上传异常") + finally: + self.processing_sets.discard(video_stem) + +def main(): + path = Path(SESSION_DIR) + path.mkdir(parents=True, exist_ok=True) + logger.info("上传模块启动 (MaxRetry=5)") + + config = UploadConfig(CONFIG_FILE) + handler = UploadHandler(config) + + for sub_dir in path.iterdir(): + if sub_dir.is_dir(): + if (sub_dir / DONE_FLAG).exists() and not (sub_dir / 
def format_srt_time(seconds):
    """Convert a duration in seconds to an SRT timestamp ``HH:MM:SS,mmm``.

    The value is converted to whole milliseconds once, by rounding, and
    then split with integer arithmetic.  Truncating the float fraction
    separately (as before) loses a millisecond for inputs such as 1.001,
    whose fractional part is stored as 0.000999...  Negative input is
    clamped to zero.
    """
    total_ms = int(round(max(float(seconds), 0.0) * 1000))
    hours, rem = divmod(total_ms, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    secs, millis = divmod(rem, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
return response.segments + except Exception as e: + retry_count += 1 + err_str = str(e) + ai_log.error(f"转录失败 (尝试 {retry_count}): {err_str}") + if "429" in err_str or "rate_limit" in err_str.lower(): + wait_time = 25 + ai_log.warning(f"触发 API 速率限制,等待 {wait_time} 秒后重试...") + logger.warning(f"Groq API 速率限制,等待 {wait_time} 秒") + time.sleep(wait_time) + else: + log_exception(ai_log, e, "Groq API 调用失败") + raise e + +def process_single_video(raw_video_path, video_work_dir): + """ + 1. 移动视频到工作区 + 2. 提取音频并分片 + 3. 转录生成 SRT + """ + raw_video_path = Path(raw_video_path) + video_work_dir = Path(video_work_dir) + video_work_dir.mkdir(parents=True, exist_ok=True) + + logger.info("="*50) + logger.info(f"开始处理视频: {raw_video_path.name}") + logger.info("="*50) + + # 创建AI日志 + ai_log, ai_log_file = get_ai_logger('groq', 'transcribe') + ai_log.info("="*50) + ai_log.info(f"Groq 转录任务开始") + ai_log.info(f"视频文件: {raw_video_path.name}") + ai_log.info(f"工作目录: {video_work_dir}") + ai_log.info("="*50) + + # --- 新增步骤:将视频搬家到工作区 --- + target_video_path = video_work_dir / raw_video_path.name + if not target_video_path.exists(): + logger.info(f"移动视频至工作区: {raw_video_path.name}") + ai_log.info(f"移动视频: {raw_video_path} -> {target_video_path}") + shutil.move(str(raw_video_path), str(target_video_path)) + else: + logger.info(f"视频已在工作区中: {target_video_path.name}") + ai_log.info(f"视频已存在于工作区: {target_video_path}") + + video_stem = target_video_path.stem + audio_temp_dir = video_work_dir / "temp_audio" + audio_temp_dir.mkdir(parents=True, exist_ok=True) + + # 1. 计算切分时长 + seg_duration = math.floor((MAX_FILE_SIZE_MB * 8 * 1024) / BITRATE_KBPS) + logger.info(f"音频分片时长: {seg_duration} 秒") + ai_log.info(f"音频分片参数: {seg_duration}秒/片, 比特率: {BITRATE_KBPS}kbps") + + # 2. 
FFmpeg 提取并分片 (使用工作区内的视频路径) + logger.info("开始提取音频...") + ai_log.info("开始 FFmpeg 音频提取") + output_pattern = str(audio_temp_dir / "part_%03d.mp3") + + cmd = [ + 'ffmpeg', '-y', '-i', str(target_video_path), + '-vn', '-acodec', 'libmp3lame', '-b:a', f'{BITRATE_KBPS}k', + '-ac', '1', '-ar', '22050', + '-f', 'segment', '-segment_time', str(seg_duration), + '-reset_timestamps', '1', output_pattern + ] + + try: + subprocess.run(cmd, check=True, capture_output=True) + logger.info("音频提取完成") + ai_log.info("FFmpeg 音频提取成功") + except Exception as e: + log_exception(logger, e, "FFmpeg 音频提取失败") + log_exception(ai_log, e, "FFmpeg 执行失败") + raise + + segments = sorted(list(audio_temp_dir.glob("part_*.mp3"))) + logger.info(f"音频分片数量: {len(segments)}") + ai_log.info(f"生成音频分片: {len(segments)} 个") + + # 3. 转录并实时写入 SRT + logger.info(f"开始分片转录...") + ai_log.info("开始批量转录") + srt_path = video_work_dir / f"{video_stem}.srt" + global_idx = 1 + + with open(srt_path, "w", encoding="utf-8") as srt_file: + for i, seg in enumerate(segments): + offset = i * seg_duration + logger.info(f"转录进度: {i+1}/{len(segments)}") + ai_log.info(f"转录片段 {i+1}/{len(segments)}: {seg.name}") + + seg_data = transcribe_with_retry(seg, ai_log) + + for chunk in seg_data: + start = format_srt_time(chunk['start'] + offset) + end = format_srt_time(chunk['end'] + offset) + text = chunk['text'].strip() + srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n") + global_idx += 1 + + logger.info(f"字幕文件已生成: {srt_path.name}") + ai_log.info(f"字幕生成完成: 共 {global_idx-1} 条字幕") + + # 4. 
if __name__ == "__main__":
    # argv[1]: source video path; argv[2]: target work directory.
    if len(sys.argv) >= 3:
        logger.info(f"接收到转录任务: {sys.argv[1]}")
        try:
            process_single_video(sys.argv[1], sys.argv[2])
        except Exception as e:
            log_exception(logger, e, "视频转录失败")
            # Re-raise so the process exits with a non-zero status.
            raise
    else:
        logger.error("缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")
        print("错误: 缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")
        # A usage error must not look like success to the parent process.
        sys.exit(2)