Initial commit: sanitize repository for remote push

This commit is contained in:
theshy
2026-03-21 01:36:28 +08:00
commit 3925cb508f
21 changed files with 3357 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
*.json
*.log
*.log.*
*.part
*.flv
data/
session/
logs/
stage/
__pycache__/
qrcode.png
.venv/
test/add_2_collection.py

172
add_to_collection.py Executable file
View File

@ -0,0 +1,172 @@
import json
import time
import requests
import re
import shutil
import subprocess
import random
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, log_exception
# ================= Configuration =================
SESSION_DIR = Path("./session")       # watched session workspace
COOKIE_FILE = Path("./cookies.json")  # Bilibili login cookies
CHECK_INTERVAL = 5                    # main-loop sleep interval (seconds)
# Collection (season) ids
SEASON_ID_A = 7196643  # collection A (same-name full videos)
SEASON_ID_B = 7196624  # collection B (uploaded clips)
# Locate biliup on PATH, falling back to the bare command name
BILIUP_PATH = shutil.which("biliup") or "biliup"
# Module logger
logger = get_system_logger("add_to_collection.py")
# ===========================================
class BiliCollectionClient:
    """Minimal Bilibili member-API client for adding videos to a collection.

    Reads login cookies from COOKIE_FILE and exposes helpers to resolve a
    season's section id and batch-add episodes to that section.
    """

    def __init__(self):
        self.load_cookies()
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://member.bilibili.com/platform/upload-manager/distribution"
        })

    def load_cookies(self):
        """Load cookies from COOKIE_FILE.

        Accepts both the biliup ``{"cookie_info": {"cookies": [...]}}``
        layout and a plain name -> value mapping.
        Raises FileNotFoundError when the file is missing.
        """
        if not COOKIE_FILE.exists():
            raise FileNotFoundError(f"Cookies 文件不存在: {COOKIE_FILE}")
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
        if "cookie_info" in data:
            self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])}
        else:
            self.cookies = data
        # bili_jct doubles as the CSRF token required by member-API POSTs.
        self.csrf = self.cookies.get("bili_jct")

    def get_video_info(self, bvid):
        """Return {"aid", "cid", "title", "charging_pay"} for *bvid*, or None."""
        url = "https://api.bilibili.com/x/web-interface/view"
        try:
            self.session.cookies.update(self.cookies)
            res = self.session.get(url, params={"bvid": bvid}, timeout=10).json()
            if res["code"] == 0:
                d = res["data"]
                return {"aid": d["aid"], "cid": d["cid"], "title": d["title"], "charging_pay": 0}
            # Non-zero code: log the API message instead of failing silently.
            logger.error(f"获取视频信息失败: {res.get('message')} (code={res.get('code')})")
        except Exception as e:
            logger.error(f"获取视频信息失败: {e}")
        return None

    def resolve_section_id(self, sid):
        """Map season id *sid* to the id of its first section, or None."""
        url = "https://member.bilibili.com/x2/creative/web/seasons"
        try:
            self.session.cookies.update(self.cookies)
            res = self.session.get(url, params={"pn": 1, "ps": 50}, timeout=10).json()
            for s in res.get("data", {}).get("seasons", []):
                if s.get("season", {}).get("id") == sid:
                    return s.get("sections", {}).get("sections", [])[0]["id"]
        except Exception as e:
            # Was a bare `except: pass`; log so lookup failures are visible.
            logger.error(f"解析 Section ID 失败: {e}")
        return None

    def add_videos_batch(self, section_id, episodes):
        """POST *episodes* into *section_id*; return True on success.

        Sleeps a random 5-10 s beforehand to mimic human pacing and avoid
        rate limiting. An empty episode list is a trivial success.
        """
        if not episodes:
            return True
        wait = random.uniform(5.0, 10.0)
        logger.info(f"☕ 模拟人工操作,等待 {wait:.2f}s 后提交到合集...")
        time.sleep(wait)
        url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add"
        params = {"csrf": self.csrf}
        try:
            res = self.session.post(url, params=params, json={"sectionId": section_id, "episodes": episodes}).json()
            return res["code"] == 0
        except Exception as e:
            log_exception(logger, e, "批量添加合集异常")
            return False
class CollectionHandler(FileSystemEventHandler):
    """Watches SESSION_DIR and files finished uploads into collections A/B.

    Task A: a recent upload whose title matches a session folder's name goes
    into collection A.  Task B: the BVID recorded in the folder's bvid.txt
    goes into collection B.  Per-folder flag files make both tasks idempotent.
    """

    def __init__(self, client, sid_a, sid_b):
        self.client = client
        self.sid_a = sid_a  # section id of collection A (None = disabled)
        self.sid_b = sid_b  # section id of collection B (None = disabled)
        # Strips ANSI colour sequences from biliup's terminal output.
        self.ansi_escape = re.compile(r"\x1b\[[0-9;]*[A-Za-z]")

    def on_created(self, event):
        # Re-scan when a new session folder or a bvid.txt appears.
        if event.is_directory or event.src_path.endswith("bvid.txt"):
            self.process_all()

    def process_all(self):
        """Scan every session folder and submit pending videos in batches."""
        recent = self.fetch_biliup_list()
        pending_a, pending_b = [], []
        for folder in SESSION_DIR.iterdir():
            if not folder.is_dir():
                continue
            # Task A: same-name video -> collection A
            flag_a = folder / "collection_a_done.flag"
            if self.sid_a and not flag_a.exists():
                bvid = self.match_bvid(folder.name, recent)
                if bvid:
                    info = self.client.get_video_info(bvid)
                    if info:
                        pending_a.append((folder, info))
            # Task B: clipped video -> collection B
            flag_b = folder / "collection_b_done.flag"
            txt = folder / "bvid.txt"
            if self.sid_b and not flag_b.exists() and txt.exists():
                try:
                    bvid = txt.read_text(encoding='utf-8').strip()
                    if bvid.startswith("BV"):
                        info = self.client.get_video_info(bvid)
                        if info:
                            pending_b.append((folder, info))
                except OSError as e:
                    # Was a bare `except: pass`; one broken folder must not
                    # abort the scan, but it should be visible in the log.
                    logger.warning(f"读取 {txt} 失败: {e}")
        # Submit one batch per collection, then mark the folders done.
        if pending_a:
            if self.client.add_videos_batch(self.sid_a, [i[1] for i in pending_a]):
                for f, _ in pending_a:
                    (f / "collection_a_done.flag").touch()
                logger.info(f"合集 A 更新完成: {len(pending_a)}个任务")
        if pending_b:
            if self.client.add_videos_batch(self.sid_b, [i[1] for i in pending_b]):
                for f, _ in pending_b:
                    (f / "collection_b_done.flag").touch()
                logger.info(f"合集 B 更新完成: {len(pending_b)}个任务")

    def fetch_biliup_list(self):
        """Return [{'bvid', 'title'}] parsed from `biliup list` output."""
        try:
            res = subprocess.run([BILIUP_PATH, "list"], capture_output=True, text=True, encoding='utf-8')
            clean_out = self.ansi_escape.sub("", res.stdout)
            return [{"bvid": l.split()[0], "title": "".join(l.split()[1:])} for l in clean_out.splitlines() if l.startswith("BV")]
        except Exception as e:
            # Was a bare `except: return []`; log before degrading gracefully.
            logger.warning(f"biliup list 调用失败: {e}")
            return []

    def match_bvid(self, name, vlist):
        """Fuzzy-match *name* against titles (CJK/alnum only, lowercase)."""
        n = lambda x: re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', x).lower()
        target = n(name)
        for v in vlist:
            vn = n(v['title'])
            if target in vn or vn in target:
                return v['bvid']
        return None
def main():
    """Entry point: resolve section ids, run an initial sweep, then watch."""
    for banner in ("=" * 50, "合集监控模块启动", "=" * 50):
        logger.info(banner)
    client = BiliCollectionClient()
    sid_a = client.resolve_section_id(SEASON_ID_A) if SEASON_ID_A > 0 else None
    sid_b = client.resolve_section_id(SEASON_ID_B) if SEASON_ID_B > 0 else None
    handler = CollectionHandler(client, sid_a, sid_b)
    # Handle folders that already exist before the watcher starts.
    handler.process_all()
    observer = Observer()
    observer.schedule(handler, str(SESSION_DIR), recursive=False)
    observer.start()
    try:
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,235 @@
import json
import time
import requests
import re
import shutil
import subprocess
from pathlib import Path
from logger import get_system_logger, log_exception
# ================= Configuration =================
SESSION_DIR = Path("./session")       # session workspace to scan
COOKIE_FILE = Path("./cookies.json")  # Bilibili login cookies
# Season (collection) ids as shown on the Bilibili web UI; the script
# resolves the matching Section IDs (sub-section ids) automatically.
SEASON_ID_A = 7196643  # collection A (same-name full videos)
SEASON_ID_B = 7196624  # collection B (uploaded clips)
# Locate biliup on PATH, falling back to the bare command name
BILIUP_PATH = shutil.which("biliup") or "biliup"
# ===========================================
logger = get_system_logger("collection_manager")
class BiliCollectionClient:
    """Bilibili member-API client: queries video metadata, resolves a
    season's section id, and adds single videos to a collection section."""

    def __init__(self):
        # Load and validate credentials before any network traffic.
        if not COOKIE_FILE.exists():
            raise FileNotFoundError(f"Cookies 文件不存在: {COOKIE_FILE}")
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Accept both cookie layouts: biliup's {"cookie_info": {"cookies": [...]}}
        # and a plain name -> value mapping.
        if "cookie_info" in data:
            self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])}
        else:
            self.cookies = data
        self.csrf = self.cookies.get("bili_jct")
        if not self.csrf:
            raise ValueError("Cookie 中缺少 bili_jct (CSRF Token)")
        self.session = requests.Session()
        self.session.cookies.update(self.cookies)
        # Headers known to pass Bilibili's request filtering.
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Referer": "https://member.bilibili.com/platform/upload-manager/distribution"
        })

    def get_video_info(self, bvid):
        """Return {"aid", "cid", "title"} for *bvid*, or None on failure."""
        url = "https://api.bilibili.com/x/web-interface/view"
        try:
            res = self.session.get(url, params={"bvid": bvid}, timeout=10).json()
            if res["code"] != 0:
                logger.error(f"查询视频信息失败 [{bvid}]: {res['message']}")
                return None
            data = res["data"]
            return {
                "aid": data["aid"],
                "cid": data["cid"],
                "title": data["title"]
            }
        except Exception as e:
            logger.error(f"获取视频信息异常: {e}")
            return None

    def resolve_section_id(self, target_season_id):
        """Key lookup: map a Season ID (collection id) to its first Section ID.

        The add-episodes API wants the section id, which is only discoverable
        through the creator's season listing. Returns None when the season is
        missing or has no sections.
        """
        url = "https://member.bilibili.com/x2/creative/web/seasons"
        params = {"pn": 1, "ps": 50}  # fetch the first 50 collections
        try:
            res = self.session.get(url, params=params, timeout=10).json()
            if res.get("code") != 0:
                logger.error(f"获取合集列表失败: {res.get('message')}")
                return None
            seasons = res.get("data", {}).get("seasons", [])
            for s in seasons:
                current_sid = s.get("season", {}).get("id")
                # Found the target collection.
                if current_sid == target_season_id:
                    title = s.get("season", {}).get("title", "未知标题")
                    sections = s.get("sections", {}).get("sections", [])
                    if sections:
                        # Default to the first section.
                        first_section_id = sections[0]["id"]
                        logger.info(f"✅ ID解析成功: 合集[{title}]({target_season_id}) -> 小节ID: {first_section_id}")
                        return first_section_id
                    else:
                        logger.error(f"❌ 合集[{title}]({target_season_id}) 存在,但没有创建任何小节!")
                        return None
            logger.error(f"❌ 未找到 Season ID 为 {target_season_id} 的合集,请检查 ID 是否正确。")
            return None
        except Exception as e:
            logger.error(f"解析 Section ID 异常: {e}")
            return None

    def add_video_to_section(self, section_id, video_info):
        """Add one video (dict from get_video_info) to *section_id*.

        Returns True on success, False on API error or network failure.
        """
        url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add"
        # The csrf token must be sent as a query parameter.
        params = {"csrf": self.csrf}
        payload = {
            "sectionId": section_id,
            "episodes": [{
                "aid": video_info["aid"],
                "cid": video_info["cid"],
                "title": video_info["title"],
                "charging_pay": 0
            }]
        }
        try:
            res = self.session.post(url, params=params, json=payload, timeout=15).json()
            if res["code"] == 0:
                logger.info(f"🎉 成功添加: {video_info['title']}")
                return True
            else:
                logger.error(f"添加失败: {res['message']} (Code: {res['code']})")
                return False
        except Exception as e:
            logger.error(f"添加请求异常: {e}")
            return False
class CollectionWorker:
    """Matches finished uploads to session folders and files them into the
    configured collections (task A: same-name video, task B: bvid.txt clip)."""

    def __init__(self, client, section_id_a, section_id_b):
        self.client = client
        self.section_id_a = section_id_a  # None disables task A
        self.section_id_b = section_id_b  # None disables task B
        # Strips ANSI colour sequences from biliup's terminal output.
        self.ansi_escape = re.compile(r"\x1b\[[0-9;]*[A-Za-z]")

    def fetch_recent_videos(self):
        """Return recent uploads as [{'bvid', 'title'}] via `biliup list`."""
        try:
            proc = subprocess.run(
                [str(BILIUP_PATH), "list", "--max-pages", "2"],
                capture_output=True, text=True, encoding='utf-8',
            )
            found = []
            for raw_line in self.ansi_escape.sub("", proc.stdout).splitlines():
                tokens = raw_line.split()
                if len(tokens) < 2 or not tokens[0].startswith("BV"):
                    continue
                raw_title = " ".join(tokens[1:])
                # Drop the status suffix biliup appends to each title.
                cleaned = re.sub(r"(开放浏览|直播回放|审核中|-)$", "", raw_title).strip()
                found.append({"bvid": tokens[0], "title": cleaned})
            return found
        except Exception:
            logger.warning("biliup list 执行失败,跳过同名视频匹配。")
            return []

    def normalize(self, text):
        """Lowercase *text*, keeping only CJK and alphanumeric characters."""
        return re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', text).lower()

    def find_bvid_by_title(self, target_title, video_list):
        """Return the bvid whose normalized title contains, or is contained
        in, the normalized target title; None when nothing matches."""
        wanted = self.normalize(target_title)
        for candidate in video_list:
            norm = self.normalize(candidate['title'])
            if wanted in norm or norm in wanted:
                return candidate['bvid']
        return None

    def process_folder(self, folder: Path, video_list):
        """Run both tasks for one session folder (idempotent via flag files)."""
        self._task_same_name(folder, video_list)
        self._task_clip(folder)

    def _task_same_name(self, folder, video_list):
        # Task A: a recent upload whose title matches the folder name.
        if not self.section_id_a:
            return
        flag = folder / "collection_a_done.flag"
        if flag.exists():
            return
        matched_bvid = self.find_bvid_by_title(folder.name, video_list)
        if not matched_bvid:
            return
        logger.info(f"任务A (同名): 匹配到 {matched_bvid},尝试添加...")
        info = self.client.get_video_info(matched_bvid)
        if info and self.client.add_video_to_section(self.section_id_a, info):
            flag.touch()

    def _task_clip(self, folder):
        # Task B: the BVID recorded by the clip uploader in bvid.txt.
        if not self.section_id_b:
            return
        flag = folder / "collection_b_done.flag"
        bvid_file = folder / "bvid.txt"
        if flag.exists() or not bvid_file.exists():
            return
        bvid = bvid_file.read_text(encoding='utf-8').strip()
        logger.info(f"任务B (切片): 读取到 {bvid},尝试添加...")
        info = self.client.get_video_info(bvid)
        if info and self.client.add_video_to_section(self.section_id_b, info):
            flag.touch()
def main():
    """Resolve section ids, then run a single sweep over SESSION_DIR."""
    logger.info("启动合集管理模块 (基于成功测试版)...")
    try:
        client = BiliCollectionClient()
    except Exception as e:
        logger.error(f"客户端初始化失败: {e}")
        return
    # Step 1: translate season ids into section ids (the critical step).
    logger.info("正在解析合集 ID...")
    real_section_a = client.resolve_section_id(SEASON_ID_A) if SEASON_ID_A > 0 else None
    real_section_b = client.resolve_section_id(SEASON_ID_B) if SEASON_ID_B > 0 else None
    if not real_section_a and not real_section_b:
        logger.error("没有解析到任何有效的 Section ID脚本停止。")
        return
    # Step 2: build the worker.
    worker = CollectionWorker(client, real_section_a, real_section_b)
    # Step 3: one-off folder scan.
    logger.info("开始扫描目录...")
    recent_videos = worker.fetch_recent_videos()
    if SESSION_DIR.exists():
        for folder in filter(Path.is_dir, SESSION_DIR.iterdir()):
            worker.process_folder(folder, recent_videos)
    logger.info("扫描完成。")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,249 @@
import os
import time
import subprocess
import json
import shutil
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, get_ai_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
SESSION_DIR = r'./session'  # workspace directory being watched
CHECK_INTERVAL = 2          # polling frequency (seconds)
CODEX_CMD = "codex"         # try "codex.cmd" instead if this errors on Windows
DONE_FLAG = "transcribe_done.flag"  # marker file that triggers processing
# Module logger
logger = get_system_logger('monitorSrt')
# ==========================================
# JSON Schema constraining the structured output produced by Codex
# (passed via --output-schema; see SrtHandler.process_with_codex).
SONG_SCHEMA = {
    "type": "object",
    "properties": {
        "songs": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "start": {"type": "string"},
                    "end": {"type": "string"},
                    "title": {"type": "string"},
                    "artist": {"type": "string"},
                    "confidence": {"type": "number"},
                    "evidence": {"type": "string"}
                },
                "required": ["start", "end", "title", "artist", "confidence", "evidence"],
                "additionalProperties": False
            }
        }
    },
    "required": ["songs"],
    "additionalProperties": False
}
# Task prompt handed to `codex exec` (collapsed to one line at call time).
TASK_PROMPT = """你是音乐片段识别助手。当前目录下有一个字幕文件。
任务:
1. 结合字幕内容并允许联网搜索进行纠错(识别同音字、唱错等)。
2. 识别出直播中唱过的所有歌曲,给出精确的开始和结束时间。
3. 同一首歌间隔 ≤30s 合并,>30s 分开。
4. 忽略纯聊天片段。
5. 无法确认的歌曲用 UNKNOWN 标注并在 evidence 说明。
最后请严格按照 Schema 生成 JSON 数据。"""
# ==========================================
class SrtHandler(FileSystemEventHandler):
    """Reacts to finished transcriptions and runs Codex song recognition.

    Triggers on the transcribe-done flag file (or a moved-in .srt) and
    produces songs.json plus a Bilibili-comment-style songs.txt in the
    session folder.
    """

    def on_created(self, event):
        # Trigger on the completion flag, not on the .srt itself, so we
        # never race the transcriber while it is still writing.
        if not event.is_directory and event.src_path.endswith(DONE_FLAG):
            logger.debug(f"检测到转录完成标记: {event.src_path}")
            self.process_with_codex(Path(event.src_path))

    def on_moved(self, event):
        # Some tools write a temp file first and rename it into place.
        if not event.is_directory and event.dest_path.lower().endswith('.srt'):
            logger.debug(f"检测到字幕文件移动: {event.dest_path}")
            self.process_with_codex(Path(event.dest_path))

    def process_with_codex(self, srt_path):
        """Run the Codex CLI in the flag's folder and post-process its output.

        Fixes vs. the original: the redundant outer try wrapping a second
        inner try (with only one except) is collapsed into a single
        try/except, and the dead commented-out invocation code is removed.
        """
        work_dir = srt_path.parent
        # Idempotence guard: don't re-run a folder that already has results.
        if (work_dir / "songs.json").exists():
            logger.info(f"songs.json 已存在,跳过: {work_dir.name}")
            return
        logger.info(f"发现新字幕,准备识别歌曲: {work_dir.name}")
        ai_log, ai_log_file = get_ai_logger('codex', 'songs')
        ai_log.info("="*50)
        ai_log.info("Codex 歌曲识别任务开始")
        ai_log.info(f"工作目录: {work_dir}")
        ai_log.info("="*50)
        logger.debug("准备 Schema 文件...")
        ai_log.info("生成 JSON Schema")
        # Drop a schema file next to the subtitle for Codex to follow.
        schema_file = work_dir / "song_schema.json"
        with open(schema_file, "w", encoding="utf-8") as f:
            json.dump(SONG_SCHEMA, f, ensure_ascii=False, indent=2)
        ai_log.info(f"Schema 文件: {schema_file.name}")
        logger.info("调用 Codex (Non-interactive mode)...")
        ai_log.info("开始 Codex 执行")
        ai_log.info(f"命令: {CODEX_CMD} exec")
        ai_log.info(f"任务提示: {TASK_PROMPT[:100]}...")
        # Prompt collapsed to one line to avoid shell-parsing surprises.
        cmd = [
            CODEX_CMD, "exec",
            TASK_PROMPT.replace('\n', ' '),
            "--full-auto",
            "--sandbox", "workspace-write",
            "--output-schema", "./song_schema.json",
            "-o", "songs.json",
            "--skip-git-repo-check",
            "--json"  # JSONL event stream for detailed logging
        ]
        ai_log.info(f"完整命令: {subprocess.list2cmdline(cmd)}")
        try:
            # Pass the argv list directly with shell=False: portable and
            # immune to quoting issues.
            start_time = time.time()
            result = subprocess.run(
                cmd,
                cwd=str(work_dir),
                shell=False,
                capture_output=True,
                text=True,
                encoding='utf-8'
            )
            elapsed = time.time() - start_time
            ai_log.info(f"Codex 执行完成,耗时: {elapsed:.2f}")
            ai_log.info(f"返回码: {result.returncode}")
            self._log_codex_output(result, ai_log)
            if result.returncode == 0:
                logger.info(f"Codex 执行成功: {work_dir.name}")
                ai_log.info("Codex 执行成功")
                self.generate_txt_fallback(work_dir, ai_log)
            else:
                logger.error(f"Codex 返回错误码 {result.returncode}")
                logger.error(f"错误详情: {result.stderr.strip() or result.stdout.strip()}")
                ai_log.error(f"Codex 执行失败,错误码: {result.returncode}")
        except Exception as e:
            log_exception(logger, e, "Codex 调用异常")
            log_exception(ai_log, e, "Codex 执行异常")
        ai_log.info("="*50)
        ai_log.info("Codex 歌曲识别任务完成")
        ai_log.info("="*50)
        logger.info(f"AI日志已保存: {ai_log_file}")

    def _log_codex_output(self, result, ai_log):
        """Mirror Codex's stdout (JSONL events) and stderr into the AI log."""
        if result.stdout:
            ai_log.info("=== Codex 执行日志 ===")
            for line in result.stdout.strip().split('\n'):
                if not line.strip():
                    continue
                try:
                    event = json.loads(line)
                except json.JSONDecodeError:
                    # Not JSON: record the raw line as-is.
                    ai_log.info(line)
                    continue
                event_type = event.get('type', 'unknown')
                # Log level depends on the event type.
                if event_type == 'error':
                    ai_log.error(f"Codex Error: {json.dumps(event, ensure_ascii=False)}")
                elif event_type in ['tool_use', 'command_execution', 'file_operation']:
                    ai_log.info(f"Codex Action: {json.dumps(event, ensure_ascii=False)}")
                else:
                    ai_log.debug(f"Codex Event: {json.dumps(event, ensure_ascii=False)}")
        if result.stderr:
            ai_log.warning("=== STDERR ===")
            for line in result.stderr.strip().split('\n'):
                if line.strip():
                    ai_log.warning(line)

    def generate_txt_fallback(self, work_dir, ai_log):
        """Parse songs.json and write a Bilibili-comment-style songs.txt."""
        json_path = work_dir / "songs.json"
        txt_path = work_dir / "songs.txt"
        try:
            if json_path.exists():
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                songs = data.get('songs', [])
                logger.info(f"识别到 {len(songs)} 首歌曲")
                ai_log.info(f"解析结果: {len(songs)} 首歌曲")
                with open(txt_path, 'w', encoding='utf-8') as t:
                    for s in songs:
                        # SRT timestamp (00:00:00,360) -> Bilibili form (00:00:00)
                        start_time = s['start'].split(',')[0]
                        line = f"{start_time} {s['title']}{s['artist']}\n"
                        t.write(line)
                        ai_log.debug(f" {s['title']}{s['artist']} ({start_time})")
                logger.info(f"成功生成: {txt_path.name}")
                ai_log.info(f"生成 songs.txt 成功")
        except Exception as e:
            log_exception(logger, e, "生成 txt 失败")
            log_exception(ai_log, e, "生成 songs.txt 失败")
def main():
    """Create the watch directory if needed, then run the observer loop."""
    watch_path = Path(SESSION_DIR)
    watch_path.mkdir(parents=True, exist_ok=True)
    for banner in ("=" * 50, "字幕监控模块启动 (Codex 歌曲识别)", "=" * 50):
        logger.info(banner)
    logger.info(f"监控目录: {SESSION_DIR}")
    logger.info(f"Codex 命令: {CODEX_CMD}")
    observer = Observer()
    observer.schedule(SrtHandler(), str(watch_path), recursive=True)
    observer.start()
    logger.info("文件监控已启动")
    try:
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        logger.info("接收到停止信号,正在关闭...")
        observer.stop()
    observer.join()
    logger.info("字幕监控模块已停止")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,155 @@
import json
import time
import requests
import subprocess
import re
import shutil
import random
from pathlib import Path
# ================= Configuration =================
COOKIE_FILE = Path("./cookies.json")  # Bilibili login cookies
TARGET_SEASON_ID = 7196643            # collection to backfill
# Keywords every title must contain (empty list = match everything)
MUST_KEYWORDS = []
# Keyword whose presence excludes a title
EXCLUDE_KEYWORD = "纯享"
# Locate biliup on PATH, falling back to the bare command name
BILIUP_PATH = shutil.which("biliup") or "biliup"
# ===========================================
class BiliCollectionBatchTool:
    """One-shot tool: add every matching upload to TARGET_SEASON_ID.

    Filters `biliup list` output by MUST_KEYWORDS / EXCLUDE_KEYWORD, removes
    videos already present in the collection, then submits the rest in one
    batched request.
    """

    def __init__(self):
        self.load_cookies()
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://member.bilibili.com/platform/upload-manager/distribution"
        })

    def load_cookies(self):
        """Load cookies (biliup `cookie_info` layout or plain mapping)."""
        if not COOKIE_FILE.exists():
            raise FileNotFoundError(f"找不到 Cookies 文件: {COOKIE_FILE}")
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
        if "cookie_info" in data:
            self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])}
        else:
            self.cookies = data
        self.csrf = self.cookies.get("bili_jct")

    def get_existing_bvids(self, season_id):
        """Return (section_id, existing_bvid_set) for *season_id*.

        Pulls the collection's current episode list so deduplication is
        exact. Returns (None, set()) when the collection cannot be resolved.
        """
        print(f"📡 正在拉取合集 {season_id} 的现有视频数据...")
        self.session.cookies.update(self.cookies)
        try:
            # Step 1: season id -> first section id
            list_url = "https://member.bilibili.com/x2/creative/web/seasons"
            res_list = self.session.get(list_url, params={"pn": 1, "ps": 50}).json()
            section_id = None
            for s in res_list.get("data", {}).get("seasons", []):
                if s.get("season", {}).get("id") == season_id:
                    sections = s.get("sections", {}).get("sections", [])
                    if sections:
                        section_id = sections[0]["id"]
                    break
            if not section_id:
                return None, set()
            # Step 2: list the section's episodes
            detail_url = "https://member.bilibili.com/x2/creative/web/season/section"
            res_detail = self.session.get(detail_url, params={"id": section_id}).json()
            existing = set()
            if res_detail.get("code") == 0:
                for ep in res_detail.get("data", {}).get("episodes", []):
                    existing.add(ep.get("bvid"))
            print(f"📊 查重参考:合集内已有 {len(existing)} 个视频。")
            return section_id, existing
        except Exception as e:
            print(f"❌ 查重逻辑失败: {e}")
            return None, set()

    def fetch_filtered_videos(self, existing_set):
        """Return new BVIDs that match MUST_KEYWORDS, don't contain
        EXCLUDE_KEYWORD, and aren't already in *existing_set*."""
        print(f"🔍 扫描符合条件且不含“{EXCLUDE_KEYWORD}”的视频...")
        try:
            res = subprocess.run([BILIUP_PATH, "list", "--max-pages", "20"], capture_output=True, text=True, encoding='utf-8')
            output = re.sub(r"\x1b\[[0-9;]*[A-Za-z]", "", res.stdout)
            to_add_bvids = []
            for line in output.splitlines():
                if not line.startswith("BV"):
                    continue
                parts = line.split()
                bvid = parts[0]
                title = " ".join(parts[1:])
                is_match = all(kw in title for kw in MUST_KEYWORDS)
                is_excluded = EXCLUDE_KEYWORD in title
                if is_match and not is_excluded and bvid not in existing_set:
                    to_add_bvids.append(bvid)
            return to_add_bvids
        except Exception as e:
            print(f"❌ biliup 调用失败: {e}")
            return []

    def get_metadata(self, bv_list):
        """Resolve each BVID to the episode payload the add-API expects.

        Best-effort: a failed lookup skips that video but is now reported
        instead of being swallowed by a bare `except: pass`.
        """
        episodes = []
        url = "https://api.bilibili.com/x/web-interface/view"
        for bvid in bv_list:
            try:
                res = self.session.get(url, params={"bvid": bvid}).json()
                if res["code"] == 0:
                    d = res["data"]
                    episodes.append({
                        "aid": d["aid"], "cid": d["cid"],
                        "title": d["title"], "charging_pay": 0
                    })
                time.sleep(0.3)  # gentle pacing between lookups
            except Exception as e:
                print(f"❌ 获取 {bvid} 详情失败: {e}")
        return episodes

    def run(self):
        """Full pipeline: dedupe -> filter -> resolve metadata -> batch submit."""
        # 1. Exact dedup against the collection's current contents.
        section_id, existing_set = self.get_existing_bvids(TARGET_SEASON_ID)
        if not section_id:
            print("❌ 无法解析合集,任务终止。")
            return
        # 2. Keyword filtering + dedup removal.
        target_bvids = self.fetch_filtered_videos(existing_set)
        if not target_bvids:
            print("✨ 扫描完毕:没有符合条件的新视频。")
            return
        print(f"💡 过滤后,确认有 {len(target_bvids)} 个视频待加入合集。")
        # 3. Resolve metadata.
        final_list = self.get_metadata(target_bvids)
        if not final_list:
            print("❌ 未能获取有效的视频详情。")
            return
        # 4. Single batched submission.
        print(f"🚀 正在发送合并添加请求...")
        add_url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add"
        res = self.session.post(add_url, params={"csrf": self.csrf}, json={
            "sectionId": section_id,
            "episodes": final_list
        }).json()
        if res["code"] == 0:
            print(f"🎉 成功!已补齐 {len(final_list)} 个不含“纯享”的录播视频。")
        else:
            print(f"❌ 批量失败: {res['message']} (Code: {res['code']})")
if __name__ == "__main__":
    # Run the one-shot backfill when invoked as a script.
    BiliCollectionBatchTool().run()

147
archive_scripts/temp_fromA_2_B.py Executable file
View File

@ -0,0 +1,147 @@
import json
import time
import requests
import shutil
import random
from pathlib import Path
# ================= Configuration =================
COOKIE_FILE = Path("./cookies.json")  # Bilibili login cookies
SOURCE_SEASON_ID = 7196643  # source collection (the big one)
TARGET_SEASON_ID = 7288568  # target collection (short videos)
MAX_DURATION_SEC = 20 * 60  # threshold: 20 minutes (1200 seconds)
# ===========================================
class BiliCollectionTransferTool:
    """Moves videos shorter than MAX_DURATION_SEC from the source collection
    to the target collection, in safe add-then-delete batches."""

    def __init__(self):
        self.load_cookies()
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://member.bilibili.com/platform/upload-manager/distribution"
        })

    def load_cookies(self):
        """Load cookies (biliup `cookie_info` layout or plain mapping)."""
        if not COOKIE_FILE.exists():
            raise FileNotFoundError(f"找不到 Cookies 文件: {COOKIE_FILE}")
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
        if "cookie_info" in data:
            self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])}
        else:
            self.cookies = data
        self.csrf = self.cookies.get("bili_jct")

    def get_season_info(self, season_id):
        """Return (section_id, episodes) for *season_id*, or (None, [])."""
        self.session.cookies.update(self.cookies)
        try:
            # Step 1: season id -> first section id
            list_url = "https://member.bilibili.com/x2/creative/web/seasons"
            res_list = self.session.get(list_url, params={"pn": 1, "ps": 50}).json()
            section_id = None
            for s in res_list.get("data", {}).get("seasons", []):
                if s.get("season", {}).get("id") == season_id:
                    sections = s.get("sections", {}).get("sections", [])
                    if sections:
                        section_id = sections[0]["id"]
                    break
            if not section_id:
                return None, []
            # Step 2: detailed episode list of that section
            detail_url = "https://member.bilibili.com/x2/creative/web/season/section"
            res_detail = self.session.get(detail_url, params={"id": section_id}).json()
            # The API may return null for an empty section; normalize to [].
            episodes = res_detail.get("data", {}).get("episodes", []) or []
            return section_id, episodes
        except Exception as e:
            print(f"❌ 获取合集 {season_id} 失败: {e}")
            return None, []

    def get_video_duration(self, bvid):
        """Return the video's duration in seconds.

        Returns a huge sentinel (999999) on any failure so unresolvable
        videos are never misclassified as short; failures are now logged
        instead of being swallowed by a bare `except: pass`.
        """
        url = "https://api.bilibili.com/x/web-interface/view"
        try:
            res = self.session.get(url, params={"bvid": bvid}).json()
            if res["code"] == 0:
                return res["data"]["duration"]
            print(f"⚠️ 查询 {bvid} 时长失败: {res.get('message')}")
        except Exception as e:
            print(f"⚠️ 查询 {bvid} 时长异常: {e}")
        return 999999

    def run(self):
        """Scan the source collection and migrate short videos in batches."""
        src_section_id, src_episodes = self.get_season_info(SOURCE_SEASON_ID)
        dst_section_id, dst_episodes = self.get_season_info(TARGET_SEASON_ID)
        if not src_section_id or not dst_section_id:
            print("❌ 无法获取合集信息,请检查 ID 是否正确。")
            return
        # Guard against malformed entries when collecting existing BVIDs.
        dst_bvids = {ep['bvid'] for ep in dst_episodes if ep and 'bvid' in ep}
        print(f"📡 源合集共有 {len(src_episodes)} 个视频,开始检查时长...")
        to_move = []
        for idx, ep in enumerate(src_episodes):
            bvid = ep['bvid']
            duration = self.get_video_duration(bvid)
            # Periodic progress report.
            if (idx + 1) % 10 == 0:
                print(f" 已检查 {idx + 1}/{len(src_episodes)}...")
            if duration < MAX_DURATION_SEC and bvid not in dst_bvids:
                to_move.append({
                    "aid": ep["aid"],
                    "cid": ep["cid"],
                    "title": ep["title"],
                    "bvid": bvid,
                    "charging_pay": 0
                })
            time.sleep(0.4)  # pace the per-video lookups
        if not to_move:
            print("✨ 未发现需要迁移的短视频。")
            return
        print(f"\n💡 共发现 {len(to_move)} 个短视频需要迁移。")
        # Migrate in batches of 30: add to the target first, remove from the
        # source only after the add succeeded, so a video is never lost.
        batch_size = 30
        for i in range(0, len(to_move), batch_size):
            batch = to_move[i:i+batch_size]
            batch_aids = [m["aid"] for m in batch]
            print(f"🚀 正在处理第 {i//batch_size + 1} 组迁移 ({len(batch)} 个)...")
            add_url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/add"
            res_add = self.session.post(add_url, params={"csrf": self.csrf}, json={
                "sectionId": dst_section_id,
                "episodes": batch
            }).json()
            if res_add["code"] == 0:
                del_url = "https://member.bilibili.com/x2/creative/web/season/section/episodes/delete"
                res_del = self.session.post(del_url, params={"csrf": self.csrf}, json={
                    "sectionId": src_section_id,
                    "aids": batch_aids
                }).json()
                if res_del["code"] == 0:
                    print(f" ✅ 成功移动 {len(batch)} 个。")
                else:
                    print(f" ⚠️ 移除失败: {res_del.get('message')}")
            else:
                print(f" ❌ 加入目标合集失败: {res_add.get('message')}")
            time.sleep(random.uniform(3, 6))
        print("\n🎉 迁移任务执行完毕。")
if __name__ == "__main__":
    # Run the migration when invoked as a script.
    BiliCollectionTransferTool().run()

89
archive_scripts/temp_get-10.py Executable file
View File

@ -0,0 +1,89 @@
import json
import time
import requests
from pathlib import Path
# ================= Configuration =================
COOKIE_FILE = Path("./cookies.json")  # Bilibili login cookies
TARGET_SEASON_ID = 7196643  # collection whose ordering is inspected
# ===========================================
class BiliCollectionChecker:
    """Diagnostic tool: print the first 10 episodes of a collection with
    their real publish times so the sort order can be verified visually."""

    def __init__(self):
        self.load_cookies()
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://member.bilibili.com/platform/upload-manager/distribution"
        })

    def load_cookies(self):
        """Load cookies (biliup `cookie_info` layout or plain mapping)."""
        if not COOKIE_FILE.exists():
            raise FileNotFoundError(f"找不到 Cookies 文件: {COOKIE_FILE}")
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
        if "cookie_info" in data:
            self.cookies = {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])}
        else:
            self.cookies = data

    def get_video_pubdate(self, bvid):
        """Return the video's publish timestamp (epoch seconds), or 0.

        Failures are reported instead of swallowed by a bare `except`.
        """
        url = "https://api.bilibili.com/x/web-interface/view"
        try:
            res = self.session.get(url, params={"bvid": bvid}).json()
            if res["code"] == 0:
                return res["data"]["pubdate"]
            print(f"⚠️ 查询 {bvid} 发布时间失败: {res.get('message')}")
        except Exception as e:
            print(f"⚠️ 查询 {bvid} 发布时间异常: {e}")
        return 0

    def check_top_10(self):
        """Fetch the collection's current order and print the top 10."""
        print(f"📡 正在拉取合集 {TARGET_SEASON_ID} 当前的实时排位...")
        self.session.cookies.update(self.cookies)
        try:
            # Step 1: locate the section id for the target season.
            list_res = self.session.get("https://member.bilibili.com/x2/creative/web/seasons", params={"pn": 1, "ps": 50}).json()
            section_id = None
            for s in list_res.get("data", {}).get("seasons", []):
                if s.get("season", {}).get("id") == TARGET_SEASON_ID:
                    sections = s.get("sections", {}).get("sections", [])
                    # Guard the previously unchecked [0] index: a collection
                    # can exist without any section.
                    if sections:
                        section_id = sections[0]['id']
                    break
            if not section_id:
                print("❌ 未找到合集信息")
                return
            # Step 2: fetch the section and take its first 10 episodes.
            detail_url = "https://member.bilibili.com/x2/creative/web/season/section"
            res_detail = self.session.get(detail_url, params={"id": section_id}).json()
            if res_detail.get("code") != 0:
                print(f"❌ 获取详情失败: {res_detail.get('message')}")
                return
            episodes = res_detail.get("data", {}).get("episodes", [])
            top_10 = episodes[:10]
            print("\n" + "="*60)
            print(f"{'排位':<4} | {'发布时间':<20} | {'BVID':<12} | {'视频标题'}")
            print("-" * 60)
            for idx, ep in enumerate(top_10):
                bvid = ep['bvid']
                # Re-query the real publish time to verify the sort field.
                pubtime = self.get_video_pubdate(bvid)
                time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(pubtime)) if pubtime > 0 else "未知"
                print(f"#{idx+1:<3} | {time_str:<20} | {bvid:<12} | {ep['title']}")
            print("="*60)
            print(f"\n💡 如果看到的发布时间是从 2025 年开始递增的,说明是【正序】。")
            print(f"💡 如果是从 2026 年开始递减的,说明是【逆序】。")
        except Exception as e:
            print(f"❌ 运行异常: {e}")
if __name__ == "__main__":
    # Print the top-10 ordering report when invoked as a script.
    BiliCollectionChecker().check_top_10()

218
archive_scripts/temp_sort.py Executable file
View File

@ -0,0 +1,218 @@
import requests
import time
import json
import random
from pathlib import Path
# ================= Configuration =================
COOKIE_FILE = Path("./cookies.json")  # Bilibili login cookies
TARGET_SEASON_ID = 7196643  # collection to re-sort
ASCENDING_ORDER = True      # True: earliest published first (1, 2, 3...)
# ===========================================
def extract_cookie_from_list(cookie_list):
    """Pull the SESSDATA and bili_jct values out of a list of cookie dicts.

    Returns ("", "") entries for cookies that are absent; the last
    occurrence wins when a name appears more than once.
    """
    wanted = {"SESSDATA": "", "bili_jct": ""}
    for entry in cookie_list:
        name = entry.get("name")
        if name in wanted:
            wanted[name] = entry.get("value")
    return wanted["SESSDATA"], wanted["bili_jct"]
def load_cookies(file_path):
    """Load (SESSDATA, bili_jct) from a cookies JSON file.

    Accepts several layouts: a bare cookie list, biliup's
    {"cookie_info": {"cookies": [...]}}, {"cookies": [...]}, or a flat
    name -> value mapping.  Prints an error and exits with status 1 when
    the file is missing or the two required cookies cannot be found.
    """
    if not file_path.exists():
        print(f"[!] 错误: 找不到文件 {file_path}")
        # raise SystemExit instead of the site-module helper exit(): same
        # observable behavior, but works under `python -S` and embedders.
        raise SystemExit(1)
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        sessdata = ""
        bili_jct = ""
        if isinstance(data, list):
            sessdata, bili_jct = extract_cookie_from_list(data)
        elif isinstance(data, dict):
            if "cookie_info" in data and "cookies" in data["cookie_info"]:
                sessdata, bili_jct = extract_cookie_from_list(data["cookie_info"]["cookies"])
            elif "cookies" in data and isinstance(data["cookies"], list):
                sessdata, bili_jct = extract_cookie_from_list(data["cookies"])
            else:
                # Flat mapping layout.
                sessdata = data.get("SESSDATA", "")
                bili_jct = data.get("bili_jct", "")
        if not sessdata or not bili_jct:
            print("[!] 错误: cookies.json 中未找到 SESSDATA 或 bili_jct")
            # SystemExit is a BaseException, so the handler below ignores it.
            raise SystemExit(1)
        return sessdata, bili_jct
    except Exception as e:
        print(f"[!] 解析 cookies.json 失败: {e}")
        raise SystemExit(1)
# Initialize credentials at import time: everything below depends on them.
SESSDATA, BILI_JCT = load_cookies(COOKIE_FILE)
# Only the first/last 4 characters are printed so full tokens never hit logs.
print(f"[*] SESSDATA 读取成功: {SESSDATA[:4]}...{SESSDATA[-4:]}")
print(f"[*] bili_jct 读取成功: {BILI_JCT[:4]}...{BILI_JCT[-4:]}")
# Shared headers for all Bilibili API requests (cookie-based auth).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Cookie": f"SESSDATA={SESSDATA}; bili_jct={BILI_JCT}",
    "Content-Type": "application/json"
}
def get_section_id_by_season(season_id):
    """Page through the account's seasons and return the first section id
    of the season whose id matches *season_id*, or None when not found."""
    print(f"[*] 正在查找合集 ID {season_id} 的小节信息...")
    url = "https://member.bilibili.com/x2/creative/web/seasons"
    page = 1
    while True:
        params = {"pn": page, "ps": 30, "order": "", "sort": ""}
        resp = requests.get(url, params=params, headers=HEADERS)
        data = resp.json()
        if data["code"] != 0:
            print(f"[!] 获取合集列表失败: {data['message']}")
            if data["code"] == -101:
                # -101 is Bilibili's "not logged in" error code.
                print("[!] 提示: 账号未登录,请检查 cookies.json")
            return None
        seasons_list = data["data"]["seasons"]
        if not seasons_list:
            # Ran out of pages without a match.
            break
        for s_obj in seasons_list:
            s_info = s_obj["season"]
            if s_info["id"] == season_id:
                title = s_info["title"]
                print(f"[*] 找到合集: 《{title}")
                if "sections" in s_obj and "sections" in s_obj["sections"]:
                    # Use the first section by default.
                    first_section = s_obj["sections"]["sections"][0]
                    sec_id = first_section["id"]
                    sec_title = first_section["title"]
                    print(f"[*] 锁定小节: [{sec_title}] (Section ID: {sec_id})")
                    return sec_id
                else:
                    print("[!] 该合集下没有发现小节结构。")
                    return None
        page += 1
        time.sleep(0.5)  # polite delay between pages
    print(f"[!] 未在您的账号中找到 Season ID: {season_id}")
    return None
def get_video_publish_time(bvid):
    """Return (pubdate, title) for a video, or (0, "Unknown") on failure.

    pubdate is the Unix timestamp reported by the web-interface view API.
    The shared HEADERS are required: anonymous requests get rejected.
    """
    url = "https://api.bilibili.com/x/web-interface/view"
    params = {"bvid": bvid}
    try:
        # FIX: add a timeout so one stuck request cannot stall the whole sort.
        resp = requests.get(url, params=params, headers=HEADERS, timeout=10)
        data = resp.json()
        if data["code"] == 0:
            return data["data"]["pubdate"], data["data"]["title"]
        else:
            # Surface the concrete API error for this BV id.
            print(f"\n[!] 获取视频 {bvid} 失败: code={data['code']}, msg={data['message']}")
            return 0, "Unknown"
    except Exception as e:
        print(f"\n[!] 请求异常: {e}")
        return 0, "Unknown"
def sort_videos(section_id):
    """Reorder a section's episodes by publish date and submit the new order.

    Steps: fetch the section's episodes, look up each video's publish time
    (rate-limited), sort by date (direction controlled by ASCENDING_ORDER),
    then POST the new sort list back with the csrf token.
    Aborts without submitting when every time lookup failed.
    """
    # 1. Fetch the episodes currently in the section.
    url_get = "https://member.bilibili.com/x2/creative/web/season/section"
    # FIX: add a timeout — without one a stalled connection hangs forever.
    resp = requests.get(url_get, params={"id": section_id}, headers=HEADERS, timeout=10)
    res_json = resp.json()
    if res_json["code"] != 0:
        print(f"[!] API 错误: {res_json['message']}")
        return
    section_info = res_json["data"]["section"]
    episodes = res_json["data"]["episodes"]
    if not episodes:
        print("[!] 合集内无视频。")
        return
    total = len(episodes)
    print(f"[*] 获取到 {total} 个视频,开始查询发布时间...")
    video_list = []
    success_count = 0
    for idx, ep in enumerate(episodes):
        # Random 0.2-0.5 s delay: gentler on the API than a fixed interval.
        time.sleep(random.uniform(0.2, 0.5))
        bvid = ep["bvid"]
        pubdate, title = get_video_publish_time(bvid)
        # Simple per-item progress display.
        date_str = "Fail/Unknown"
        if pubdate != 0:
            date_str = time.strftime('%Y-%m-%d', time.localtime(pubdate))
            success_count += 1
        print(f"  [{idx+1}/{total}] {title[:15]:<15} -> {date_str}")
        video_list.append({
            "id": ep["id"],
            # Prefer the full title returned by the view API over the episode stub.
            "title": ep["title"] if title == "Unknown" else title,
            "pubdate": pubdate
        })
    if success_count == 0:
        print("[!] 错误: 所有视频时间查询均失败,终止排序以免数据混乱。")
        return
    # 2. Sort (failed lookups have pubdate 0 and gravitate to one end).
    print("[*] 正在计算排序顺序...")
    video_list.sort(key=lambda x: x['pubdate'], reverse=not ASCENDING_ORDER)
    # 3. Submit the new order.
    print("[*] 正在提交新的排序列表...")
    sorts_payload = [{"id": v["id"], "sort": i+1} for i, v in enumerate(video_list)]
    payload = {
        "section": {
            "id": section_info["id"],
            "seasonId": section_info["seasonId"],
            "title": section_info["title"],
            "type": section_info["type"]
        },
        "sorts": sorts_payload
    }
    url_edit = f"https://member.bilibili.com/x2/creative/web/season/section/edit?csrf={BILI_JCT}"
    try:
        # FIX: bounded timeout on the submit call as well.
        resp_submit = requests.post(url_edit, json=payload, headers=HEADERS, timeout=30)
        result = resp_submit.json()
        if result["code"] == 0:
            print(f"\n[SUCCESS] 合集《{section_info['title']}》排序更新成功!")
        else:
            print(f"\n[FAIL] 更新失败: {result['message']}")
    except Exception as e:
        print(f"\n[!] 提交时发生网络错误: {e}")
if __name__ == "__main__":
    # Entry point: resolve the target season's first section, then reorder it.
    print("--- Bilibili 合集自动排序工具 (v2.0 fixed) ---")
    target_section_id = get_section_id_by_season(TARGET_SEASON_ID)
    if target_section_id:
        sort_videos(target_section_id)

351
archive_scripts/upload-1.py Normal file
View File

@ -0,0 +1,351 @@
import os
import time
import subprocess
import json
import re
import random
import shutil
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
SESSION_DIR = r'./session'              # workspace directory being watched
CHECK_INTERVAL = 5                      # main-loop sleep interval (seconds)
BILIUP_PATH = "./biliup"                # biliup executable
CONFIG_FILE = "upload_config.json"      # upload template / settings file
DONE_FLAG = "split_done.flag"           # marker written by monitorSongs.py
UPLOAD_FLAG = "upload_done.flag"        # marker written by this script
# Module logger
logger = get_system_logger('upload')
# ==========================================
class UploadConfig:
    """Load and expose the upload configuration (templates, streamers, quotes)."""

    def __init__(self, config_path):
        self.config_path = Path(config_path)
        self.config = self.load_config()

    def load_config(self):
        """Load the JSON config file; fall back to built-in defaults on any failure."""
        try:
            if not self.config_path.exists():
                logger.error(f"配置文件不存在: {self.config_path}")
                return self.get_default_config()
            with open(self.config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
            logger.info(f"成功加载配置文件: {self.config_path}")
            return config
        except Exception as e:
            log_exception(logger, e, "加载配置文件失败")
            return self.get_default_config()

    def get_default_config(self):
        """Return the built-in fallback configuration used when the file is missing or broken."""
        logger.warning("使用默认配置")
        return {
            "upload_settings": {
                "tid": 31,
                "copyright": 2,
                "source": "直播回放",
                "cover": ""
            },
            "template": {
                "title": "{streamer}_{date}",
                "description": "录制剪辑\n\n{songs_list}",
                "tag": "翻唱,直播切片,唱歌,音乐",
                "dynamic": ""
            },
            "streamers": {},
            "quotes": [],
            "filename_patterns": {
                "patterns": []
            }
        }

    def parse_filename(self, filename):
        """Extract streamer name and date from a recording name.

        Tries each configured regex pattern in order; on no match, falls back
        to using the whole name as the streamer with an empty date.
        """
        patterns = self.config.get("filename_patterns", {}).get("patterns", [])
        for pattern_config in patterns:
            regex = pattern_config.get("regex")
            if not regex:
                continue
            match = re.match(regex, filename)
            if match:
                data = match.groupdict()
                date_format = pattern_config.get("date_format", "{date}")
                try:
                    formatted_date = date_format.format(**data)
                    data['date'] = formatted_date
                except KeyError:
                    # Pattern did not capture the fields the date format needs.
                    pass
                logger.debug(f"文件名匹配成功: {pattern_config.get('name')} -> {data}")
                return data
        # FIX: log the actual unmatched name instead of the literal "(unknown)"
        # (the original f-string contained no placeholder).
        logger.warning(f"文件名未匹配任何模式: {filename}")
        return {"streamer": filename, "date": ""}

    def get_random_quote(self):
        """Return a random quote dict ({'text', 'author'}); empty strings when none configured."""
        quotes = self.config.get("quotes", [])
        if not quotes:
            return {"text": "", "author": ""}
        return random.choice(quotes)
class UploadHandler(FileSystemEventHandler):
    """Watchdog handler that uploads finished clip sets to Bilibili in batches.

    Waits for the split_done.flag marker, renders title/description from the
    configured templates, then submits the clips with biliup: the first five
    files create the submission, remaining files are appended to its BV id.
    """

    def __init__(self, config):
        # Stems currently being uploaded — suppresses duplicate events.
        self.processing_sets = set()
        self.config = config

    def on_created(self, event):
        # Some watchdog backends deliver bytes paths; normalise to str.
        src_path = event.src_path
        if isinstance(src_path, bytes):
            src_path = src_path.decode('utf-8')
        # React only to creation of the split-done marker file.
        if not event.is_directory and src_path.lower().endswith(DONE_FLAG):
            logger.debug(f"检测到切割完成标记: {src_path}")
            self.handle_upload(Path(src_path))

    def on_moved(self, event):
        # Same handling for the marker arriving via a rename/move.
        dest_path = event.dest_path
        if isinstance(dest_path, bytes):
            dest_path = dest_path.decode('utf-8')
        if not event.is_directory and dest_path.lower().endswith(DONE_FLAG):
            logger.debug(f"检测到切割完成标记移动: {dest_path}")
            self.handle_upload(Path(dest_path))

    def handle_upload(self, flag_path):
        """Upload every clip in *flag_path*'s workspace as one batched submission."""
        work_dir = flag_path.parent
        video_stem = work_dir.name
        upload_done = work_dir / UPLOAD_FLAG
        split_dir = work_dir / "split_video"
        # Duplicate-work guard: already uploaded, or another event in flight.
        if upload_done.exists() or video_stem in self.processing_sets:
            logger.debug(f"上传已完成或正在处理,跳过: {video_stem}")
            return
        logger.info("="*50)
        logger.info(f"准备上传: {video_stem}")
        logger.info("="*50)
        self.processing_sets.add(video_stem)
        try:
            # 1. Parse streamer/date out of the workspace directory name.
            parsed = self.config.parse_filename(video_stem)
            streamer = parsed.get('streamer', video_stem)
            date = parsed.get('date', '')
            logger.info(f"主播: {streamer}, 日期: {date}")
            # 2. Read song metadata produced by earlier pipeline stages.
            songs_json = work_dir / "songs.json"
            songs_txt = work_dir / "songs.txt"
            songs = []
            song_count = 0
            songs_list = ""
            if songs_json.exists():
                try:
                    with open(songs_json, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                        songs = data.get('songs', [])
                        song_count = len(songs)
                        logger.info(f"读取到 {song_count} 首歌曲")
                except Exception as e:
                    log_exception(logger, e, "读取 songs.json 失败")
            if songs_txt.exists():
                songs_list = songs_txt.read_text(encoding='utf-8').strip()
                logger.info("已读取歌单文本")
            # 3. Pick a random quote for the description template.
            quote = self.config.get_random_quote()
            daily_quote = quote.get('text', '')
            quote_author = quote.get('author', '')
            # 4. Variables available to the title/description/dynamic templates.
            template_vars = {
                'streamer': streamer,
                'date': date,
                'song_count': song_count,
                'songs_list': songs_list,
                'daily_quote': daily_quote,
                'quote_author': quote_author
            }
            # 5. Render title, description and dynamic text.
            template = self.config.config.get('template', {})
            title = template.get('title', '{streamer}_{date}').format(**template_vars)
            description = template.get('description', '{songs_list}').format(**template_vars)
            dynamic = template.get('dynamic', '').format(**template_vars)
            # 6. Tags: per-streamer override beats the generic template tag.
            streamers_config = self.config.config.get('streamers', {})
            if streamer in streamers_config:
                tags = streamers_config[streamer].get('tags', template.get('tag', ''))
                logger.info(f"使用主播专属标签: {streamer}")
            else:
                tags = template.get('tag', '翻唱,唱歌,音乐').format(**template_vars)
            logger.info(f"标题: {title}")
            logger.info(f"标签: {tags}")
            logger.debug(f"简介预览: {description[:100]}...")
            # 7. Collect the clip files to upload, in name (i.e. index) order.
            video_files = sorted([str(v) for v in split_dir.glob("*") if v.suffix.lower() in {'.mp4', '.mkv', '.mov', '.flv'}])
            if not video_files:
                logger.error(f"切片目录 {split_dir} 内没找到视频")
                return
            logger.info(f"找到 {len(video_files)} 个视频分片")
            # 8. Upload settings (category id, copyright flag, source, cover).
            upload_settings = self.config.config.get('upload_settings', {})
            tid = upload_settings.get('tid', 31)
            copyright_val = upload_settings.get('copyright', 2)
            source = upload_settings.get('source', '直播回放')
            cover = upload_settings.get('cover', '')
            # 9. Refresh biliup credentials before the long upload.
            renew_cmd = [BILIUP_PATH, "renew"]
            logger.info("尝试刷新 biliup 登录信息")
            renew_result = subprocess.run(renew_cmd, shell=False, capture_output=True, text=True, encoding='utf-8')
            if renew_result.returncode != 0:
                logger.warning(f"biliup renew 返回非 0: {renew_result.returncode}")
            else:
                logger.info("biliup renew 成功")
            # 10. Batched submission: first 5 files create the post, rest append.
            logger.info(f"启动分批投稿 (每批 5 个)...")
            first_batch = video_files[:5]
            remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
            # Build the initial `biliup upload` command.
            upload_cmd = [
                BILIUP_PATH, "upload",
                *first_batch,
                "--title", title,
                "--tid", str(tid),
                "--tag", tags,
                "--copyright", str(copyright_val),
                "--source", source,
                "--desc", description
            ]
            if dynamic:
                upload_cmd.extend(["--dynamic", dynamic])
            if cover and Path(cover).exists():
                upload_cmd.extend(["--cover", cover])
            # Run the initial upload.
            logger.info(f"正在上传第一批 ({len(first_batch)} 个文件)...")
            result = subprocess.run(upload_cmd, shell=False, capture_output=True, text=True, encoding='utf-8')
            if result.returncode == 0:
                # Extract the BV id from biliup's stdout so later batches can append.
                bv_match = re.search(r'"bvid":"(BV[A-Za-z0-9]+)"', result.stdout)
                if not bv_match:
                    # Fallback: any bare BV token in the output.
                    bv_match = re.search(r'(BV[A-Za-z0-9]+)', result.stdout)
                if bv_match:
                    bvid = bv_match.group(1)
                    logger.info(f"第一批投稿成功,获得 BV 号: {bvid}")
                    # Append the remaining batches to the new submission.
                    for idx, batch in enumerate(remaining_batches, 2):
                        logger.info(f"正在追加第 {idx} 批 ({len(batch)} 个文件) 到 {bvid}...")
                        time.sleep(15)  # give the server a breather between batches
                        append_cmd = [
                            BILIUP_PATH, "append",
                            "--vid", bvid,
                            *batch
                        ]
                        append_res = subprocess.run(append_cmd, shell=False, capture_output=True, text=True, encoding='utf-8')
                        if append_res.returncode != 0:
                            logger.error(f"第 {idx} 批追加失败: {append_res.stderr[:200]}")
                    logger.info(f"所有批次处理完成: {video_stem}")
                    upload_done.touch()
                    # Free disk space once the upload has gone through.
                    try:
                        if split_dir.exists():
                            shutil.rmtree(split_dir)
                            logger.info(f"已删除切片目录: {split_dir}")
                        for ext in ['.mp4', '.mkv', '.mov', '.flv', '.ts']:
                            original_video = work_dir / f"{video_stem}{ext}"
                            if original_video.exists():
                                original_video.unlink()
                                logger.info(f"已删除原视频: {original_video}")
                    except Exception as cleanup_err:
                        logger.error(f"清理空间失败: {cleanup_err}")
                else:
                    logger.error("第一批上传成功但未能在输出中识别到 BV 号,无法追加后续分片")
            else:
                logger.error(f"第一批投稿失败,错误码: {result.returncode}")
                logger.error(f"错误信息: {result.stderr[:500]}")
        except Exception as e:
            log_exception(logger, e, "上传处理异常")
        finally:
            # Always release the in-flight guard, even on failure.
            self.processing_sets.discard(video_stem)
            logger.info("="*50)
def main():
    """Start the upload watcher: backfill pending tasks, then watch SESSION_DIR."""
    path = Path(SESSION_DIR)
    path.mkdir(parents=True, exist_ok=True)
    logger.info("="*50)
    logger.info("上传模块启动 (Biliup 自动分批投稿)")
    logger.info("="*50)
    # Load templates/settings and wire up the filesystem watcher.
    config = UploadConfig(CONFIG_FILE)
    event_handler = UploadHandler(config)
    observer = Observer()
    observer.schedule(event_handler, str(path), recursive=True)
    # Backfill: workspaces that finished splitting while this script was down.
    logger.info("扫描待上传任务...")
    scan_count = 0
    for sub_dir in path.iterdir():
        if sub_dir.is_dir():
            split_flag = sub_dir / DONE_FLAG
            upload_flag = sub_dir / UPLOAD_FLAG
            if split_flag.exists() and not upload_flag.exists():
                logger.info(f"发现待上传任务: {sub_dir.name}")
                event_handler.handle_upload(split_flag)
                scan_count += 1
    logger.info(f"扫描完成,处理 {scan_count} 个待上传任务")
    observer.start()
    try:
        # Idle loop: the observer thread does the actual work.
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        observer.stop()
        observer.join()


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,325 @@
import os
import time
import subprocess
import json
import re
import random
import shutil
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
SESSION_DIR = r'./session'              # workspace directory being watched
CHECK_INTERVAL = 5                      # main-loop sleep interval (seconds)
BILIUP_PATH = "./biliup"                # biliup executable
CONFIG_FILE = "upload_config.json"      # upload template / settings file
DONE_FLAG = "split_done.flag"           # marker written by monitorSongs.py
UPLOAD_FLAG = "upload_done.flag"        # marker written by this script
# Module logger
logger = get_system_logger('upload')
# ==========================================
class UploadConfig:
    """Load and expose the upload configuration (templates, streamers, quotes)."""

    def __init__(self, config_path):
        self.config_path = Path(config_path)
        self.config = self.load_config()

    def load_config(self):
        """Load the JSON config file; fall back to built-in defaults on any failure."""
        try:
            if not self.config_path.exists():
                logger.error(f"配置文件不存在: {self.config_path}")
                return self.get_default_config()
            with open(self.config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
            logger.info(f"成功加载配置文件: {self.config_path}")
            return config
        except Exception as e:
            log_exception(logger, e, "加载配置文件失败")
            return self.get_default_config()

    def get_default_config(self):
        """Return the built-in fallback configuration used when the file is missing or broken."""
        logger.warning("使用默认配置")
        return {
            "upload_settings": {
                "tid": 31,
                "copyright": 2,
                "source": "直播回放",
                "cover": ""
            },
            "template": {
                "title": "{streamer}_{date}",
                "description": "自动录制剪辑\n\n{songs_list}",
                "tag": "翻唱,直播切片,唱歌,音乐",
                "dynamic": ""
            },
            "streamers": {},
            "quotes": [],
            "filename_patterns": {
                "patterns": []
            }
        }

    def parse_filename(self, filename):
        """Extract streamer name and date from a recording name.

        Tries each configured regex pattern in order; on no match, falls back
        to using the whole name as the streamer with an empty date.
        """
        patterns = self.config.get("filename_patterns", {}).get("patterns", [])
        for pattern_config in patterns:
            regex = pattern_config.get("regex")
            if not regex:
                continue
            match = re.match(regex, filename)
            if match:
                data = match.groupdict()
                date_format = pattern_config.get("date_format", "{date}")
                try:
                    formatted_date = date_format.format(**data)
                    data['date'] = formatted_date
                except KeyError:
                    # Pattern did not capture the fields the date format needs.
                    pass
                logger.debug(f"文件名匹配成功: {pattern_config.get('name')} -> {data}")
                return data
        # FIX: log the actual unmatched name instead of the literal "(unknown)"
        # (the original f-string contained no placeholder).
        logger.warning(f"文件名未匹配任何模式: {filename}")
        return {"streamer": filename, "date": ""}

    def get_random_quote(self):
        """Return a random quote dict ({'text', 'author'}); empty strings when none configured."""
        quotes = self.config.get("quotes", [])
        if not quotes:
            return {"text": "", "author": ""}
        return random.choice(quotes)
class UploadHandler(FileSystemEventHandler):
    """Watchdog handler that uploads a finished clip set to Bilibili via biliup.

    Waits for the split_done.flag marker, renders title/description from the
    configured templates, then submits every clip in one biliup invocation.
    """

    def __init__(self, config):
        # Stems currently being uploaded — suppresses duplicate events.
        self.processing_sets = set()
        self.config = config

    def on_created(self, event):
        # React only to creation of the split-done marker file.
        if not event.is_directory and event.src_path.lower().endswith(DONE_FLAG):
            logger.debug(f"检测到切割完成标记: {event.src_path}")
            self.handle_upload(Path(event.src_path))

    def on_moved(self, event):
        # Same handling for the marker arriving via a rename/move.
        if not event.is_directory and event.dest_path.lower().endswith(DONE_FLAG):
            logger.debug(f"检测到切割完成标记移动: {event.dest_path}")
            self.handle_upload(Path(event.dest_path))

    def handle_upload(self, flag_path):
        """Upload all clips in *flag_path*'s workspace as a single submission."""
        work_dir = flag_path.parent
        video_stem = work_dir.name
        upload_done = work_dir / UPLOAD_FLAG
        split_dir = work_dir / "split_video"
        # Duplicate-work guard: already uploaded, or another event in flight.
        if upload_done.exists() or video_stem in self.processing_sets:
            logger.debug(f"上传已完成或正在处理,跳过: {video_stem}")
            return
        logger.info("="*50)
        logger.info(f"准备上传: {video_stem}")
        logger.info("="*50)
        self.processing_sets.add(video_stem)
        try:
            # 1. Parse streamer/date out of the workspace directory name.
            parsed = self.config.parse_filename(video_stem)
            streamer = parsed.get('streamer', video_stem)
            date = parsed.get('date', '')
            logger.info(f"主播: {streamer}, 日期: {date}")
            # 2. Read song metadata produced by earlier pipeline stages.
            songs_json = work_dir / "songs.json"
            songs_txt = work_dir / "songs.txt"
            songs = []
            song_count = 0
            songs_list = ""
            if songs_json.exists():
                try:
                    with open(songs_json, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                        songs = data.get('songs', [])
                        song_count = len(songs)
                        logger.info(f"读取到 {song_count} 首歌曲")
                except Exception as e:
                    log_exception(logger, e, "读取 songs.json 失败")
            if songs_txt.exists():
                songs_list = songs_txt.read_text(encoding='utf-8').strip()
                logger.info("已读取歌单文本")
            # 3. Pick a random quote for the description template.
            quote = self.config.get_random_quote()
            daily_quote = quote.get('text', '')
            quote_author = quote.get('author', '')
            # 4. Variables available to the title/description/dynamic templates.
            template_vars = {
                'streamer': streamer,
                'date': date,
                'song_count': song_count,
                'songs_list': songs_list,
                'daily_quote': daily_quote,
                'quote_author': quote_author
            }
            # 5. Render title, description and dynamic text.
            template = self.config.config.get('template', {})
            title = template.get('title', '{streamer}_{date}').format(**template_vars)
            description = template.get('description', '{songs_list}').format(**template_vars)
            dynamic = template.get('dynamic', '').format(**template_vars)
            # 6. Tags: per-streamer override beats the generic template tag.
            streamers_config = self.config.config.get('streamers', {})
            if streamer in streamers_config:
                tags = streamers_config[streamer].get('tags', template.get('tag', ''))
                logger.info(f"使用主播专属标签: {streamer}")
            else:
                tags = template.get('tag', '翻唱,唱歌,音乐').format(**template_vars)
            logger.info(f"标题: {title}")
            logger.info(f"标签: {tags}")
            logger.debug(f"简介预览: {description[:100]}...")
            # 7. Collect the clip files to upload, in name (i.e. index) order.
            video_files = sorted([str(v) for v in split_dir.glob("*") if v.suffix.lower() in {'.mp4', '.mkv', '.mov', '.flv'}])
            if not video_files:
                logger.error(f"切片目录 {split_dir} 内没找到视频")
                return
            logger.info(f"找到 {len(video_files)} 个视频分片")
            # 8. Upload settings (category id, copyright flag, source, cover).
            upload_settings = self.config.config.get('upload_settings', {})
            tid = upload_settings.get('tid', 31)
            copyright_val = upload_settings.get('copyright', 2)
            source = upload_settings.get('source', '直播回放')
            cover = upload_settings.get('cover', '')
            # 9. Refresh biliup credentials before the long upload.
            renew_cmd = [BILIUP_PATH, "renew"]
            logger.info("尝试刷新 biliup 登录信息")
            renew_result = subprocess.run(renew_cmd, shell=False, capture_output=True, text=True, encoding='utf-8')
            if renew_result.returncode != 0:
                logger.warning(f"biliup renew 返回非 0: {renew_result.returncode}")
                logger.debug(f"renew stderr: {renew_result.stderr.strip()}")
            else:
                logger.info("biliup renew 成功")
            # 10. Run the whole upload as one biliup invocation.
            logger.info(f"启动 biliup 投稿...")
            cmd = [
                BILIUP_PATH, "upload",
                *video_files,
                "--title", title,
                "--tid", str(tid),
                "--tag", tags,
                "--copyright", str(copyright_val),
                "--source", source,
                "--desc", description
            ]
            if dynamic:
                cmd.extend(["--dynamic", dynamic])
            if cover and Path(cover).exists():
                cmd.extend(["--cover", cover])
            logger.debug(f"biliup 命令: {' '.join(cmd[:5])}... (共 {len(video_files)} 个文件)")
            # NOTE(review): the original comment claimed shell=True for Windows,
            # but shell=False is what actually runs here — confirm intent.
            result = subprocess.run(cmd, shell=False, capture_output=True, text=True, encoding='utf-8')
            if result.returncode == 0:
                logger.info(f"投稿成功: {video_stem}")
                logger.info(f"标题: {title}")
                upload_done.touch()  # stamp the workspace as uploaded
                logger.info("生成上传完成标记")
                # Free disk space once the upload has gone through.
                try:
                    # 1) remove the split_video directory
                    if split_dir.exists():
                        shutil.rmtree(split_dir)
                        logger.info(f"已删除切片目录: {split_dir}")
                    # 2) remove the original recording (common video suffixes)
                    for ext in ['.mp4', '.mkv', '.mov', '.flv', '.ts']:
                        original_video = work_dir / f"{video_stem}{ext}"
                        if original_video.exists():
                            original_video.unlink()
                            logger.info(f"已删除原视频: {original_video}")
                except Exception as cleanup_err:
                    logger.error(f"清理空间失败: {cleanup_err}")
            else:
                logger.error(f"投稿失败,错误码: {result.returncode}")
                logger.error(f"错误信息: {result.stderr[:500]}")
        except Exception as e:
            log_exception(logger, e, "上传处理异常")
        finally:
            # Always release the in-flight guard, even on failure.
            self.processing_sets.discard(video_stem)
            logger.info("="*50)
def main():
    """Start the upload watcher: backfill pending tasks, then watch SESSION_DIR."""
    path = Path(SESSION_DIR)
    path.mkdir(parents=True, exist_ok=True)
    logger.info("="*50)
    logger.info("上传模块启动 (Biliup 自动投稿)")
    logger.info("="*50)
    logger.info(f"监控目录: {SESSION_DIR}")
    logger.info(f"Biliup 路径: {BILIUP_PATH}")
    logger.info(f"配置文件: {CONFIG_FILE}")
    # Load templates/settings and wire up the filesystem watcher.
    config = UploadConfig(CONFIG_FILE)
    event_handler = UploadHandler(config)
    observer = Observer()
    observer.schedule(event_handler, str(path), recursive=True)
    # Backfill: workspaces with split_done.flag but no upload_done.flag.
    logger.info("扫描待上传任务...")
    scan_count = 0
    for sub_dir in path.iterdir():
        if sub_dir.is_dir():
            split_flag = sub_dir / DONE_FLAG
            upload_flag = sub_dir / UPLOAD_FLAG
            if split_flag.exists() and not upload_flag.exists():
                logger.info(f"发现待上传任务: {sub_dir.name}")
                event_handler.handle_upload(split_flag)
                scan_count += 1
    logger.info(f"扫描完成,处理 {scan_count} 个待上传任务")
    observer.start()
    logger.info("文件监控已启动")
    try:
        # Idle loop: the observer thread does the actual work.
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        logger.info("接收到停止信号,正在关闭...")
        observer.stop()
        observer.join()
        logger.info("上传模块已停止")


if __name__ == "__main__":
    main()

BIN
biliup Executable file

Binary file not shown.

3
filelist.txt Normal file
View File

@ -0,0 +1,3 @@
file '02月21日 22时06分 王海颖唱歌录播.flv'
file '02月21日 23时38分 王海颖唱歌录播.flv'
file '02月21日 23时47分 王海颖唱歌录播.flv'

127
logger.py Normal file
View File

@ -0,0 +1,127 @@
import logging
import os
from pathlib import Path
from logging.handlers import RotatingFileHandler
from datetime import datetime
# ==========================================
# Logging system configuration
# ==========================================
LOG_BASE_DIR = "./logs"
SYSTEM_LOG_DIR = os.path.join(LOG_BASE_DIR, "system")          # per-module system logs
AI_GROQ_LOG_DIR = os.path.join(LOG_BASE_DIR, "ai", "groq")     # one file per Groq call
AI_CODEX_LOG_DIR = os.path.join(LOG_BASE_DIR, "ai", "codex")   # one file per Codex call
# Record format shared by all handlers
LOG_FORMAT = "[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s"
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
# Rotation policy for system logs
MAX_BYTES = 10 * 1024 * 1024  # 10MB per file
BACKUP_COUNT = 5              # rotated files kept
# ==========================================
def setup_directories():
    """Ensure every log directory exists (idempotent)."""
    for log_dir in (SYSTEM_LOG_DIR, AI_GROQ_LOG_DIR, AI_CODEX_LOG_DIR):
        Path(log_dir).mkdir(parents=True, exist_ok=True)
def get_system_logger(module_name):
    """Return the shared system logger for *module_name*.

    The logger writes DEBUG+ records to a rotating file under
    SYSTEM_LOG_DIR and INFO+ records to the console. Repeated calls for
    the same module return the already-configured logger.

    :param module_name: module label, e.g. 'monitor' or 'upload'
    :return: configured ``logging.Logger``
    """
    setup_directories()
    logger = logging.getLogger(f"system.{module_name}")
    logger.setLevel(logging.DEBUG)
    # Already configured on a previous call — reuse as-is.
    if logger.handlers:
        return logger
    formatter = logging.Formatter(LOG_FORMAT, DATE_FORMAT)
    # Rotating file sink: captures everything down to DEBUG.
    file_handler = RotatingFileHandler(
        os.path.join(SYSTEM_LOG_DIR, f"{module_name}.log"),
        maxBytes=MAX_BYTES,
        backupCount=BACKUP_COUNT,
        encoding='utf-8',
    )
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    # Console sink: INFO and above only.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    for handler in (file_handler, console_handler):
        logger.addHandler(handler)
    return logger
def get_ai_logger(ai_type, task_name=""):
    """Create a dedicated, file-only logger for a single AI invocation.

    Each call produces a fresh timestamped log file so concurrent AI runs
    never interleave. The logger does not propagate to parent loggers.

    :param ai_type: 'groq' or 'codex' — selects the target directory
    :param task_name: optional task label used as the file-name prefix
    :return: ``(logger, log_file_path)`` tuple
    :raises ValueError: when *ai_type* is not a recognised AI type
    """
    setup_directories()
    # Timestamped file name keeps every invocation's log separate.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_filename = f"{task_name}_{timestamp}.log" if task_name else f"{timestamp}.log"
    if ai_type == "groq":
        log_dir = AI_GROQ_LOG_DIR
    elif ai_type == "codex":
        log_dir = AI_CODEX_LOG_DIR
    else:
        raise ValueError(f"未知的AI类型: {ai_type}")
    log_file = os.path.join(log_dir, log_filename)
    ai_logger = logging.getLogger(f"ai.{ai_type}.{timestamp}")
    ai_logger.setLevel(logging.DEBUG)
    # Drop handlers left over from any earlier logger with the same name.
    ai_logger.handlers.clear()
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(logging.Formatter(LOG_FORMAT, DATE_FORMAT))
    ai_logger.addHandler(file_handler)
    # Keep AI output out of the parent/system loggers.
    ai_logger.propagate = False
    return ai_logger, log_file
def log_exception(logger, exception, context=""):
    """Log *exception* (with traceback) through *logger*, optionally prefixed.

    :param logger: target ``logging.Logger``
    :param exception: the exception instance to record
    :param context: optional context string prepended to the message
    """
    detail = f"{type(exception).__name__}: {exception}"
    if context:
        detail = f"{context}: {detail}"
    logger.error(detail, exc_info=True)

126
monitor.py Normal file
View File

@ -0,0 +1,126 @@
import os
import shutil
import subprocess
import time
import sys
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
STAGE_DIR = r'./stage'         # incoming recordings land here
BACKUP_DIR = r'./backup'       # too-short recordings are parked here
SESSION_DIR = r'./session'     # per-video workspaces for the pipeline
MIN_DURATION_SECONDS = 15 * 60  # recordings shorter than this are backed up
VIDEO_EXTS = {'.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv'}
# Module logger
logger = get_system_logger('monitor')
# ==========================================
def get_video_duration(file_path):
    """Return the media duration of *file_path* in seconds via ffprobe.

    Returns 0 when ffprobe is missing, fails, or emits unparsable output.
    """
    try:
        cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', str(file_path)]
        # FIX: keep stderr separate — merging it into stdout (stderr=STDOUT)
        # let ffprobe warnings corrupt the float() parse below.
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        duration = float(result.stdout.strip())
        logger.debug(f"获取视频时长: {file_path.name} = {duration}秒")
        return duration
    except Exception as e:
        log_exception(logger, e, f"获取视频时长失败: {file_path}")
        return 0
class VideoHandler(FileSystemEventHandler):
    """Watchdog handler: dispatch newly recorded videos for transcription."""

    def on_created(self, event):
        if not event.is_directory:
            # Some watchdog backends deliver bytes paths; normalise to str.
            src_path = event.src_path
            if isinstance(src_path, bytes):
                src_path = src_path.decode('utf-8')
            logger.debug(f"检测到文件创建事件: {src_path}")
            self.handle_file(Path(src_path))

    def handle_file(self, file_path):
        """Wait for *file_path* to finish being written, then route it by length.

        Recordings shorter than MIN_DURATION_SECONDS are moved to BACKUP_DIR;
        longer ones get a session workspace and a video2srt.py child process.
        """
        if file_path.suffix.lower() not in VIDEO_EXTS:
            logger.debug(f"跳过非视频文件: {file_path.name}")
            return
        logger.info(f"发现新视频文件: {file_path.name},正在检查写入状态...")
        # Write-completion check: poll the size every 5 s until it stops
        # changing (and is non-zero) — the recorder has finished the file.
        last_size = -1
        while True:
            try:
                if not file_path.exists():
                    logger.warning(f"文件在检查期间消失: {file_path}")
                    return
                current_size = file_path.stat().st_size
                if current_size == last_size and current_size > 0:
                    break
                last_size = current_size
                time.sleep(5)  # size-check interval
            except Exception as e:
                logger.error(f"检查文件状态异常: {e}")
                break
        try:
            duration = get_video_duration(file_path)
            logger.info(f"视频时长: {file_path.name} = {duration/60:.1f} 分钟")
            if duration < MIN_DURATION_SECONDS:
                # Too short to be a real session — park it in the backup area.
                logger.info(f"时长不足 {MIN_DURATION_SECONDS/60:.0f} 分钟,移动到备份区")
                dst = Path(BACKUP_DIR) / file_path.name
                shutil.move(str(file_path), str(dst))
                logger.info(f"已移动至备份: {dst}")
            else:
                # Pipeline hand-off: create the per-video workspace that
                # the downstream stages (transcribe/split/upload) share.
                session_folder = Path(SESSION_DIR) / file_path.stem
                session_folder.mkdir(parents=True, exist_ok=True)
                logger.info(f"创建工作区: {session_folder}")
                logger.info(f"派发转录任务: {file_path.name}")
                # sys.executable keeps the child in this interpreter/venv.
                process = subprocess.Popen([
                    sys.executable, 'video2srt.py',
                    str(file_path),
                    str(session_folder)
                ])
                logger.info(f"转录进程已启动 (PID: {process.pid})")
        except Exception as e:
            log_exception(logger, e, "监控处理异常")
if __name__ == "__main__":
    # Entry point: prepare directories, backfill existing files, then watch.
    logger.info("="*50)
    logger.info("视频监控模块启动")
    logger.info("="*50)
    # Make sure all working directories exist before watching.
    for d in [STAGE_DIR, BACKUP_DIR, SESSION_DIR]:
        Path(d).mkdir(parents=True, exist_ok=True)
    logger.info(f"监控目录: {STAGE_DIR}")
    logger.info(f"备份目录: {BACKUP_DIR}")
    logger.info(f"工作目录: {SESSION_DIR}")
    handler = VideoHandler()
    # Backfill: process recordings already sitting in the stage directory.
    logger.info("正在扫描 stage 目录下的存量视频...")
    for f in Path(STAGE_DIR).iterdir():
        if f.is_file():
            handler.handle_file(f)
    observer = Observer()
    observer.schedule(handler, STAGE_DIR, recursive=False)
    observer.start()
    logger.info("文件监控已启动")
    try:
        # Idle loop: the observer thread does the actual work.
        while True: time.sleep(1)
    except KeyboardInterrupt:
        logger.info("接收到停止信号,正在关闭...")
        observer.stop()
        observer.join()
        logger.info("视频监控模块已停止")

161
monitorSongs.py Normal file
View File

@ -0,0 +1,161 @@
import os
import time
import json
import subprocess
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
SESSION_DIR = r'./session'        # workspace directory being watched
CHECK_INTERVAL = 2                # main-loop sleep interval (seconds)
OUTPUT_SUBDIR = "split_video"     # per-workspace output directory for clips
DONE_FLAG = "split_done.flag"     # marker written when cutting is finished
# Module logger
logger = get_system_logger('monitorSongs')
# ==========================================
class SongsJsonHandler(FileSystemEventHandler):
    """Watchdog handler: cut a recording into per-song clips when songs.json appears."""

    def on_created(self, event):
        if not event.is_directory and event.src_path.lower().endswith('songs.json'):
            logger.debug(f"检测到歌曲列表创建: {event.src_path}")
            self.process_video_splitting(Path(event.src_path))

    def on_moved(self, event):
        # Same handling when songs.json arrives via a rename/move.
        if not event.is_directory and event.dest_path.lower().endswith('songs.json'):
            logger.debug(f"检测到歌曲列表移动: {event.dest_path}")
            self.process_video_splitting(Path(event.dest_path))

    def process_video_splitting(self, json_path):
        """Split the workspace's source video into one clip per song entry.

        Uses ffmpeg stream copy (no re-encode) and writes DONE_FLAG when the
        pass finishes, which triggers the upload stage.
        """
        work_dir = json_path.parent
        split_dir = work_dir / OUTPUT_SUBDIR
        flag_file = work_dir / DONE_FLAG
        # 1. Skip workspaces that were already cut.
        if flag_file.exists():
            logger.debug(f"切割已完成,跳过: {work_dir.name}")
            return
        logger.info("="*50)
        logger.info(f"检测到新歌曲列表: {work_dir.name}")
        logger.info("="*50)
        # 2. Load the song list (timestamps are normalised per-entry below).
        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                songs = data.get('songs', [])
                logger.info(f"读取到 {len(songs)} 首歌曲")
        except Exception as e:
            log_exception(logger, e, f"读取 JSON 失败: {json_path}")
            return
        # 3. Locate the source video: first file with a known video suffix.
        source_video = None
        video_exts = {'.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv'}
        for f in work_dir.iterdir():
            if f.suffix.lower() in video_exts:
                source_video = f
                break
        if not source_video:
            logger.error(f"工作区内未找到源视频文件,跳过")
            return
        logger.info(f"源视频: {source_video.name}")
        split_dir.mkdir(parents=True, exist_ok=True)
        # 4. Cut each song out with ffmpeg stream copy.
        logger.info(f"开始切割视频片段 (Stream Copy)...")
        success_count = 0
        fail_count = 0
        for idx, song in enumerate(songs, 1):
            # SRT-style timestamps use a comma before the milliseconds;
            # ffmpeg expects a dot, so normalise here.
            raw_start = song.get('start', '00:00:00.000')
            raw_end = song.get('end', '00:00:00.000')
            start = raw_start.replace(',', '.')
            end = raw_end.replace(',', '.')
            # Sanitise path separators out of the title for the file name.
            title = song.get('title', 'UNKNOWN').replace('/', '_').replace('\\', '_')
            artist = song.get('artist', 'UNKNOWN')
            output_filename = f"{idx:02d}_{title}{source_video.suffix}"
            output_path = split_dir / output_filename
            if output_path.exists():
                logger.info(f"[{idx}] 已存在,跳过: {title}")
                continue
            # -ss/-to before -i: fast seek without decoding the whole file.
            cmd = [
                'ffmpeg', '-y',
                '-ss', start,
                '-to', end,
                '-i', str(source_video),
                '-c', 'copy',
                '-map_metadata', '0',
                str(output_path)
            ]
            try:
                # capture_output so ffmpeg's stderr is available on failure.
                res = subprocess.run(cmd, capture_output=True, check=True)
                logger.info(f"[{idx}] ✓ {title} - {artist}")
                success_count += 1
            except subprocess.CalledProcessError as e:
                logger.error(f"[{idx}] ✗ {title} 切割失败")
                logger.error(f"FFmpeg 错误: {e.stderr.decode('utf-8', errors='ignore')[:200]}")
                fail_count += 1
        # 5. Write the done marker — note this happens even when some
        # (or all) cuts failed, so failed entries are not retried.
        flag_file.touch()
        logger.info("="*50)
        logger.info(f"切割任务完成: 成功 {success_count} / 失败 {fail_count}")
        logger.info(f"输出目录: {split_dir}")
        logger.info("="*50)
def main():
    """Watch SESSION_DIR for songs.json files and cut clips for each workspace."""
    path = Path(SESSION_DIR)
    path.mkdir(parents=True, exist_ok=True)
    logger.info("="*50)
    logger.info("视频切割模块启动")
    logger.info("="*50)
    logger.info(f"监控目录: {SESSION_DIR}")
    event_handler = SongsJsonHandler()
    observer = Observer()
    observer.schedule(event_handler, str(path), recursive=True)
    # Backfill: process song lists that already exist on disk.
    logger.info("扫描现有歌曲列表...")
    scan_count = 0
    for sub_dir in path.iterdir():
        if sub_dir.is_dir():
            json_file = sub_dir / "songs.json"
            if json_file.exists():
                logger.info(f"发现已存在的歌曲列表: {sub_dir.name}")
                event_handler.process_video_splitting(json_file)
                scan_count += 1
    logger.info(f"扫描完成,处理 {scan_count} 个歌曲列表")
    observer.start()
    logger.info("文件监控已启动")
    try:
        # Idle loop: the observer thread does the actual work.
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        logger.info("接收到停止信号,正在关闭...")
        observer.stop()
        observer.join()
        logger.info("视频切割模块已停止")


if __name__ == "__main__":
    main()

214
monitorSrt.py Normal file
View File

@ -0,0 +1,214 @@
import os
import time
import subprocess
import json
import shutil
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, get_ai_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
SESSION_DIR = r'./session'      # workspace directory being watched
CHECK_INTERVAL = 2              # main-loop sleep interval (seconds)
CODEX_CMD = "/home/theshy/.nvm/versions/node/v22.13.0/bin/codex"  # absolute codex path for this host
DONE_FLAG = "transcribe_done.flag"  # marker that triggers song recognition
# Module logger
logger = get_system_logger('monitorSrt')
# ==========================================
# JSON Schema passed to Codex via --output-schema: constrains the model's
# output to a {"songs": [...]} object with strict per-song fields.
SONG_SCHEMA = {
    "type": "object",
    "properties": {
        "songs": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "start": {"type": "string"},
                    "end": {"type": "string"},
                    "title": {"type": "string"},
                    "artist": {"type": "string"},
                    "confidence": {"type": "number"},
                    "evidence": {"type": "string"}
                },
                "required": ["start", "end", "title", "artist", "confidence", "evidence"],
                "additionalProperties": False
            }
        }
    },
    "required": ["songs"],
    "additionalProperties": False
}
# Prompt sent to Codex for song recognition. This is runtime data consumed by
# the model — the Chinese text is intentional and must not be reworded here.
TASK_PROMPT = """你是音乐片段识别助手。当前目录下有一个字幕文件。
任务:
1. 结合字幕内容并允许联网搜索进行纠错(识别同音字、唱错等)。
2. 识别出直播中唱过的所有歌曲,给出精确的开始和结束时间。歌曲开始时间规则:
- 歌曲开始时间应使用“上一句字幕的结束时间”作为 start_time。
- 这样可以尽量保留歌曲可能存在的前奏。
3. 同一首歌间隔 ≤160s 合并,>160s 分开。若连续识别出相同歌曲,且中间只有短暂对白、空白、转场或无歌词段,应合并为同一首歌.
4. 忽略纯聊天片段。
5. 无法确认的歌曲丢弃,宁缺毋滥:你的输出将直接面向最终用户。
6. 忽略短片段:如果一段演唱持续时间总和少于 15 秒,视为随口哼唱,请直接忽略,不计入列表。
7. 仔细分析每一句歌词,识别出相关歌曲后, 使用该歌曲歌词上下文对比字幕上下文,确定歌曲起始与停止时间
8.歌曲标注规则:
- 可以在歌曲名称后使用括号 () 添加补充说明。
- 常见标注示例:
- (片段):歌曲演唱时间较短,例如 < 60 秒
- (清唱):无伴奏演唱
- (副歌):只演唱副歌部分
- 标注应简洁,仅在确有必要时使用。
9. 通过歌曲起始和结束时间自检, 一般歌曲长度在5分钟以内, 1分钟以上, 可疑片段重新联网搜索检查.
最后请严格按照 Schema 生成 JSON 数据。"""
# ==========================================
class SrtHandler(FileSystemEventHandler):
    """Watchdog handler: when a transcription finishes, run Codex song recognition.

    Triggers on either the `transcribe_done.flag` marker being created or an
    `.srt` file being moved into a session folder, then invokes the `codex`
    CLI in that folder to produce `songs.json` (and a human-readable songs.txt).
    """

    def on_created(self, event):
        """React to new files; only the DONE_FLAG marker starts processing."""
        if not event.is_directory:
            src_path = event.src_path
            # watchdog may deliver paths as bytes depending on platform/backend
            if isinstance(src_path, bytes):
                src_path = src_path.decode('utf-8')
            if src_path.endswith(DONE_FLAG):
                logger.debug(f"检测到转录完成标记: {src_path}")
                self.process_with_codex(Path(src_path))

    def on_moved(self, event):
        """React to files moved into the tree; an .srt landing also triggers processing."""
        dest_path = event.dest_path
        if isinstance(dest_path, bytes):
            dest_path = dest_path.decode('utf-8')
        if not event.is_directory and dest_path.lower().endswith('.srt'):
            logger.debug(f"检测到字幕文件移动: {dest_path}")
            self.process_with_codex(Path(dest_path))

    def process_with_codex(self, srt_path):
        """Run `codex exec` in the folder containing *srt_path* to identify songs.

        The folder is the work directory; `songs.json` acts as the idempotency
        marker so the same directory is never processed twice.
        """
        work_dir = srt_path.parent
        # Skip if this directory has already been processed (avoid duplicate runs).
        if (work_dir / "songs.json").exists():
            logger.info(f"songs.json 已存在,跳过: {work_dir.name}")
            return
        logger.info(f"发现新任务,准备识别歌曲: {work_dir.name}")
        # Dedicated AI log for this recognition run
        ai_log, ai_log_file = get_ai_logger('codex', 'songs')
        ai_log.info("="*50)
        ai_log.info("Codex 歌曲识别任务开始")
        ai_log.info(f"工作目录: {work_dir}")
        ai_log.info("="*50)
        # Write the output schema next to the subtitle so codex can reference it
        schema_file = work_dir / "song_schema.json"
        with open(schema_file, "w", encoding="utf-8") as f:
            json.dump(SONG_SCHEMA, f, ensure_ascii=False, indent=2)
        # Build the argv list (on Linux the command MUST be a list, not a string)
        cmd = [
            CODEX_CMD, "exec",
            TASK_PROMPT.replace('\n', ' '),
            "--full-auto",
            "--sandbox", "workspace-write",
            "--output-schema", "./song_schema.json",
            "-o", "songs.json",
            "--skip-git-repo-check",
            "--json"
        ]
        logger.info("调用 Codex...")
        ai_log.info(f"执行命令: {subprocess.list2cmdline(cmd)}")
        try:
            start_time = time.time()
            # shell=False with an argv list avoids "File name too long" errors
            # that occur when the long prompt is passed through a shell string.
            result = subprocess.run(
                cmd,
                cwd=str(work_dir),
                shell=False,
                capture_output=True,
                text=True,
                encoding='utf-8'
            )
            elapsed = time.time() - start_time
            ai_log.info(f"Codex 执行完成,耗时: {elapsed:.2f} 秒")
            # Mirror the child's stdout/stderr into the AI log for debugging
            if result.stdout:
                ai_log.info("=== STDOUT ===")
                ai_log.info(result.stdout)
            if result.stderr:
                ai_log.warning("=== STDERR ===")
                ai_log.warning(result.stderr)
            if result.returncode == 0:
                logger.info(f"Codex 执行成功: {work_dir.name}")
                self.generate_txt_fallback(work_dir, ai_log)
            else:
                logger.error(f"Codex 失败,返回码: {result.returncode}")
                ai_log.error(f"Codex 失败,返回码: {result.returncode}")
        except Exception as e:
            log_exception(logger, e, "Codex 调用异常")
            log_exception(ai_log, e, "Codex 执行异常")
        ai_log.info("="*50)
        ai_log.info("Codex 歌曲识别任务完成")
        ai_log.info("="*50)

    def generate_txt_fallback(self, work_dir, ai_log):
        """Render songs.json into a plain songs.txt (one `HH:MM:SS title artist` line per song)."""
        json_path = work_dir / "songs.json"
        txt_path = work_dir / "songs.txt"
        try:
            if json_path.exists():
                with open(json_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                songs = data.get('songs', [])
                with open(txt_path, 'w', encoding='utf-8') as t:
                    for s in songs:
                        start_time = s['start'].split(',')[0].split('.')[0]  # strip ms; handles both ',' and '.' separators
                        line = f"{start_time} {s['title']}{s['artist']}\n"
                        t.write(line)
                logger.info(f"成功生成: {txt_path.name}")
        except Exception as e:
            log_exception(logger, e, "生成 txt 失败")
def main():
    """Entry point: sweep existing session folders, then watch for new flags.

    A backlog sweep handles directories whose transcription finished while this
    monitor was down; afterwards a recursive watchdog observer reacts live.
    """
    watch_root = Path(SESSION_DIR)
    watch_root.mkdir(parents=True, exist_ok=True)
    logger.info("="*50)
    logger.info("字幕监控模块启动 (Linux 优化版)")
    logger.info("="*50)
    handler = SrtHandler()
    # Backlog sweep: directories that have the done-flag but no songs.json yet.
    logger.info("正在扫描存量任务...")
    for entry in watch_root.iterdir():
        if not entry.is_dir():
            continue
        flag_file = entry / DONE_FLAG
        if flag_file.exists() and not (entry / "songs.json").exists():
            logger.info(f"发现存量任务: {entry.name}")
            handler.process_with_codex(flag_file)
    watcher = Observer()
    watcher.schedule(handler, str(watch_root), recursive=True)
    watcher.start()
    try:
        while True:
            time.sleep(CHECK_INTERVAL)
    except KeyboardInterrupt:
        watcher.stop()
        watcher.join()
if __name__ == "__main__":
    main()

6
run.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Launcher for the whole pipeline: activate the project virtualenv, then hand
# the process over to start_all.py (`exec` replaces this shell, so signals such
# as SIGTERM reach the Python supervisor directly).
set -e
cd /home/theshy/biliup
source .venv/bin/activate
exec python start_all.py

165
session_top_comment.py Normal file
View File

@ -0,0 +1,165 @@
import json, re, subprocess, time, requests
from pathlib import Path
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from logger import get_system_logger, log_exception
import shutil
# --- Configuration ---
SESSION_DIR = Path("./session")  # workspace directory being watched
COOKIE_FILE = Path("./cookies.json")  # biliup-format cookie dump (provides bili_jct CSRF token)
# BILIUP_PATH = Path("./biliup")
BILIUP_PATH = shutil.which("biliup") or "./biliup"  # prefer biliup on PATH, else local binary
MAX_RETRIES, BASE_DELAY, POLL_INTERVAL = 5, 180, 10  # retry cap, base backoff (s), queue poll interval (s)
ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]")  # matches ANSI terminal escape sequences (colour codes etc.)
# Module-level system logger
logger = get_system_logger("session_top_comment")
def strip_ansi(text: str) -> str:
    """Strip ANSI terminal escape sequences from *text*; falsy input yields ''."""
    return re.sub(r"\x1b\[[0-9;]*[A-Za-z]", "", text or "")
class TopCommentClient:
    """Posts a comment on a Bilibili video and immediately pins it.

    Credentials come from the biliup cookie dump; the `bili_jct` cookie doubles
    as the CSRF token the write APIs require.
    """
    def __init__(self):
        with open(COOKIE_FILE, "r", encoding="utf-8") as f:
            dump = json.load(f)
        jar = {entry["name"]: entry["value"] for entry in dump.get("cookie_info", {}).get("cookies", [])}
        self.csrf = jar.get("bili_jct")
        if not self.csrf:
            raise ValueError("Cookie 中缺少 bili_jct")
        self.session = requests.Session()
        self.session.cookies.update(jar)
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Referer": "https://www.bilibili.com/",
            "Origin": "https://www.bilibili.com"
        })
    def get_aid(self, bvid):
        """Resolve a BV id to the numeric aid required by the reply API."""
        payload = self.session.get("https://api.bilibili.com/x/web-interface/view", params={"bvid": bvid}).json()
        if payload.get('code') != 0:
            raise RuntimeError(f"View API: {payload.get('message')}")
        return payload['data']['aid']
    def post_and_top(self, aid, msg):
        """Publish *msg* as a comment on video *aid*, then pin it to the top."""
        # Publish the comment first.
        posted = self.session.post("https://api.bilibili.com/x/v2/reply/add",
            data={"type": 1, "oid": aid, "message": msg, "plat": 1, "csrf": self.csrf}).json()
        if posted.get('code') != 0:
            raise RuntimeError(f"Post API: {posted.get('message')}")
        rpid = posted['data']['rpid']
        # Give the backend a moment to persist the new reply before pinning it.
        logger.info(f"评论已发布(rpid={rpid}),等待 3s 置顶...")
        time.sleep(3)
        pinned = self.session.post("https://api.bilibili.com/x/v2/reply/top",
            data={"type": 1, "oid": aid, "rpid": rpid, "action": 1, "csrf": self.csrf}).json()
        if pinned.get('code') != 0:
            raise RuntimeError(f"Top API: {pinned.get('message')}")
class CommentManager:
    """Matches session folders to uploaded videos and posts pinned song-list comments.

    Maintains a retry queue with exponential backoff; each folder task is retried
    up to MAX_RETRIES times before being dropped.
    """
    def __init__(self):
        self.client = TopCommentClient()
        self.pending = {}  # {folder_path: {'attempts': 0, 'next_run': 0}}
        self._cache = {"time": 0, "videos": []}  # 60 s cache of `biliup list` output
    def fetch_videos(self):
        """Invoke `biliup list` to fetch recently uploaded videos (cached for 60 s)."""
        if time.time() - self._cache["time"] < 60: return self._cache["videos"]
        try:
            res = subprocess.run([str(BILIUP_PATH), "list", "--max-pages", "1"], capture_output=True, text=True, encoding='utf-8')
            videos = []
            # biliup prints tab-separated "BV...\ttitle" rows; keep only those
            for line in strip_ansi(res.stdout).splitlines():
                if line.startswith("BV"):
                    parts = line.split("\t")
                    if len(parts) >= 2: videos.append({"bvid": parts[0].strip(), "title": parts[1].strip()})
            self._cache = {"time": time.time(), "videos": videos}
            return videos
        except Exception as e:
            logger.error(f"biliup list 失败: {e}")
            return []
    def scan_and_add(self, folder: Path):
        """Queue *folder* if it has a songs.txt and no comment has been posted yet."""
        if not folder.is_dir() or (folder / "comment_done.flag").exists(): return
        if (folder / "songs.txt").exists() and folder not in self.pending:
            logger.info(f"发现待处理任务: {folder.name}")
            self.pending[folder] = {'attempts': 0, 'next_run': 0}
    def process_queue(self):
        """Run one pass over the pending queue, honoring each task's backoff deadline."""
        now = time.time()
        for folder in list(self.pending.keys()):
            task = self.pending[folder]
            if task['next_run'] > now: continue
            try:
                # 1. Find the uploaded video matching this folder
                videos = self.fetch_videos()
                # Fuzzy match: folder name contained in title, or vice versa
                matched_bvid = next((v['bvid'] for v in videos if folder.name in v['title'] or v['title'] in folder.name), None)
                # Fallback: extract a [BV...] tag embedded in the folder name
                if not matched_bvid:
                    bv_match = re.search(r"\[(BV[0-9A-Za-z]+)\]", folder.name)
                    if bv_match: matched_bvid = bv_match.group(1)
                if not matched_bvid:
                    raise RuntimeError("未在最近上传列表中找到匹配视频")
                # 2. Read the comment body
                content = (folder / "songs.txt").read_text(encoding="utf-8").strip()
                if not content:
                    logger.warning(f"songs.txt 内容为空,取消任务: {folder.name}")
                    self.pending.pop(folder); continue
                # 3. Post the comment and pin it
                aid = self.client.get_aid(matched_bvid)
                self.client.post_and_top(aid, content)
                # 4. Mark success so the folder is never reprocessed
                (folder / "comment_done.flag").touch()
                logger.info(f"任务完成: {folder.name} -> {matched_bvid}")
                self.pending.pop(folder)
            except Exception as e:
                task['attempts'] += 1
                if task['attempts'] >= MAX_RETRIES:
                    logger.error(f"任务最终失败: {folder.name} - {e}")
                    self.pending.pop(folder)
                else:
                    # Exponential backoff: BASE_DELAY, 2x, 4x, ...
                    delay = BASE_DELAY * (2 ** (task['attempts']-1))
                    task['next_run'] = now + delay
                    logger.warning(f"任务推迟({task['attempts']}/{MAX_RETRIES}): {folder.name} - {e}. {delay}s 后重试")
def main():
    """Entry point: sweep existing session folders, then watch for new songs.txt files."""
    logger.info("="*50)
    logger.info("置顶评论模块启动")
    logger.info("="*50)
    try:
        mgr = CommentManager()
    except Exception as e:
        logger.error(f"初始化失败: {e}")
        return
    # Initial sweep: queue any folder that already has a songs.txt
    for f in SESSION_DIR.iterdir(): mgr.scan_and_add(f)
    # Live watcher: queue a folder the moment its songs.txt appears
    class Handler(FileSystemEventHandler):
        def on_created(self, event):
            p = Path(event.src_path)
            if p.name == "songs.txt": mgr.scan_and_add(p.parent)
    observer = Observer()
    observer.schedule(Handler(), str(SESSION_DIR), recursive=True)
    observer.start()
    logger.info(f"开始监控目录: {SESSION_DIR}")
    try:
        while True:
            mgr.process_queue()
            time.sleep(POLL_INTERVAL)
    except KeyboardInterrupt:
        observer.stop()
        observer.join()
        logger.info("置顶评论模块已停止")
if __name__ == "__main__":
    main()

96
start_all.py Normal file
View File

@ -0,0 +1,96 @@
import subprocess
import time
import sys
import os
from pathlib import Path
from logger import get_system_logger
# ==========================================
# Configuration: the filenames below must match the local script files
# ==========================================
SCRIPTS = [
    "monitor.py",              # 1. watches stage/, triggers transcription (runs video2srt.py)
    "monitorSrt.py",           # 2. watches session/, triggers Codex song recognition
    "monitorSongs.py",         # 3. watches session/, triggers FFmpeg video splitting
    "upload.py",               # 4. watches session/, triggers biliup auto-upload
    "session_top_comment.py",  # 5. watches session/, pins the song-list comment on Bilibili
    "add_to_collection.py",    # 6. watches session/, files the upload into its collection
]
# Python interpreter used for the children (the same one running this script)
PYTHON_EXE = sys.executable
# Module-level system logger
logger = get_system_logger('start_all')
# ==========================================
# ==========================================
def start_pipeline():
    """Launch every pipeline script as a child process and supervise them.

    Children are started with Popen (a new console window per child on
    Windows); the parent then polls their exit status every 10 s until Ctrl-C,
    at which point it terminates all children and waits for each to exit.
    """
    processes = []
    logger.info("="*50)
    logger.info("直播切片 & 自动投稿全自动流水线")
    logger.info("="*50)
    logger.info(f"启动时间: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info(f"当前路径: {os.getcwd()}")
    logger.info(f"Python: {PYTHON_EXE}")
    # Verify all scripts exist before starting anything (fail fast, start none)
    for script in SCRIPTS:
        if not Path(script).exists():
            logger.error(f"找不到脚本 {script},请确保它们在同一目录下")
            print(f"[X] 错误: 找不到脚本 {script},请确保它们在同一目录下。")
            return
    # Start each module asynchronously; CREATE_NEW_CONSOLE gives each child its
    # own window on Windows, while other platforms share this console.
    for script in SCRIPTS:
        logger.info(f"正在启动模块: {script}")
        try:
            p = subprocess.Popen(
                [PYTHON_EXE, script],
                creationflags=subprocess.CREATE_NEW_CONSOLE if os.name == 'nt' else 0
            )
            processes.append((script, p))
            logger.info(f"模块已启动: {script} (PID: {p.pid})")
            time.sleep(1)  # stagger start-ups to avoid an IO burst
        except Exception as e:
            logger.error(f"启动 {script} 失败: {e}")
            print(f"[X] 启动 {script} 失败: {e}")
    logger.info("")
    logger.info("="*50)
    logger.info("所有监控模块已启动!")
    logger.info("请勿关闭此主窗口,除非你想停止整个流水线")
    logger.info("="*50)
    print("\n" + "="*50)
    print("[√] 所有监控模块已启动!")
    print("[!] 请勿关闭此主窗口,除非你想停止整个流水线。")
    print("[!] 详细日志请查看 ./logs/system/ 目录")
    print("="*50)
    # Fix: remember which modules we already reported as dead, otherwise the
    # same warning is re-logged every 10 seconds forever.
    reported_dead = set()
    try:
        while True:
            for name, p in processes:
                code = p.poll()
                if code is not None and name not in reported_dead:
                    reported_dead.add(name)
                    logger.warning(f"模块 {name} 已意外停止 (Exit Code: {code})")
                    print(f"\n[⚠️] 警告: 模块 {name} 已意外停止 (Exit Code: {code})")
                    # automatic restart logic could be hooked in here
            time.sleep(10)
    except KeyboardInterrupt:
        logger.info("接收到停止信号,正在关闭所有监控模块...")
        print("\n[*] 正在关闭所有监控模块...")
        for name, p in processes:
            p.terminate()
            try:
                # Fix: actually wait for the child to exit; escalate to kill()
                # if it ignores SIGTERM, so no orphan processes survive shutdown.
                p.wait(timeout=10)
            except subprocess.TimeoutExpired:
                p.kill()
            logger.info(f"模块已终止: {name}")
        logger.info("已安全退出")
        print("[√] 已安全退出。")
if __name__ == "__main__":
    # Run from the script's own directory so relative paths (session/, logs/) resolve.
    start_all_dir = Path(__file__).parent
    os.chdir(start_all_dir)  # ensure a correct working directory
    start_pipeline()

316
upload.py Normal file
View File

@ -0,0 +1,316 @@
import os
import time
import subprocess
import json
import re
import random
import shutil
import sys
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from logger import get_system_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
SESSION_DIR = r'./session'  # workspace directory being watched
CHECK_INTERVAL = 5  # polling frequency in seconds
BILIUP_PATH = shutil.which("biliup") or "./biliup"  # prefer biliup on PATH, else local binary
CONFIG_FILE = "upload_config.json"  # path of the upload template/config file
DONE_FLAG = "split_done.flag"  # marker produced by monitorSongs.py when splitting finishes
UPLOAD_FLAG = "upload_done.flag"  # marker this script writes when an upload completes
# Module-level system logger
logger = get_system_logger('upload')
# ==========================================
# ==========================================
class UploadConfig:
    """Loads upload_config.json and turns it into per-video upload metadata.

    Provides filename parsing (streamer/date extraction via configured regex
    patterns), random quote selection, and a built-in default configuration
    used whenever the JSON file is missing or unreadable.
    """
    def __init__(self, config_path):
        self.config_path = Path(config_path)
        self.config = self.load_config()
    def load_config(self):
        """Read the JSON config file; fall back to defaults on any error."""
        try:
            if not self.config_path.exists():
                logger.error(f"配置文件不存在: {self.config_path}")
                return self.get_default_config()
            with open(self.config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
            logger.info(f"成功加载配置文件: {self.config_path}")
            return config
        except Exception as e:
            log_exception(logger, e, "加载配置文件失败")
            return self.get_default_config()
    def get_default_config(self):
        """Return the built-in fallback configuration."""
        logger.warning("使用默认配置")
        return {
            "upload_settings": {
                "tid": 31,
                "copyright": 2,
                "source": "直播回放",
                "cover": ""
            },
            "template": {
                "title": "{streamer}_{date}",
                "description": "自动录制剪辑\n\n{songs_list}",
                "tag": "翻唱,直播切片,唱歌,音乐",
                "dynamic": ""
            },
            "streamers": {},
            "quotes": [],
            "filename_patterns": {"patterns": []}
        }
    def parse_filename(self, filename):
        """Match *filename* against the configured regex patterns.

        Returns the first matching pattern's named groups, with 'date'
        normalized through the pattern's `date_format`; if nothing matches,
        falls back to ``{"streamer": filename, "date": ""}``.
        """
        patterns = self.config.get("filename_patterns", {}).get("patterns", [])
        for pattern_config in patterns:
            regex = pattern_config.get("regex")
            if not regex: continue
            match = re.match(regex, filename)
            if match:
                data = match.groupdict()
                date_format = pattern_config.get("date_format", "{date}")
                try:
                    formatted_date = date_format.format(**data)
                    data['date'] = formatted_date
                except KeyError: pass
                logger.debug(f"文件名匹配成功: {pattern_config.get('name')} -> {data}")
                return data
        # Bug fix: the message previously logged the literal "(unknown)" instead
        # of interpolating the filename, making the warning useless for debugging.
        logger.warning(f"文件名未匹配任何模式: {filename}")
        return {"streamer": filename, "date": ""}
    def get_random_quote(self):
        """Pick a random quote dict from the config; empty quote when none configured."""
        quotes = self.config.get("quotes", [])
        if not quotes: return {"text": "", "author": ""}
        return random.choice(quotes)
class UploadHandler(FileSystemEventHandler):
    """Watchdog handler: uploads split video clips to Bilibili via biliup.

    When a `split_done.flag` appears, uploads the clips in `split_video/` in
    batches of 5 — the first batch as a new submission, the rest appended to
    the resulting BV id — with exponential-backoff retries at every step.
    """
    def __init__(self, config):
        self.processing_sets = set()  # video stems currently being uploaded (re-entrancy guard)
        self.config = config  # an UploadConfig instance
    def on_created(self, event):
        """Trigger an upload when the split-done marker is created."""
        src_path = event.src_path
        if isinstance(src_path, bytes): src_path = src_path.decode('utf-8')
        if not event.is_directory and src_path.lower().endswith(DONE_FLAG):
            logger.debug(f"检测到切割完成标记: {src_path}")
            self.handle_upload(Path(src_path))
    def on_moved(self, event):
        """Trigger an upload when the split-done marker is moved into place."""
        dest_path = event.dest_path
        if isinstance(dest_path, bytes): dest_path = dest_path.decode('utf-8')
        if not event.is_directory and dest_path.lower().endswith(DONE_FLAG):
            logger.debug(f"检测到切割完成标记移动: {dest_path}")
            self.handle_upload(Path(dest_path))
    def _wait_exponential(self, retry_count, base_wait=300, max_wait=3600):
        """Exponential backoff: base_wait * 2**retry_count, capped at max_wait seconds.

        NOTE(review): with the defaults this yields 300, 600, 1200... up to 3600 —
        the original Chinese comment claimed 60/120/240 with a 600 s cap; confirm
        which values are intended.
        """
        wait_time = min(base_wait * (2 ** retry_count), max_wait)
        return wait_time
    def handle_upload(self, flag_path):
        """Run the full batched upload for the session folder containing *flag_path*.

        Idempotent: skips folders already uploaded (UPLOAD_FLAG) or currently
        in flight. On full success it writes UPLOAD_FLAG and deletes the clips
        and the original recording to reclaim disk space.
        """
        work_dir = flag_path.parent
        video_stem = work_dir.name
        upload_done = work_dir / UPLOAD_FLAG
        split_dir = work_dir / "split_video"
        if upload_done.exists() or video_stem in self.processing_sets:
            logger.debug(f"上传已完成或正在处理,跳过: {video_stem}")
            return
        logger.info("="*50)
        logger.info(f"准备上传: {video_stem}")
        logger.info("="*50)
        self.processing_sets.add(video_stem)
        try:
            # Derive template variables from the folder name and songs files
            parsed = self.config.parse_filename(video_stem)
            streamer = parsed.get('streamer', video_stem)
            date = parsed.get('date', '')
            songs_json = work_dir / "songs.json"
            songs_txt = work_dir / "songs.txt"
            songs_list = ""
            song_count = 0
            if songs_json.exists():
                try:
                    with open(songs_json, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                    song_count = len(data.get('songs', []))
                except Exception: pass
            if songs_txt.exists():
                songs_list = songs_txt.read_text(encoding='utf-8').strip()
            quote = self.config.get_random_quote()
            template_vars = {
                'streamer': streamer, 'date': date, 'song_count': song_count,
                'songs_list': songs_list, 'daily_quote': quote.get('text', ''),
                'quote_author': quote.get('author', '')
            }
            # Render title/description/dynamic from the configured templates
            template = self.config.config.get('template', {})
            title = template.get('title', '{streamer}_{date}').format(**template_vars)
            description = template.get('description', '{songs_list}').format(**template_vars)
            dynamic = template.get('dynamic', '').format(**template_vars)
            # Per-streamer tag override, else the generic template tag
            streamers_config = self.config.config.get('streamers', {})
            if streamer in streamers_config:
                tags = streamers_config[streamer].get('tags', template.get('tag', ''))
            else:
                tags = template.get('tag', '翻唱,唱歌,音乐').format(**template_vars)
            video_files = sorted([str(v) for v in split_dir.glob("*") if v.suffix.lower() in {'.mp4', '.mkv', '.mov', '.flv', '.ts'}])
            if not video_files:
                logger.error(f"切片目录 {split_dir} 内没找到视频")
                return
            upload_settings = self.config.config.get('upload_settings', {})
            tid = upload_settings.get('tid', 31)
            # 1. Refresh the biliup login session
            subprocess.run([BILIUP_PATH, "renew"], shell=False, capture_output=True)
            # 2. Prepare the batches
            BATCH_SIZE = 5
            logger.info(f"启动分批投稿 (总计 {len(video_files)} 个分片)...")
            first_batch = video_files[:BATCH_SIZE]
            remaining_batches = [video_files[i:i + BATCH_SIZE] for i in range(BATCH_SIZE, len(video_files), BATCH_SIZE)]
            upload_cmd = [
                BILIUP_PATH, "upload",
                *first_batch,
                "--title", title,
                "--tid", str(tid),
                "--tag", tags,
                "--copyright", str(upload_settings.get('copyright', 2)),
                "--source", upload_settings.get('source', '直播回放'),
                "--desc", description
            ]
            if dynamic: upload_cmd.extend(["--dynamic", dynamic])
            cover = upload_settings.get('cover', '')
            if cover and Path(cover).exists(): upload_cmd.extend(["--cover", cover])
            bvid = None
            MAX_ATTEMPTS = 5  # maximum attempts per batch
            # ==========================
            # Phase one: first batch (up to 5 attempts)
            # ==========================
            logger.info(f"正在上传第一批 ({len(first_batch)}个文件)...")
            for attempt in range(1, MAX_ATTEMPTS + 1):
                logger.info(f"首批上传尝试 [{attempt}/{MAX_ATTEMPTS}]...")
                result = subprocess.run(upload_cmd, shell=False, capture_output=True, text=True, encoding='utf-8')
                if result.returncode == 0:
                    # Extract the BV id from biliup's output (JSON field first, bare token fallback)
                    bv_match = re.search(r'"bvid":"(BV[A-Za-z0-9]+)"', result.stdout)
                    if not bv_match: bv_match = re.search(r'(BV[A-Za-z0-9]+)', result.stdout)
                    if bv_match:
                        bvid = bv_match.group(1)
                        logger.info(f"第一批上传成功!BV 号: {bvid}")
                        (work_dir / "bvid.txt").write_text(bvid, encoding='utf-8')
                        break  # success — exit the retry loop
                    else:
                        logger.warning(f"上传命令返回成功但未找到BVID (尝试 {attempt}/{MAX_ATTEMPTS})")
                # Reached only when this attempt did not break (no BV id yet)
                if attempt < MAX_ATTEMPTS:
                    err_msg = result.stderr.strip()[-100:] if result.stderr else "无标准错误输出"
                    wait_time = self._wait_exponential(attempt - 1)
                    logger.error(f"第一批上传失败或未获取BVID,等待 {wait_time}秒后重试。错误片段: {err_msg}")
                    time.sleep(wait_time)
                else:
                    logger.error("第一批上传已达到最大重试次数 (5次),中止本次任务。")
                    return  # abort the whole task — no follow-up batches
            # ==========================
            # Phase two: append remaining batches (up to 5 attempts each)
            # ==========================
            if bvid:
                for idx, batch in enumerate(remaining_batches, 2):
                    logger.info(f"等待 45 秒冷却时间,准备上传第 {idx} 批...")
                    time.sleep(45)
                    batch_success = False
                    for attempt in range(1, MAX_ATTEMPTS + 1):
                        logger.info(f"正在追加第 {idx} 批 ({len(batch)}个) - 尝试 [{attempt}/{MAX_ATTEMPTS}]...")
                        append_cmd = [BILIUP_PATH, "append", "--vid", bvid, *batch]
                        res = subprocess.run(append_cmd, shell=False, capture_output=True, text=True, encoding='utf-8')
                        if res.returncode == 0:
                            logger.info(f"第 {idx} 批追加成功")
                            batch_success = True
                            break  # success — move on to the next batch
                        # Attempt failed
                        if attempt < MAX_ATTEMPTS:
                            err_msg = res.stderr.strip()[-100:] if res.stderr else "无标准错误输出"
                            wait_time = self._wait_exponential(attempt - 1)
                            logger.error(f"第 {idx} 批追加失败,等待 {wait_time}秒后重试。错误片段: {err_msg}")
                            time.sleep(wait_time)
                    if not batch_success:
                        logger.error(f"第 {idx} 批追加已达到最大重试次数 (5次)。为防止顺序错乱,中止后续上传。")
                        return  # one batch failed for good — stop, to keep part order intact
                # Reached only when every loop above completed without returning
                logger.info(f"所有分片上传完成: {bvid}")
                upload_done.touch()
                # Cleanup: reclaim disk space taken by clips and the original recording
                try:
                    if split_dir.exists(): shutil.rmtree(split_dir)
                    for ext in ['.mp4', '.mkv', '.mov', '.flv', '.ts']:
                        orig = work_dir / f"{video_stem}{ext}"
                        if orig.exists(): orig.unlink()
                except Exception as e:
                    logger.error(f"清理空间失败: {e}")
            else:
                # Unreachable in practice: phase one either sets bvid or returns.
                # Kept as a safety net in case the BV-id regex ever misses.
                logger.error("逻辑错误:流程继续但无BVID,上传中止")
        except Exception as e:
            log_exception(logger, e, "上传异常")
        finally:
            self.processing_sets.discard(video_stem)
def main():
    """Entry point: sweep existing session folders, then watch for new split-done flags."""
    path = Path(SESSION_DIR)
    path.mkdir(parents=True, exist_ok=True)
    logger.info("上传模块启动 (MaxRetry=5)")
    config = UploadConfig(CONFIG_FILE)
    handler = UploadHandler(config)
    # Backlog sweep: folders whose splitting finished while this monitor was down
    for sub_dir in path.iterdir():
        if sub_dir.is_dir():
            if (sub_dir / DONE_FLAG).exists() and not (sub_dir / UPLOAD_FLAG).exists():
                handler.handle_upload(sub_dir / DONE_FLAG)
    observer = Observer()
    observer.schedule(handler, str(path), recursive=True)
    observer.start()
    try:
        while True: time.sleep(5)
    except KeyboardInterrupt:
        observer.stop()
        observer.join()
if __name__ == "__main__":
    main()

189
video2srt.py Normal file
View File

@ -0,0 +1,189 @@
import os
import time
import math
import shutil
import subprocess
import sys
from pathlib import Path
from groq import Groq
from logger import get_system_logger, get_ai_logger, log_exception
# ==========================================
# Interface configuration
# ==========================================
# SECURITY fix: the Groq API key was committed as a literal, leaking the secret
# with the repository. It must now be supplied via the GROQ_API_KEY environment
# variable; with no key set, Groq API calls will fail with an auth error.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
LANGUAGE = "zh"  # language hint passed to Whisper
KEEP_TEMP_AUDIO = False  # keep the extracted audio chunks after transcription?
MAX_FILE_SIZE_MB = 23  # per-chunk size budget (below Groq's upload limit)
BITRATE_KBPS = 64  # mp3 bitrate used when extracting audio
MODEL_NAME = "whisper-large-v3-turbo"
# Module-level system logger
logger = get_system_logger('video2srt')
# ==========================================
client = Groq(api_key=GROQ_API_KEY)
def format_srt_time(seconds):
    """Format a non-negative duration in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    whole = int(seconds)
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    millis = int((seconds - whole) * 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
def transcribe_with_retry(audio_file, ai_log):
    """Transcribe one audio chunk via the Groq Whisper API, retrying on rate limits.

    Retries indefinitely when the error looks like HTTP 429 / rate limiting
    (fixed 25 s wait); any other exception is logged and re-raised.
    Returns the list of recognized speech segments from the verbose_json response.
    """
    ai_log.info(f"开始转录音频: {audio_file.name}")
    ai_log.info(f"模型: {MODEL_NAME}, 语言: {LANGUAGE}")
    retry_count = 0
    while True:
        try:
            start_time = time.time()
            with open(audio_file, "rb") as file:
                response = client.audio.transcriptions.create(
                    file=(audio_file.name, file.read()),
                    model=MODEL_NAME,
                    response_format="verbose_json",
                    language=LANGUAGE,
                    temperature=0.0
                )
            elapsed = time.time() - start_time
            ai_log.info(f"转录成功,耗时: {elapsed:.2f} 秒")
            ai_log.info(f"识别到 {len(response.segments)} 个语音片段")
            return response.segments
        except Exception as e:
            retry_count += 1
            err_str = str(e)
            ai_log.error(f"转录失败 (尝试 {retry_count}): {err_str}")
            # Rate-limit detection is string-based: HTTP 429 or "rate_limit" in the message
            if "429" in err_str or "rate_limit" in err_str.lower():
                wait_time = 25
                ai_log.warning(f"触发 API 速率限制,等待 {wait_time} 秒后重试...")
                logger.warning(f"Groq API 速率限制,等待 {wait_time} 秒")
                time.sleep(wait_time)
            else:
                log_exception(ai_log, e, "Groq API 调用失败")
                raise e
def process_single_video(raw_video_path, video_work_dir):
    """Transcribe one recorded video into an SRT subtitle file.

    Steps:
      1. Move the raw video into its session work directory.
      2. Extract mono MP3 audio with FFmpeg, split into API-sized chunks.
      3. Transcribe each chunk via Groq and stream numbered entries into
         <stem>.srt, offsetting timestamps by the chunk position.

    Side effects: writes <stem>.srt and transcribe_done.flag into
    *video_work_dir*; deletes the temp audio unless KEEP_TEMP_AUDIO is set.
    """
    raw_video_path = Path(raw_video_path)
    video_work_dir = Path(video_work_dir)
    video_work_dir.mkdir(parents=True, exist_ok=True)
    logger.info("="*50)
    logger.info(f"开始处理视频: {raw_video_path.name}")
    logger.info("="*50)
    # Dedicated AI log for this transcription run
    ai_log, ai_log_file = get_ai_logger('groq', 'transcribe')
    ai_log.info("="*50)
    ai_log.info(f"Groq 转录任务开始")
    ai_log.info(f"视频文件: {raw_video_path.name}")
    ai_log.info(f"工作目录: {video_work_dir}")
    ai_log.info("="*50)
    # Move the raw recording into the session work directory (idempotent)
    target_video_path = video_work_dir / raw_video_path.name
    if not target_video_path.exists():
        logger.info(f"移动视频至工作区: {raw_video_path.name}")
        ai_log.info(f"移动视频: {raw_video_path} -> {target_video_path}")
        shutil.move(str(raw_video_path), str(target_video_path))
    else:
        logger.info(f"视频已在工作区中: {target_video_path.name}")
        ai_log.info(f"视频已存在于工作区: {target_video_path}")
    video_stem = target_video_path.stem
    audio_temp_dir = video_work_dir / "temp_audio"
    audio_temp_dir.mkdir(parents=True, exist_ok=True)
    # 1. Compute the chunk duration so each mp3 stays under MAX_FILE_SIZE_MB
    seg_duration = math.floor((MAX_FILE_SIZE_MB * 8 * 1024) / BITRATE_KBPS)
    logger.info(f"音频分片时长: {seg_duration} 秒")
    ai_log.info(f"音频分片参数: {seg_duration}秒/片, 比特率: {BITRATE_KBPS}kbps")
    # 2. FFmpeg: extract mono 22.05 kHz mp3 audio and split it into chunks
    logger.info("开始提取音频...")
    ai_log.info("开始 FFmpeg 音频提取")
    output_pattern = str(audio_temp_dir / "part_%03d.mp3")
    cmd = [
        'ffmpeg', '-y', '-i', str(target_video_path),
        '-vn', '-acodec', 'libmp3lame', '-b:a', f'{BITRATE_KBPS}k',
        '-ac', '1', '-ar', '22050',
        '-f', 'segment', '-segment_time', str(seg_duration),
        '-reset_timestamps', '1', output_pattern
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
        logger.info("音频提取完成")
        ai_log.info("FFmpeg 音频提取成功")
    except Exception as e:
        log_exception(logger, e, "FFmpeg 音频提取失败")
        log_exception(ai_log, e, "FFmpeg 执行失败")
        raise
    segments = sorted(list(audio_temp_dir.glob("part_*.mp3")))
    logger.info(f"音频分片数量: {len(segments)}")
    ai_log.info(f"生成音频分片: {len(segments)} 个")
    # 3. Transcribe chunk by chunk, streaming SRT entries as they arrive
    logger.info(f"开始分片转录...")
    ai_log.info("开始批量转录")
    srt_path = video_work_dir / f"{video_stem}.srt"
    global_idx = 1
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        for i, seg in enumerate(segments):
            # Each chunk's timestamps are relative; shift by its position in the video
            offset = i * seg_duration
            logger.info(f"转录进度: {i+1}/{len(segments)}")
            ai_log.info(f"转录片段 {i+1}/{len(segments)}: {seg.name}")
            seg_data = transcribe_with_retry(seg, ai_log)
            # NOTE(review): assumes returned segments support dict-style access
            # ('start'/'end'/'text' keys) — verify against the installed groq SDK
            for chunk in seg_data:
                start = format_srt_time(chunk['start'] + offset)
                end = format_srt_time(chunk['end'] + offset)
                text = chunk['text'].strip()
                srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
                global_idx += 1
    logger.info(f"字幕文件已生成: {srt_path.name}")
    ai_log.info(f"字幕生成完成: 共 {global_idx-1} 条字幕")
    # 4. Clean up temp audio chunks (unless kept for debugging)
    if not KEEP_TEMP_AUDIO:
        logger.info("清理临时音频文件...")
        ai_log.info("清理临时音频目录")
        shutil.rmtree(audio_temp_dir, ignore_errors=True)
    # Drop the completion marker that monitorSrt.py watches for
    (video_work_dir / "transcribe_done.flag").touch()
    logger.info("生成转录完成标记")
    ai_log.info("生成 transcribe_done.flag")
    logger.info(f"处理完成。工作区目录: {video_work_dir}")
    ai_log.info("="*50)
    ai_log.info("Groq 转录任务完成")
    ai_log.info("="*50)
    logger.info(f"AI日志已保存: {ai_log_file}")
if __name__ == "__main__":
    # CLI usage: python video2srt.py <raw video path> <session work dir>
    if len(sys.argv) >= 3:
        # sys.argv[1]: original video path (usually under stage/)
        # sys.argv[2]: target workspace path (usually under session/)
        logger.info(f"接收到转录任务: {sys.argv[1]}")
        try:
            process_single_video(sys.argv[1], sys.argv[2])
        except Exception as e:
            log_exception(logger, e, "视频转录失败")
            raise
    else:
        logger.error("缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")
        print("错误: 缺少参数。用法: python video2srt.py <视频路径> <工作区路径>")