init biliup-next

This commit is contained in:
theshy
2026-04-01 00:44:58 +08:00
commit d0cf1fd0df
127 changed files with 15582 additions and 0 deletions

View File

@ -0,0 +1,179 @@
from __future__ import annotations
import json
import random
import re
import subprocess
import time
from pathlib import Path
from typing import Any
import requests
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Task
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
class LegacyBilibiliCollectionProvider:
    """Adds an uploaded video to a Bilibili season ("collection") via the
    member-center web API.

    Two targets exist: "a" receives the full VOD (resolved by title from the
    account's upload list) and "b" receives the split upload whose BV id was
    persisted to ``bvid.txt``.  ``collection_{a,b}_done.flag`` files in the
    per-session directory make the step idempotent across reruns.
    """

    manifest = ProviderManifest(
        id="bilibili_collection",
        name="Legacy Bilibili Collection Provider",
        version="0.1.0",
        provider_type="collection_provider",
        entrypoint="biliup_next.infra.adapters.bilibili_collection_legacy:LegacyBilibiliCollectionProvider",
        capabilities=["collection"],
        enabled_by_default=True,
    )

    def __init__(self) -> None:
        # season_id -> first section id (None when the lookup failed), so the
        # seasons listing endpoint is queried at most once per season.
        self._section_cache: dict[int, int | None] = {}

    def sync(self, task: Task, target: str, settings: dict[str, Any]) -> dict[str, object]:
        """Add the task's video to collection *target* ("a" or anything else = "b").

        Returns a status dict ({"status": ..., ...}); raises ModuleError on
        missing CSRF cookie, missing bvid.txt, unresolved section, or a
        failed add call.
        """
        session_dir = Path(str(settings["session_dir"])) / task.title
        cookies = self._load_cookies(Path(str(settings["cookies_file"])))
        csrf = cookies.get("bili_jct")
        if not csrf:
            raise ModuleError(code="COOKIE_CSRF_MISSING", message="Cookie 缺少 bili_jct", retryable=False)
        session = requests.Session()
        session.cookies.update(cookies)
        session.headers.update(
            {
                "User-Agent": "Mozilla/5.0",
                "Referer": "https://member.bilibili.com/platform/upload-manager/distribution",
            }
        )
        if target == "a":
            season_id = int(settings["season_id_a"])
            bvid = resolve_full_video_bvid(task.title, session_dir, settings)
            if not bvid:
                # Full VOD not found on the account: mark the step done so it
                # is not retried forever, and report the skip.
                (session_dir / "collection_a_done.flag").touch()
                return {"status": "skipped", "reason": "full_video_bvid_not_found"}
            flag_path = session_dir / "collection_a_done.flag"
        else:
            season_id = int(settings["season_id_b"])
            bvid_path = session_dir / "bvid.txt"
            if not bvid_path.exists():
                raise ModuleError(code="COLLECTION_BVID_MISSING", message=f"缺少 bvid.txt: {session_dir}", retryable=True)
            bvid = bvid_path.read_text(encoding="utf-8").strip()
            flag_path = session_dir / "collection_b_done.flag"
        if season_id <= 0:
            # A non-positive season id disables this collection target.
            flag_path.touch()
            return {"status": "skipped", "reason": "season_disabled"}
        section_id = self._resolve_section_id(session, season_id)
        if not section_id:
            raise ModuleError(code="COLLECTION_SECTION_NOT_FOUND", message=f"未找到合集 section: {season_id}", retryable=True)
        info = self._get_video_info(session, bvid)
        episodes = [info]
        add_result = self._add_videos_batch(session, csrf, section_id, episodes)
        if add_result["status"] == "failed":
            raise ModuleError(
                code="COLLECTION_ADD_FAILED",
                message=add_result["message"],
                retryable=True,
                details=add_result,
            )
        flag_path.touch()
        if add_result["status"] == "added":
            # Optionally move the freshly added episode to the end of the
            # section (newest-last ordering), controlled per target.
            append_key = "append_collection_a_new_to_end" if target == "a" else "append_collection_b_new_to_end"
            if settings.get(append_key, True):
                self._move_videos_to_section_end(session, csrf, section_id, [info["aid"]])
        return {"status": add_result["status"], "target": target, "bvid": bvid, "season_id": season_id}

    @staticmethod
    def _load_cookies(path: Path) -> dict[str, str]:
        """Load cookies from JSON; supports both a flat name->value mapping
        and the biliup ``cookie_info`` export format."""
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
        if "cookie_info" in data:
            return {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])}
        return data

    def _resolve_section_id(self, session: requests.Session, season_id: int) -> int | None:
        """Return the first section id of *season_id*, or None when not found.

        Lookups (including "not found") are memoized in ``self._section_cache``;
        an API-level error is NOT cached so a later call can retry.
        NOTE(review): only one page (pn=1, ps=50) of seasons is scanned —
        seasons beyond the first 50 will never be found; confirm this limit.
        """
        if season_id in self._section_cache:
            return self._section_cache[season_id]
        result = session.get("https://member.bilibili.com/x2/creative/web/seasons", params={"pn": 1, "ps": 50}, timeout=15).json()
        if result.get("code") != 0:
            return None
        for season in result.get("data", {}).get("seasons", []):
            if season.get("season", {}).get("id") == season_id:
                sections = season.get("sections", {}).get("sections", [])
                section_id = sections[0]["id"] if sections else None
                self._section_cache[season_id] = section_id
                return section_id
        self._section_cache[season_id] = None
        return None

    @staticmethod
    def _get_video_info(session: requests.Session, bvid: str) -> dict[str, object]:
        """Fetch aid/cid/title for *bvid*, shaped as an episode payload for
        the add-episodes endpoint.  Raises ModuleError on API failure."""
        result = session.get("https://api.bilibili.com/x/web-interface/view", params={"bvid": bvid}, timeout=15).json()
        if result.get("code") != 0:
            raise ModuleError(
                code="COLLECTION_VIDEO_INFO_FAILED",
                message=f"获取视频信息失败: {result.get('message')}",
                retryable=True,
            )
        data = result["data"]
        return {"aid": data["aid"], "cid": data["cid"], "title": data["title"], "charging_pay": 0}

    @staticmethod
    def _add_videos_batch(session: requests.Session, csrf: str, section_id: int, episodes: list[dict[str, object]]) -> dict[str, object]:
        """Add *episodes* to *section_id*.

        Returns {"status": "added" | "already_exists" | "failed", ...}.
        API code 20080 means the video is already in the collection.
        """
        # Randomized delay between add calls to avoid tripping rate limits.
        time.sleep(random.uniform(5.0, 10.0))
        result = session.post(
            "https://member.bilibili.com/x2/creative/web/season/section/episodes/add",
            params={"csrf": csrf},
            json={"sectionId": section_id, "episodes": episodes},
            timeout=20,
        ).json()
        if result.get("code") == 0:
            return {"status": "added"}
        if result.get("code") == 20080:
            return {"status": "already_exists", "message": result.get("message", "")}
        return {"status": "failed", "message": result.get("message", "unknown error"), "code": result.get("code")}

    @staticmethod
    def _move_videos_to_section_end(session: requests.Session, csrf: str, section_id: int, added_aids: list[int]) -> bool:
        """Reorder the section so episodes whose aid is in *added_aids* come last.

        Best-effort: returns False when the section detail call fails, True
        otherwise (including the nothing-to-reorder case).
        """
        detail = session.get(
            "https://member.bilibili.com/x2/creative/web/season/section",
            params={"id": section_id},
            timeout=20,
        ).json()
        if detail.get("code") != 0:
            return False
        section = detail.get("data", {}).get("section", {})
        episodes = detail.get("data", {}).get("episodes", []) or []
        if not episodes:
            return True
        target_aids = {int(aid) for aid in added_aids}
        existing = []
        appended = []
        # Stable partition: keep relative order of untouched episodes, move
        # the newly added ones to the tail.
        for episode in episodes:
            item = {"id": episode.get("id")}
            if item["id"] is None:
                continue
            if episode.get("aid") in target_aids:
                appended.append(item)
            else:
                existing.append(item)
        ordered = existing + appended
        payload = {
            "section": {
                "id": section["id"],
                "seasonId": section["seasonId"],
                "title": section["title"],
                "type": section["type"],
            },
            "sorts": [{"id": item["id"], "sort": idx + 1} for idx, item in enumerate(ordered)],
        }
        result = session.post(
            "https://member.bilibili.com/x2/creative/web/season/section/edit",
            params={"csrf": csrf},
            json=payload,
            timeout=20,
        ).json()
        return result.get("code") == 0

View File

@ -0,0 +1,179 @@
from __future__ import annotations
import json
import time
from pathlib import Path
from typing import Any
import requests
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Task
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
class LegacyBilibiliTopCommentProvider:
    """Posts and pins two comments for a finished session: a numbered song
    list on the split upload and the raw timeline (songs.txt) on the full
    VOD.  Each target has its own done-flag in the session directory, so
    reruns never double-post.
    """

    manifest = ProviderManifest(
        id="bilibili_top_comment",
        name="Legacy Bilibili Top Comment Provider",
        version="0.1.0",
        provider_type="comment_provider",
        entrypoint="biliup_next.infra.adapters.bilibili_top_comment_legacy:LegacyBilibiliTopCommentProvider",
        capabilities=["comment"],
        enabled_by_default=True,
    )

    def comment(self, task: Task, settings: dict[str, Any]) -> dict[str, object]:
        """Post/pin the split and full comments for *task*.

        Requires songs.txt and bvid.txt in the session directory; songs.json
        is optional and, when present, preferred for building the split
        comment.  Raises ModuleError when inputs or the CSRF cookie are
        missing; returns a per-target status dict otherwise.
        """
        session_dir = Path(str(settings["session_dir"])) / task.title
        songs_path = session_dir / "songs.txt"
        songs_json_path = session_dir / "songs.json"
        bvid_path = session_dir / "bvid.txt"
        if not songs_path.exists() or not bvid_path.exists():
            raise ModuleError(
                code="COMMENT_INPUT_MISSING",
                message=f"缺少评论所需文件: {session_dir}",
                retryable=True,
            )
        timeline_content = songs_path.read_text(encoding="utf-8").strip()
        split_content = self._build_split_comment_content(songs_json_path, songs_path)
        if not timeline_content and not split_content:
            # Nothing to post for either target: mark everything done.
            self._touch_comment_flags(session_dir, split_done=True, full_done=True)
            return {"status": "skipped", "reason": "comment_content_empty"}
        cookies = self._load_cookies(Path(str(settings["cookies_file"])))
        csrf = cookies.get("bili_jct")
        if not csrf:
            raise ModuleError(code="COOKIE_CSRF_MISSING", message="Cookie 缺少 bili_jct", retryable=False)
        session = requests.Session()
        session.cookies.update(cookies)
        session.headers.update(
            {
                "User-Agent": "Mozilla/5.0",
                "Referer": "https://www.bilibili.com/",
                "Origin": "https://www.bilibili.com",
            }
        )
        split_result = {"status": "skipped", "reason": "disabled"}
        full_result = {"status": "skipped", "reason": "disabled"}
        split_done = (session_dir / "comment_split_done.flag").exists()
        full_done = (session_dir / "comment_full_done.flag").exists()
        if settings.get("post_split_comment", True) and not split_done:
            split_bvid = bvid_path.read_text(encoding="utf-8").strip()
            if split_content:
                split_result = self._post_and_top_comment(session, csrf, split_bvid, split_content, "split")
            else:
                split_result = {"status": "skipped", "reason": "split_comment_empty"}
            split_done = True
            (session_dir / "comment_split_done.flag").touch()
        elif not split_done:
            # Feature disabled in settings: still record the flag so later
            # runs skip cleanly.
            split_done = True
            (session_dir / "comment_split_done.flag").touch()
        if settings.get("post_full_video_timeline_comment", True) and not full_done:
            full_bvid = resolve_full_video_bvid(task.title, session_dir, settings)
            if full_bvid and timeline_content:
                full_result = self._post_and_top_comment(session, csrf, full_bvid, timeline_content, "full")
            else:
                full_result = {"status": "skipped", "reason": "full_video_bvid_not_found" if not full_bvid else "timeline_comment_empty"}
            full_done = True
            (session_dir / "comment_full_done.flag").touch()
        elif not full_done:
            full_done = True
            (session_dir / "comment_full_done.flag").touch()
        if split_done and full_done:
            # Aggregate flag signalling the whole comment step is complete.
            (session_dir / "comment_done.flag").touch()
        return {"status": "ok", "split": split_result, "full": full_result}

    def _post_and_top_comment(
        self,
        session: requests.Session,
        csrf: str,
        bvid: str,
        content: str,
        target: str,
    ) -> dict[str, object]:
        """Post *content* as a comment on *bvid* and pin it to the top.

        Resolves the aid via the view API, posts the reply, waits 3 s (the
        reply must exist server-side before it can be pinned), then pins it.
        Raises ModuleError on any API failure.
        """
        view = session.get("https://api.bilibili.com/x/web-interface/view", params={"bvid": bvid}, timeout=15).json()
        if view.get("code") != 0:
            raise ModuleError(
                code="COMMENT_VIEW_FAILED",
                message=f"获取{target}视频信息失败: {view.get('message')}",
                retryable=True,
            )
        aid = view["data"]["aid"]
        add_res = session.post(
            "https://api.bilibili.com/x/v2/reply/add",
            data={"type": 1, "oid": aid, "message": content, "plat": 1, "csrf": csrf},
            timeout=15,
        ).json()
        if add_res.get("code") != 0:
            raise ModuleError(
                code="COMMENT_POST_FAILED",
                message=f"发布{target}评论失败: {add_res.get('message')}",
                retryable=True,
            )
        rpid = add_res["data"]["rpid"]
        time.sleep(3)
        top_res = session.post(
            "https://api.bilibili.com/x/v2/reply/top",
            data={"type": 1, "oid": aid, "rpid": rpid, "action": 1, "csrf": csrf},
            timeout=15,
        ).json()
        if top_res.get("code") != 0:
            raise ModuleError(
                code="COMMENT_TOP_FAILED",
                message=f"置顶{target}评论失败: {top_res.get('message')}",
                retryable=True,
            )
        return {"status": "ok", "bvid": bvid, "aid": aid, "rpid": rpid}

    @staticmethod
    def _build_split_comment_content(songs_json_path: Path, songs_txt_path: Path) -> str:
        """Build the numbered song-list comment.

        Prefers songs.json ("N. title[artist]" per entry); falls back to
        songs.txt, stripping a leading timestamp column (first whitespace-
        separated field containing ":").  Returns "" when no source exists.
        """
        if songs_json_path.exists():
            try:
                data = json.loads(songs_json_path.read_text(encoding="utf-8"))
                lines = []
                for index, song in enumerate(data.get("songs", []), 1):
                    title = str(song.get("title", "")).strip()
                    artist = str(song.get("artist", "")).strip()
                    if not title:
                        continue
                    # NOTE(review): artist is appended with no separator after
                    # the title — confirm this fused format is intentional.
                    suffix = f"{artist}" if artist else ""
                    lines.append(f"{index}. {title}{suffix}")
                if lines:
                    return "\n".join(lines)
            except json.JSONDecodeError:
                # Malformed JSON: silently fall back to the txt source.
                pass
        if songs_txt_path.exists():
            lines = []
            for index, raw in enumerate(songs_txt_path.read_text(encoding="utf-8").splitlines(), 1):
                text = raw.strip()
                if not text:
                    continue
                parts = text.split(" ", 1)
                # A first field containing ":" is treated as a timestamp and dropped.
                song_text = parts[1] if len(parts) == 2 and ":" in parts[0] else text
                lines.append(f"{index}. {song_text}")
            return "\n".join(lines)
        return ""

    @staticmethod
    def _load_cookies(path: Path) -> dict[str, str]:
        """Load cookies from JSON; supports both a flat name->value mapping
        and the biliup ``cookie_info`` export format."""
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
        if "cookie_info" in data:
            return {c["name"]: c["value"] for c in data.get("cookie_info", {}).get("cookies", [])}
        return data

    @staticmethod
    def _touch_comment_flags(session_dir: Path, *, split_done: bool, full_done: bool) -> None:
        """Create the requested per-target done-flags, plus the aggregate
        comment_done.flag when both targets are complete."""
        if split_done:
            (session_dir / "comment_split_done.flag").touch()
        if full_done:
            (session_dir / "comment_full_done.flag").touch()
        if split_done and full_done:
            (session_dir / "comment_done.flag").touch()

View File

@ -0,0 +1,176 @@
from __future__ import annotations
import json
import random
import re
import subprocess
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import PublishRecord, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.legacy_paths import legacy_project_root
class LegacyBiliupPublishProvider:
    """Uploads clip videos to Bilibili with the legacy ``biliup`` CLI.

    The first (up to 5) clips are uploaded as a new post; remaining clips
    are appended to the created BV in batches of 5.  ``bvid.txt`` and
    ``upload_done.flag`` in the work directory make the step resumable.
    """

    manifest = ProviderManifest(
        id="biliup_cli",
        name="Legacy biliup CLI Publish Provider",
        version="0.1.0",
        provider_type="publish_provider",
        entrypoint="biliup_next.infra.adapters.biliup_publish_legacy:LegacyBiliupPublishProvider",
        capabilities=["publish"],
        enabled_by_default=True,
    )

    def __init__(self, next_root: Path):
        # next_root: the biliup-next checkout; the legacy project (biliup
        # binary, configs, default session dir) lives one level above it.
        self.next_root = next_root
        self.legacy_root = legacy_project_root(next_root)

    def publish(self, task: Task, clip_videos: list, settings: dict[str, Any]) -> PublishRecord:
        """Upload *clip_videos* for *task* and return the publish record.

        If bvid.txt already exists, the upload is considered done and the
        record is rebuilt from it.  Raises ModuleError when there are no
        clips or when a CLI upload/append invocation fails.
        """
        work_dir = Path(str(settings.get("session_dir", str(self.legacy_root / "session")))) / task.title
        bvid_file = work_dir / "bvid.txt"
        upload_done = work_dir / "upload_done.flag"
        config = self._load_upload_config(Path(str(settings.get("upload_config_file", str(self.legacy_root / "upload_config.json")))))
        if bvid_file.exists():
            # Resume path: a previous run already created the post.
            bvid = bvid_file.read_text(encoding="utf-8").strip()
            return PublishRecord(
                id=None,
                task_id=task.id,
                platform="bilibili",
                aid=None,
                bvid=bvid,
                title=task.title,
                published_at=utc_now_iso(),
            )
        video_files = [artifact.path for artifact in clip_videos]
        if not video_files:
            raise ModuleError(
                code="PUBLISH_NO_CLIPS",
                message=f"没有可上传的切片: {task.id}",
                retryable=False,
            )
        parsed = self._parse_filename(task.title, config)
        streamer = parsed.get("streamer", task.title)
        date = parsed.get("date", "")
        songs_txt = work_dir / "songs.txt"
        songs_list = songs_txt.read_text(encoding="utf-8").strip() if songs_txt.exists() else ""
        songs_json = work_dir / "songs.json"
        song_count = 0
        if songs_json.exists():
            song_count = len(json.loads(songs_json.read_text(encoding="utf-8")).get("songs", []))
        quote = self._get_random_quote(config)
        # Variables available to the title/description/dynamic/tag templates.
        template_vars = {
            "streamer": streamer,
            "date": date,
            "song_count": song_count,
            "songs_list": songs_list,
            "daily_quote": quote.get("text", ""),
            "quote_author": quote.get("author", ""),
        }
        template = config.get("template", {})
        title = template.get("title", "{streamer}_{date}").format(**template_vars)
        description = template.get("description", "{songs_list}").format(**template_vars)
        dynamic = template.get("dynamic", "").format(**template_vars)
        tags = template.get("tag", "翻唱,唱歌,音乐").format(**template_vars)
        streamer_cfg = config.get("streamers", {})
        if streamer in streamer_cfg:
            # A per-streamer tag override takes precedence over the template.
            tags = streamer_cfg[streamer].get("tags", tags)
        upload_settings = config.get("upload_settings", {})
        tid = upload_settings.get("tid", 31)
        biliup_path = str(settings.get("biliup_path", str(self.legacy_root / "biliup")))
        cookie_file = str(settings.get("cookie_file", str(self.legacy_root / "cookies.json")))
        # Best-effort cookie refresh; the result is deliberately ignored.
        subprocess.run([biliup_path, "-u", cookie_file, "renew"], capture_output=True, text=True)
        first_batch = video_files[:5]
        remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
        upload_cmd = [
            biliup_path, "-u", cookie_file, "upload",
            *first_batch,
            "--title", title,
            "--tid", str(tid),
            "--tag", tags,
            "--copyright", str(upload_settings.get("copyright", 2)),
            "--source", upload_settings.get("source", "直播回放"),
            "--desc", description,
        ]
        if dynamic:
            upload_cmd.extend(["--dynamic", dynamic])
        bvid = self._run_upload(upload_cmd, "首批上传")
        # Persist the BV id immediately so a failed append can resume.
        # NOTE(review): on resume the remaining batches are NOT re-appended
        # (the bvid.txt early-return above wins) — confirm this is intended.
        bvid_file.write_text(bvid, encoding="utf-8")
        for idx, batch in enumerate(remaining_batches, 2):
            append_cmd = [biliup_path, "-u", cookie_file, "append", "--vid", bvid, *batch]
            self._run_append(append_cmd, f"追加第 {idx}")
        upload_done.touch()
        return PublishRecord(
            id=None,
            task_id=task.id,
            platform="bilibili",
            aid=None,
            bvid=bvid,
            title=title,
            published_at=utc_now_iso(),
        )

    def _run_upload(self, cmd: list[str], label: str) -> str:
        """Run an upload command and extract the created BV id from stdout.

        Tries the JSON-ish '"bvid":"BV..."' form first, then any bare BV id.
        Raises ModuleError when the command fails or no BV id is found.
        """
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            match = re.search(r'"bvid":"(BV[A-Za-z0-9]+)"', result.stdout) or re.search(r'(BV[A-Za-z0-9]+)', result.stdout)
            if match:
                return match.group(1)
        raise ModuleError(
            code="PUBLISH_UPLOAD_FAILED",
            message=f"{label}失败",
            retryable=True,
            details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
        )

    def _run_append(self, cmd: list[str], label: str) -> None:
        """Run an append command; raise ModuleError on non-zero exit."""
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            return
        raise ModuleError(
            code="PUBLISH_APPEND_FAILED",
            message=f"{label}失败",
            retryable=True,
            details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
        )

    def _load_upload_config(self, path: Path) -> dict[str, Any]:
        """Load the upload config JSON; a missing file yields an empty config."""
        if not path.exists():
            return {}
        return json.loads(path.read_text(encoding="utf-8"))

    def _parse_filename(self, filename: str, config: dict[str, Any] | None = None) -> dict[str, str]:
        """Extract named fields (e.g. streamer, date) from *filename* using the
        configured regex patterns; falls back to the whole name as streamer."""
        config = config or {}
        patterns = config.get("filename_patterns", {}).get("patterns", [])
        for pattern_config in patterns:
            regex = pattern_config.get("regex")
            if not regex:
                continue
            match = re.match(regex, filename)
            if match:
                data = match.groupdict()
                date_format = pattern_config.get("date_format", "{date}")
                try:
                    data["date"] = date_format.format(**data)
                except KeyError:
                    # date_format references a group the regex did not capture;
                    # keep the raw groupdict in that case.
                    pass
                return data
        return {"streamer": filename, "date": ""}

    def _get_random_quote(self, config: dict[str, Any]) -> dict[str, str]:
        """Pick a random quote from config, or empty text/author when none."""
        quotes = config.get("quotes", [])
        if not quotes:
            return {"text": "", "author": ""}
        return random.choice(quotes)

View File

@ -0,0 +1,140 @@
from __future__ import annotations
import json
import os
import subprocess
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.legacy_paths import legacy_project_root
# JSON Schema handed to `codex exec --output-schema` so the model's output is
# constrained to a well-formed song list: SRT-style start/end timestamps,
# title/artist, a confidence score, and the subtitle evidence for the match.
SONG_SCHEMA = {
    "type": "object",
    "properties": {
        "songs": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "start": {"type": "string"},
                    "end": {"type": "string"},
                    "title": {"type": "string"},
                    "artist": {"type": "string"},
                    "confidence": {"type": "number"},
                    "evidence": {"type": "string"}
                },
                "required": ["start", "end", "title", "artist", "confidence", "evidence"],
                "additionalProperties": False
            }
        }
    },
    "required": ["songs"],
    "additionalProperties": False
}
# Chinese-language task prompt for the codex CLI: detect songs sung during the
# stream from the subtitle file, with merging/filtering rules and strict
# schema-conformant JSON output.  This is runtime data sent to the model —
# do not translate or reflow it.
TASK_PROMPT = """你是音乐片段识别助手。当前目录下有一个字幕文件。
任务:
1. 结合字幕内容并允许联网搜索进行纠错(识别同音字、唱错等)。
2. 识别出直播中唱过的所有歌曲,给出精确的开始和结束时间。歌曲开始时间规则:
- 歌曲开始时间应使用“上一句字幕的结束时间”作为 start_time。
- 这样可以尽量保留歌曲可能存在的前奏。
3. 同一首歌间隔 ≤160s 合并,>160s 分开。若连续识别出相同歌曲,且中间只有短暂对白、空白、转场或无歌词段,应合并为同一首歌.
4. 忽略纯聊天片段。
5. 无法确认的歌曲丢弃,宁缺毋滥:你的输出将直接面向最终用户。
6. 忽略短片段:如果一段演唱持续时间总和少于 15 秒,视为随口哼唱,请直接忽略,不计入列表。
7. 仔细分析每一句歌词,识别出相关歌曲后, 使用该歌曲歌词上下文对比字幕上下文,确定歌曲起始与停止时间
8.歌曲标注规则:
- 可以在歌曲名称后使用括号 () 添加补充说明。
- 常见标注示例:
- (片段):歌曲演唱时间较短,例如 < 60 秒
- (清唱):无伴奏演唱
- (副歌):只演唱副歌部分
- 标注应简洁,仅在确有必要时使用。
9. 通过歌曲起始和结束时间自检, 一般歌曲长度在5分钟以内, 1分钟以上, 可疑片段重新联网搜索检查.
最后请严格按照 Schema 生成 JSON 数据。"""
class LegacyCodexSongDetector:
    """Runs the codex CLI in the subtitle's directory to detect songs, then
    returns songs.json (schema-constrained) and songs.txt (simple
    "HH:MM:SS title..." lines) as artifacts."""

    manifest = ProviderManifest(
        id="codex",
        name="Legacy Codex Song Detector",
        version="0.1.0",
        provider_type="song_detector",
        entrypoint="biliup_next.infra.adapters.codex_legacy:LegacyCodexSongDetector",
        capabilities=["song_detect"],
        enabled_by_default=True,
    )

    def __init__(self, next_root: Path):
        self.next_root = next_root
        self.legacy_root = legacy_project_root(next_root)

    def detect(self, task: Task, subtitle_srt: Artifact, settings: dict[str, Any]) -> tuple[Artifact, Artifact]:
        """Detect songs in *subtitle_srt* and return (songs_json, songs_txt).

        Writes song_schema.json next to the subtitle, invokes ``codex exec``
        with the task prompt, and derives songs.txt from songs.json when the
        CLI produced only the JSON.  Raises ModuleError when the CLI fails
        or the expected output files are missing afterwards.
        """
        work_dir = Path(subtitle_srt.path).parent
        schema_path = work_dir / "song_schema.json"
        schema_path.write_text(json.dumps(SONG_SCHEMA, ensure_ascii=False, indent=2), encoding="utf-8")
        # CODEX_CMD is exported into the child environment; presumably
        # consumed by wrapper tooling — the invoked command itself is taken
        # from settings below.  TODO confirm the consumer.
        env = {
            **os.environ,
            "CODEX_CMD": str(settings.get("codex_cmd", "codex")),
        }
        cmd = [
            str(settings.get("codex_cmd", "codex")),
            "exec",
            # The multi-line prompt is flattened to one line for the CLI arg.
            TASK_PROMPT.replace("\n", " "),
            "--full-auto",
            "--sandbox", "workspace-write",
            "--output-schema", "./song_schema.json",
            "-o", "songs.json",
            "--skip-git-repo-check",
            "--json",
        ]
        result = subprocess.run(
            cmd,
            cwd=str(work_dir),
            capture_output=True,
            text=True,
            env=env,
        )
        if result.returncode != 0:
            raise ModuleError(
                code="SONG_DETECT_FAILED",
                message="codex exec 执行失败",
                retryable=True,
                details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
            )
        songs_json = work_dir / "songs.json"
        songs_txt = work_dir / "songs.txt"
        if songs_json.exists() and not songs_txt.exists():
            data = json.loads(songs_json.read_text(encoding="utf-8"))
            with songs_txt.open("w", encoding="utf-8") as f:
                for song in data.get("songs", []):
                    # Truncate "HH:MM:SS,mmm" / "HH:MM:SS.mmm" to "HH:MM:SS".
                    start_time = song["start"].split(",")[0].split(".")[0]
                    # NOTE(review): title and artist are concatenated with no
                    # separator — looks like a missing delimiter; confirm the
                    # downstream songs.txt format before changing this.
                    f.write(f"{start_time} {song['title']}{song['artist']}\n")
        if not songs_json.exists() or not songs_txt.exists():
            raise ModuleError(
                code="SONG_DETECT_OUTPUT_MISSING",
                message=f"未生成 songs.json/songs.txt: {work_dir}",
                retryable=True,
                details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
            )
        return (
            Artifact(
                id=None,
                task_id=task.id,
                artifact_type="songs_json",
                path=str(songs_json),
                metadata_json=json.dumps({"provider": "codex_legacy"}),
                created_at=utc_now_iso(),
            ),
            Artifact(
                id=None,
                task_id=task.id,
                artifact_type="songs_txt",
                path=str(songs_txt),
                metadata_json=json.dumps({"provider": "codex_legacy"}),
                created_at=utc_now_iso(),
            ),
        )

View File

@ -0,0 +1,92 @@
from __future__ import annotations
import json
import subprocess
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.legacy_paths import legacy_project_root
class LegacyFfmpegSplitProvider:
    """Cuts per-song clips out of the source video with stream-copy ffmpeg
    (no re-encode), driven by the start/end times in songs.json."""

    manifest = ProviderManifest(
        id="ffmpeg_copy",
        name="Legacy FFmpeg Split Provider",
        version="0.1.0",
        provider_type="split_provider",
        entrypoint="biliup_next.infra.adapters.ffmpeg_split_legacy:LegacyFfmpegSplitProvider",
        capabilities=["split"],
        enabled_by_default=True,
    )

    def __init__(self, next_root: Path):
        self.next_root = next_root
        self.legacy_root = legacy_project_root(next_root)

    def split(self, task: Task, songs_json: Artifact, source_video: Artifact, settings: dict[str, Any]) -> list[Artifact]:
        """Cut one clip per song into ``split_video/`` next to songs.json.

        Resumable: a split_done.flag short-circuits the whole step, and
        individual clips whose output file already exists are skipped.
        Raises ModuleError when songs.json is empty or ffmpeg fails.
        """
        work_dir = Path(songs_json.path).parent
        split_dir = work_dir / "split_video"
        split_done = work_dir / "split_done.flag"
        if split_done.exists() and split_dir.exists():
            return self._collect_existing_clips(task.id, split_dir)
        with Path(songs_json.path).open("r", encoding="utf-8") as f:
            data = json.load(f)
        songs = data.get("songs", [])
        if not songs:
            raise ModuleError(
                code="SPLIT_SONGS_EMPTY",
                message=f"songs.json 中没有歌曲: {songs_json.path}",
                retryable=False,
            )
        split_dir.mkdir(parents=True, exist_ok=True)
        ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
        video_path = Path(source_video.path)
        for idx, song in enumerate(songs, 1):
            # SRT timestamps use "," before milliseconds; ffmpeg expects ".".
            start = str(song.get("start", "00:00:00,000")).replace(",", ".")
            end = str(song.get("end", "00:00:00,000")).replace(",", ".")
            # Sanitize path separators out of the title for the filename.
            title = str(song.get("title", "UNKNOWN")).replace("/", "_").replace("\\", "_")
            output_path = split_dir / f"{idx:02d}_{title}{video_path.suffix}"
            if output_path.exists():
                continue
            # -ss/-to before -i request input-side seeking, which is fast
            # with -c copy; with stream copy the cut snaps to keyframes, so
            # boundaries are approximate.  NOTE(review): input-side -to needs
            # a reasonably recent ffmpeg — confirm the deployed version.
            cmd = [
                ffmpeg_bin,
                "-y",
                "-ss", start,
                "-to", end,
                "-i", str(video_path),
                "-c", "copy",
                "-map_metadata", "0",
                str(output_path),
            ]
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                raise ModuleError(
                    code="SPLIT_FFMPEG_FAILED",
                    message=f"ffmpeg 切割失败: {output_path.name}",
                    retryable=True,
                    details={"stderr": result.stderr[-2000:]},
                )
        split_done.touch()
        return self._collect_existing_clips(task.id, split_dir)

    def _collect_existing_clips(self, task_id: str, split_dir: Path) -> list[Artifact]:
        """Wrap every file in *split_dir* (sorted by name) as a clip artifact."""
        artifacts: list[Artifact] = []
        for path in sorted(split_dir.iterdir()):
            if path.is_file():
                artifacts.append(
                    Artifact(
                        id=None,
                        task_id=task_id,
                        artifact_type="clip_video",
                        path=str(path),
                        metadata_json=json.dumps({"provider": "ffmpeg_copy"}),
                        created_at=utc_now_iso(),
                    )
                )
        return artifacts

View File

@ -0,0 +1,68 @@
from __future__ import annotations
import re
import subprocess
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
def normalize_title(text: str) -> str:
    """Normalize a video title for comparison.

    Keeps only CJK ideographs (U+4E00–U+9FA5), ASCII letters and digits,
    then lowercases the result, so punctuation/spacing differences do not
    break title matching.
    """
    keep = re.compile(r"[\u4e00-\u9fa5a-zA-Z0-9]")
    return "".join(keep.findall(text)).lower()
def fetch_biliup_list(settings: dict[str, Any], *, max_pages: int = 5) -> list[dict[str, str]]:
    """List publicly visible uploads on the account via ``biliup list``.

    Parses the CLI's tab-separated stdout into ``{"bvid": ..., "title": ...}``
    dicts, keeping only rows whose status column says the video is open for
    viewing.  Raises ModuleError when the binary is missing or exits non-zero.
    """
    command = [
        str(settings["biliup_path"]),
        "-u",
        str(settings["cookie_file"]),
        "list",
        "--max-pages",
        str(max_pages),
    ]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, encoding="utf-8", check=False)
    except FileNotFoundError as exc:
        raise ModuleError(code="BILIUP_NOT_FOUND", message=f"找不到 biliup: {settings['biliup_path']}", retryable=False) from exc
    if proc.returncode != 0:
        raise ModuleError(
            code="BILIUP_LIST_FAILED",
            message="biliup list 执行失败",
            retryable=True,
            details={"stderr": (proc.stderr or "")[-1000:]},
        )
    parsed: list[dict[str, str]] = []
    for raw_line in proc.stdout.splitlines():
        if not raw_line.startswith("BV"):
            continue
        fields = raw_line.split("\t")
        # Drop rows whose third column does not contain "开放浏览"
        # (i.e. the video is not publicly viewable).
        if len(fields) >= 3 and "开放浏览" not in fields[2]:
            continue
        if len(fields) >= 2:
            parsed.append({"bvid": fields[0].strip(), "title": fields[1].strip()})
    return parsed
def resolve_full_video_bvid(title: str, session_dir: Path, settings: dict[str, Any]) -> str | None:
    """Locate the BV id of the already-uploaded full VOD matching *title*.

    Checks the per-session cache file ``full_video_bvid.txt`` first; on a
    miss, scans the account's upload list for an exact normalized-title
    match, then (when ``allow_fuzzy_full_video_match`` is set) a substring
    match in either direction.  Matches are written back to the cache file.
    Returns None when nothing matches.
    """
    cache_file = session_dir / "full_video_bvid.txt"
    if cache_file.exists():
        cached = cache_file.read_text(encoding="utf-8").strip()
        if cached.startswith("BV"):
            return cached
    wanted = normalize_title(title)
    candidates = fetch_biliup_list(settings)

    def remember(bvid: str) -> str:
        # Persist the hit so later steps skip the CLI round-trip.
        cache_file.write_text(bvid, encoding="utf-8")
        return bvid

    for entry in candidates:
        if normalize_title(entry["title"]) == wanted:
            return remember(entry["bvid"])
    if settings.get("allow_fuzzy_full_video_match", False):
        for entry in candidates:
            candidate = normalize_title(entry["title"])
            if wanted in candidate or candidate in wanted:
                return remember(entry["bvid"])
    return None

View File

@ -0,0 +1,79 @@
from __future__ import annotations
import json
import os
import subprocess
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.legacy_paths import legacy_project_root
class LegacyGroqTranscribeProvider:
manifest = ProviderManifest(
id="groq",
name="Legacy Groq Transcribe Provider",
version="0.1.0",
provider_type="transcribe_provider",
entrypoint="biliup_next.infra.adapters.groq_legacy:LegacyGroqTranscribeProvider",
capabilities=["transcribe"],
enabled_by_default=True,
)
def __init__(self, next_root: Path):
self.next_root = next_root
self.legacy_root = legacy_project_root(next_root)
self.python_bin = self._resolve_python_bin()
def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
session_dir = Path(str(settings.get("session_dir", str(self.legacy_root / "session"))))
work_dir = (session_dir / task.title).resolve()
cmd = [
self.python_bin,
"video2srt.py",
source_video.path,
str(work_dir),
]
env = {
**os.environ,
"GROQ_API_KEY": str(settings.get("groq_api_key", "")),
"FFMPEG_BIN": str(settings.get("ffmpeg_bin", "ffmpeg")),
}
result = subprocess.run(
cmd,
cwd=str(self.legacy_root),
capture_output=True,
text=True,
env=env,
)
if result.returncode != 0:
raise ModuleError(
code="TRANSCRIBE_FAILED",
message="legacy video2srt.py 执行失败",
retryable=True,
details={"stderr": result.stderr[-2000:], "stdout": result.stdout[-2000:]},
)
srt_path = work_dir / f"{task.title}.srt"
if not srt_path.exists():
raise ModuleError(
code="TRANSCRIBE_OUTPUT_MISSING",
message=f"未找到字幕文件: {srt_path}",
retryable=False,
)
return Artifact(
id=None,
task_id=task.id,
artifact_type="subtitle_srt",
path=str(srt_path),
metadata_json=json.dumps({"provider": "groq_legacy"}),
created_at=utc_now_iso(),
)
def _resolve_python_bin(self) -> str:
venv_python = self.legacy_root / ".venv" / "bin" / "python"
if venv_python.exists():
return str(venv_python)
return "python"

View File

@ -0,0 +1,27 @@
from __future__ import annotations
from pathlib import Path
class CommentFlagMigrationService:
    """Backfills per-target comment flags for sessions created before the
    split/full flag scheme existed."""

    def migrate(self, session_dir: Path) -> dict[str, int]:
        """Reconcile legacy comment flags under *session_dir*.

        For every session folder that already has ``comment_done.flag``:
        create a missing ``comment_split_done.flag`` (counted as migrated)
        and count — without creating — a missing ``comment_full_done.flag``.
        Returns both counters; a missing *session_dir* yields zeros.
        """
        stats = {"migrated_split_flags": 0, "legacy_untracked_full": 0}
        if not session_dir.exists():
            return stats
        session_folders = sorted(entry for entry in session_dir.iterdir() if entry.is_dir())
        for folder in session_folders:
            if not (folder / "comment_done.flag").exists():
                continue
            split_flag = folder / "comment_split_done.flag"
            if not split_flag.exists():
                split_flag.touch()
                stats["migrated_split_flags"] += 1
            if not (folder / "comment_full_done.flag").exists():
                stats["legacy_untracked_full"] += 1
        return stats

View File

@ -0,0 +1,78 @@
from __future__ import annotations
import sqlite3
from pathlib import Path
# Idempotent DDL for the application's SQLite store: tasks, their per-step
# status, produced artifacts, publish results, and a generic action audit log.
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS tasks (
id TEXT PRIMARY KEY,
source_type TEXT NOT NULL,
source_path TEXT NOT NULL,
title TEXT NOT NULL,
status TEXT NOT NULL,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS task_steps (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_id TEXT NOT NULL,
step_name TEXT NOT NULL,
status TEXT NOT NULL,
error_code TEXT,
error_message TEXT,
retry_count INTEGER NOT NULL DEFAULT 0,
started_at TEXT,
finished_at TEXT,
FOREIGN KEY(task_id) REFERENCES tasks(id)
);
CREATE TABLE IF NOT EXISTS artifacts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_id TEXT NOT NULL,
artifact_type TEXT NOT NULL,
path TEXT NOT NULL,
metadata_json TEXT NOT NULL,
created_at TEXT NOT NULL,
FOREIGN KEY(task_id) REFERENCES tasks(id)
);
CREATE TABLE IF NOT EXISTS publish_records (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_id TEXT NOT NULL,
platform TEXT NOT NULL,
aid TEXT,
bvid TEXT,
title TEXT NOT NULL,
published_at TEXT NOT NULL,
FOREIGN KEY(task_id) REFERENCES tasks(id)
);
CREATE TABLE IF NOT EXISTS action_records (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_id TEXT,
action_name TEXT NOT NULL,
status TEXT NOT NULL,
summary TEXT NOT NULL,
details_json TEXT NOT NULL,
created_at TEXT NOT NULL,
FOREIGN KEY(task_id) REFERENCES tasks(id)
);
"""


class Database:
    """Thin wrapper around the app's SQLite file: owns the path, hands out
    configured connections, and applies the schema idempotently."""

    def __init__(self, db_path: Path):
        # Filesystem work is deferred to connect(); constructing the object
        # never touches disk.
        self.db_path = db_path

    def connect(self) -> sqlite3.Connection:
        """Open a connection with dict-like row access, creating the parent
        directory on demand."""
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        connection = sqlite3.connect(self.db_path)
        connection.row_factory = sqlite3.Row
        return connection

    def initialize(self) -> None:
        """Create all tables if they do not exist yet."""
        with self.connect() as conn:
            conn.executescript(SCHEMA_SQL)
            conn.commit()

View File

@ -0,0 +1,68 @@
from __future__ import annotations
import json
import shutil
from pathlib import Path
from typing import Any
from biliup_next.core.config import SettingsService
class LegacyAssetSync:
    """Copies externally-referenced assets (cookies, upload config, the
    biliup binary) into ``runtime/`` and rewrites the corresponding settings
    to the new relative paths, making the checkout self-contained."""

    def __init__(self, root_dir: Path):
        self.root_dir = root_dir
        self.runtime_dir = self.root_dir / "runtime"
        self.settings_service = SettingsService(root_dir)

    def sync(self) -> dict[str, Any]:
        """Sync assets into runtime/ and persist the updated settings.

        Returns a report with the copied (from, to) pairs (deduplicated)
        and any source paths that were missing.
        """
        self.runtime_dir.mkdir(parents=True, exist_ok=True)
        bundle = self.settings_service.load()
        # JSON round-trip = cheap deep copy, so the loaded bundle is never
        # mutated in place.
        settings = json.loads(json.dumps(bundle.settings))
        copied: list[dict[str, str]] = []
        missing: list[str] = []
        copied_pairs: set[tuple[str, str]] = set()
        # (settings group, field, destination relative to root) for every
        # file-valued setting that should live under runtime/.  Note the two
        # cookie fields intentionally share one destination.
        mapping = [
            ("paths", "cookies_file", "runtime/cookies.json"),
            ("paths", "upload_config_file", "runtime/upload_config.json"),
            ("publish", "cookie_file", "runtime/cookies.json"),
        ]
        for group, field, target_rel in mapping:
            current = Path(str(settings[group][field]))
            current_abs = current if current.is_absolute() else (self.root_dir / current).resolve()
            target_abs = (self.root_dir / target_rel).resolve()
            if current_abs == target_abs and target_abs.exists():
                # Already pointing at the runtime copy; nothing to do.
                continue
            if current_abs.exists():
                shutil.copy2(current_abs, target_abs)
                settings[group][field] = target_rel
                pair = (str(current_abs), str(target_abs))
                if pair not in copied_pairs:
                    # Dedup: the shared cookies destination is reported once.
                    copied_pairs.add(pair)
                    copied.append({"from": pair[0], "to": pair[1]})
            else:
                missing.append(f"{group}.{field}:{current_abs}")
        # The biliup binary is handled separately because it also needs
        # executable permission bits after the copy.
        publish_path = Path(str(settings["publish"]["biliup_path"]))
        publish_abs = publish_path if publish_path.is_absolute() else (self.root_dir / publish_path).resolve()
        local_biliup = self.root_dir / "runtime" / "biliup"
        if publish_abs.exists() and publish_abs != local_biliup:
            shutil.copy2(publish_abs, local_biliup)
            local_biliup.chmod(0o755)
            settings["publish"]["biliup_path"] = "runtime/biliup"
            pair = (str(publish_abs), str(local_biliup))
            if pair not in copied_pairs:
                copied_pairs.add(pair)
                copied.append({"from": pair[0], "to": pair[1]})
        # Persist through the staged/promote flow of the settings service.
        self.settings_service.save_staged(settings)
        self.settings_service.promote_staged()
        return {
            "ok": True,
            "runtime_dir": str(self.runtime_dir),
            "copied": copied,
            "missing": missing,
        }

View File

@ -0,0 +1,7 @@
from __future__ import annotations
from pathlib import Path
def legacy_project_root(next_root: Path) -> Path:
    """Return the legacy project root, i.e. the directory containing *next_root*."""
    parent_dir = next_root.parent
    return parent_dir

View File

@ -0,0 +1,42 @@
from __future__ import annotations
from pathlib import Path
# Allow-list: only these log files may ever be read through this module.
ALLOWED_LOG_FILES = {
    "monitor.log": Path("/home/theshy/biliup/logs/system/monitor.log"),
    "monitorSrt.log": Path("/home/theshy/biliup/logs/system/monitorSrt.log"),
    "monitorSongs.log": Path("/home/theshy/biliup/logs/system/monitorSongs.log"),
    "upload.log": Path("/home/theshy/biliup/logs/system/upload.log"),
    "session_top_comment.py.log": Path("/home/theshy/biliup/logs/system/session_top_comment.py.log"),
    "add_to_collection.py.log": Path("/home/theshy/biliup/logs/system/add_to_collection.py.log"),
}
class LogReader:
    """Read-only access to the allow-listed system log files."""
    def list_logs(self) -> dict[str, object]:
        """Describe every allow-listed log file and whether it exists on disk."""
        items = [
            {"name": name, "path": str(path), "exists": path.exists()}
            for name, path in sorted(ALLOWED_LOG_FILES.items())
        ]
        return {"items": items}
    def tail(self, name: str, lines: int = 200, contains: str | None = None) -> dict[str, object]:
        """Return up to *lines* trailing lines of log *name*, optionally
        keeping only lines containing *contains*; *lines* is clamped to 1..1000."""
        try:
            path = ALLOWED_LOG_FILES[name]
        except KeyError:
            raise ValueError(f"unsupported log: {name}") from None
        if not path.exists():
            return {"name": name, "path": str(path), "exists": False, "content": ""}
        entries = path.read_text(encoding="utf-8", errors="replace").splitlines()
        if contains:
            entries = [entry for entry in entries if contains in entry]
        keep = max(1, min(lines, 1000))
        return {
            "name": name,
            "path": str(path),
            "exists": True,
            "content": "\n".join(entries[-keep:]),
        }

View File

@ -0,0 +1,61 @@
from __future__ import annotations
import importlib
import inspect
import json
from pathlib import Path
from typing import Any
from biliup_next.core.providers import ProviderManifest
class PluginLoader:
    """Discovers provider manifests on disk and instantiates their providers.

    Manifests are JSON files under ``src/biliup_next/plugins/manifests``;
    each names a ``module:attr`` entrypoint to the provider class.
    """
    def __init__(self, root_dir: Path):
        self.root_dir = root_dir
        self.manifests_dir = self.root_dir / "src" / "biliup_next" / "plugins" / "manifests"
    def load_manifests(self) -> list[ProviderManifest]:
        """Load every ``*.json`` manifest, sorted by filename; missing
        directory yields an empty list."""
        manifests: list[ProviderManifest] = []
        if not self.manifests_dir.exists():
            return manifests
        for path in sorted(self.manifests_dir.glob("*.json")):
            with path.open("r", encoding="utf-8") as f:
                data = json.load(f)
            manifests.append(
                ProviderManifest(
                    id=data["id"],
                    name=data["name"],
                    version=data["version"],
                    provider_type=data["provider_type"],
                    entrypoint=data["entrypoint"],
                    capabilities=data["capabilities"],
                    config_schema=data.get("config_schema"),
                    enabled_by_default=data.get("enabled_by_default", True),
                )
            )
        return manifests
    def instantiate_provider(self, manifest: ProviderManifest) -> Any:
        """Import the manifest's ``module:attr`` entrypoint and construct it.

        Raises:
            ValueError: if the entrypoint is malformed, or the constructor
                needs a parameter this loader cannot supply.
        """
        module_name, _, attr_name = manifest.entrypoint.partition(":")
        if not module_name or not attr_name:
            raise ValueError(f"invalid provider entrypoint: {manifest.entrypoint}")
        module = importlib.import_module(module_name)
        provider_cls = getattr(module, attr_name)
        kwargs = self._build_constructor_kwargs(provider_cls)
        return provider_cls(**kwargs)
    def _build_constructor_kwargs(self, provider_cls: type[Any]) -> dict[str, Any]:
        """Build constructor kwargs: root-dir-like parameters receive
        ``self.root_dir``; defaulted and var-arg parameters are left alone."""
        signature = inspect.signature(provider_cls)
        kwargs: dict[str, Any] = {}
        for name, parameter in signature.parameters.items():
            if name == "self":
                continue
            if parameter.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}:
                continue
            if name in {"root", "next_root", "root_dir"}:
                kwargs[name] = self.root_dir
                continue
            # Fix: use the public sentinel inspect.Parameter.empty instead of
            # the private inspect._empty.
            if parameter.default is not inspect.Parameter.empty:
                continue
            raise ValueError(f"unsupported provider constructor parameter: {provider_cls.__name__}.{name}")
        return kwargs

View File

@ -0,0 +1,54 @@
from __future__ import annotations
import shutil
from pathlib import Path
from biliup_next.core.config import SettingsService
class RuntimeDoctor:
    """Health-checks the configured directories, asset files, and external
    binaries, returning a pass/fail report without modifying anything."""
    def __init__(self, root_dir: Path):
        self.root_dir = root_dir
        self.settings_service = SettingsService(root_dir)
    def run(self) -> dict[str, object]:
        """Run every check; top-level ``ok`` is True only when all pass."""
        bundle = self.settings_service.load()
        settings = bundle.settings
        checks: list[dict[str, object]] = []
        # Working directories: must exist (resolved against the project root).
        for group, name in (
            ("paths", "stage_dir"),
            ("paths", "backup_dir"),
            ("paths", "session_dir"),
        ):
            path = (self.root_dir / settings[group][name]).resolve()
            checks.append({"name": f"{group}.{name}", "ok": path.exists(), "detail": str(path)})
        # Asset files: must exist; files living outside the repo are only
        # annotated as "(external)", not failed.
        for group, name in (
            ("paths", "cookies_file"),
            ("paths", "upload_config_file"),
        ):
            path = (self.root_dir / settings[group][name]).resolve()
            detail = str(path)
            if path.exists() and not str(path).startswith(str(self.root_dir)):
                detail = f"{path} (external)"
            checks.append({"name": f"{group}.{name}", "ok": path.exists(), "detail": detail})
        # External tools: bare command names are resolved via PATH lookup;
        # values containing "/" are treated as filesystem paths (relative ones
        # resolved against the root) and must exist.
        for group, name in (
            ("ingest", "ffprobe_bin"),
            ("transcribe", "ffmpeg_bin"),
            ("song_detect", "codex_cmd"),
            ("publish", "biliup_path"),
        ):
            value = settings[group][name]
            found = shutil.which(value) if "/" not in value else str((self.root_dir / value).resolve())
            ok = bool(found) and (Path(found).exists() if "/" in str(found) else True)
            detail = str(found or value)
            if ok and "/" in detail and not detail.startswith(str(self.root_dir)):
                detail = f"{detail} (external)"
            checks.append({"name": f"{group}.{name}", "ok": ok, "detail": detail})
        return {
            "ok": all(item["ok"] for item in checks),
            "checks": checks,
        }

View File

@ -0,0 +1,93 @@
from __future__ import annotations
import shutil
import uuid
from pathlib import Path
from typing import BinaryIO
from biliup_next.core.errors import ModuleError
from biliup_next.infra.storage_guard import ensure_free_space
class StageImporter:
    """Copies or streams media files into the stage directory atomically.

    Data is first written to a hidden ``.<name>.<uuid>.part`` temp file in
    the stage directory and then renamed into place, so readers never see a
    half-written file. Existing target names are never overwritten; a
    ``stem.N.suffix`` variant is chosen instead.
    """
    def import_file(self, source_path: Path, stage_dir: Path, *, min_free_bytes: int = 0) -> dict[str, object]:
        """Copy *source_path* into *stage_dir*.

        Raises:
            FileNotFoundError: if the source does not exist.
            IsADirectoryError: if the source is not a regular file.
            ModuleError: (STAGE_IMPORT_NO_SPACE) when free space is insufficient.
        """
        if not source_path.exists():
            raise FileNotFoundError(f"source not found: {source_path}")
        if not source_path.is_file():
            raise IsADirectoryError(f"source is not a file: {source_path}")
        stage_dir.mkdir(parents=True, exist_ok=True)
        source_size = source_path.stat().st_size
        # Require room for the full copy plus the caller's reserve headroom.
        ensure_free_space(
            stage_dir,
            source_size + max(0, int(min_free_bytes)),
            code="STAGE_IMPORT_NO_SPACE",
            message=f"stage 剩余空间不足,无法导入: {source_path.name}",
            retryable=False,
            details={"source_size_bytes": source_size},
        )
        target_path = stage_dir / source_path.name
        if target_path.exists():
            target_path = self._unique_target(stage_dir, source_path.name)
        temp_path = self._temp_target(stage_dir, target_path.name)
        try:
            shutil.copyfile(source_path, temp_path)
            shutil.copymode(source_path, temp_path)
            # Rename within the same directory, so readers only ever see the
            # complete file.
            temp_path.replace(target_path)
        except Exception:
            temp_path.unlink(missing_ok=True)
            raise
        return {
            "source_path": str(source_path.resolve()),
            "target_path": str(target_path.resolve()),
        }
    def import_upload(self, filename: str, fileobj: BinaryIO, stage_dir: Path, *, min_free_bytes: int = 0) -> dict[str, object]:
        """Stream an uploaded file object into *stage_dir*.

        The upload size is unknown up front, so only *min_free_bytes* of
        headroom can be pre-checked; a mid-stream out-of-space error is
        converted to a non-retryable ModuleError.

        Raises:
            ValueError: if *filename* is empty.
            ModuleError: (STAGE_UPLOAD_NO_SPACE) before or during the write.
        """
        if not filename:
            raise ValueError("missing filename")
        stage_dir.mkdir(parents=True, exist_ok=True)
        ensure_free_space(
            stage_dir,
            max(0, int(min_free_bytes)),
            code="STAGE_UPLOAD_NO_SPACE",
            message=f"stage 剩余空间不足,无法接收上传: {Path(filename).name}",
            retryable=False,
        )
        # Path(...).name strips any client-supplied directory components.
        target_path = stage_dir / Path(filename).name
        if target_path.exists():
            target_path = self._unique_target(stage_dir, Path(filename).name)
        temp_path = self._temp_target(stage_dir, target_path.name)
        try:
            with temp_path.open("wb") as f:
                shutil.copyfileobj(fileobj, f)
            temp_path.replace(target_path)
        except OSError as exc:
            temp_path.unlink(missing_ok=True)
            if getattr(exc, "errno", None) == 28:  # 28 == ENOSPC
                raise ModuleError(
                    code="STAGE_UPLOAD_NO_SPACE",
                    message=f"stage 剩余空间不足,上传中断: {Path(filename).name}",
                    retryable=False,
                ) from exc
            raise
        except Exception:
            temp_path.unlink(missing_ok=True)
            raise
        return {
            "uploaded_filename": Path(filename).name,
            "target_path": str(target_path.resolve()),
        }
    @staticmethod
    def _unique_target(stage_dir: Path, filename: str) -> Path:
        """Return the first free ``stem.N.suffix`` name in *stage_dir*."""
        base = Path(filename).stem
        suffix = Path(filename).suffix
        index = 1
        while True:
            candidate = stage_dir / f"{base}.{index}{suffix}"
            if not candidate.exists():
                return candidate
            index += 1
    @staticmethod
    def _temp_target(stage_dir: Path, filename: str) -> Path:
        """Hidden temp name for *filename*, unique per call via uuid4.

        Fix: the previous implementation ignored *filename* entirely and
        used a hard-coded placeholder, so temp files for concurrent imports
        were indistinguishable in the stage directory.
        """
        return stage_dir / f".{filename}.{uuid.uuid4().hex}.part"

View File

@ -0,0 +1,41 @@
from __future__ import annotations
import shutil
from pathlib import Path
from biliup_next.core.errors import ModuleError
def mb_to_bytes(value: object) -> int:
    """Interpret *value* as a megabyte count and return it in bytes.

    ``None``, non-numeric, and negative inputs all collapse to 0.
    """
    try:
        megabytes = int(value or 0)
    except (TypeError, ValueError):
        megabytes = 0
    if megabytes < 0:
        megabytes = 0
    return megabytes * 1024 * 1024
def free_bytes_for_path(path: Path) -> int:
    """Free bytes on the filesystem holding *path*; when the path does not
    exist yet, probe its parent directory instead."""
    probe = path if path.exists() else path.parent
    return int(shutil.disk_usage(probe).free)
def ensure_free_space(
    path: Path,
    required_free_bytes: int,
    *,
    code: str,
    message: str,
    retryable: bool,
    details: dict[str, object] | None = None,
) -> None:
    """Raise a ModuleError with *code*/*message* when the filesystem holding
    *path* has less than *required_free_bytes* free; otherwise return None.

    The error's details carry the path plus required/available byte counts,
    merged with any caller-supplied *details*.
    """
    # Probe inline (same rule as free_bytes_for_path): fall back to the
    # parent directory when *path* itself does not exist yet.
    probe = path if path.exists() else path.parent
    available = int(shutil.disk_usage(probe).free)
    if available >= required_free_bytes:
        return
    payload: dict[str, object] = {
        "path": str(path),
        "required_free_bytes": int(required_free_bytes),
        "available_free_bytes": int(available),
    }
    if details:
        payload.update(details)
    raise ModuleError(code=code, message=message, retryable=retryable, details=payload)

View File

@ -0,0 +1,83 @@
from __future__ import annotations
import subprocess
# Only these units may be inspected or controlled through this wrapper.
ALLOWED_SERVICES = {
    "biliup-next-worker.service",
    "biliup-next-api.service",
    "biliup-python.service",
}
# Only these systemctl verbs may be issued.
ALLOWED_ACTIONS = {"start", "stop", "restart"}
class SystemdRuntime:
    """Allow-listed wrapper around ``systemctl`` for the project's services."""
    def list_services(self) -> dict[str, object]:
        """Describe every allow-listed unit, in name order."""
        return {"items": [self._inspect_service(unit) for unit in sorted(ALLOWED_SERVICES)]}
    def act(self, service: str, action: str) -> dict[str, object]:
        """Run ``sudo systemctl <action> <service>`` and return the unit's
        refreshed state together with the command outcome.

        Raises:
            ValueError: when the service or action is not allow-listed.
        """
        if service not in ALLOWED_SERVICES:
            raise ValueError(f"unsupported service: {service}")
        if action not in ALLOWED_ACTIONS:
            raise ValueError(f"unsupported action: {action}")
        result = subprocess.run(
            ["sudo", "systemctl", action, service],
            capture_output=True,
            text=True,
            check=False,
        )
        payload = self._inspect_service(service)
        payload["action"] = action
        payload["command_ok"] = result.returncode == 0
        payload["stderr"] = (result.stderr or "").strip()
        payload["stdout"] = (result.stdout or "").strip()
        return payload
    def _inspect_service(self, service: str) -> dict[str, object]:
        """Query ``systemctl show`` for the unit and parse its key=value output."""
        show = subprocess.run(
            [
                "systemctl",
                "show",
                service,
                "--property=Id,Description,LoadState,ActiveState,SubState,MainPID,ExecMainStatus,FragmentPath",
            ],
            capture_output=True,
            text=True,
            check=False,
        )
        info: dict[str, object] = {
            "id": service,
            "description": "",
            "load_state": "unknown",
            "active_state": "unknown",
            "sub_state": "unknown",
            "main_pid": 0,
            "exec_main_status": None,
            "fragment_path": "",
        }
        # Map systemd property names onto the snake_case report fields; the
        # integer-valued MainPID is special-cased below.
        field_for_key = {
            "Id": "id",
            "Description": "description",
            "LoadState": "load_state",
            "ActiveState": "active_state",
            "SubState": "sub_state",
            "ExecMainStatus": "exec_main_status",
            "FragmentPath": "fragment_path",
        }
        for raw in (show.stdout or "").splitlines():
            key, sep, value = raw.partition("=")
            if not sep:
                continue
            if key == "MainPID":
                try:
                    info["main_pid"] = int(value)
                except ValueError:
                    info["main_pid"] = 0
            elif key in field_for_key:
                info[field_for_key[key]] = value
        return info

View File

@ -0,0 +1,458 @@
from __future__ import annotations
import json
from pathlib import Path
from datetime import datetime, timezone
from biliup_next.core.models import ActionRecord, Artifact, PublishRecord, Task, TaskStep
from biliup_next.infra.db import Database
# Pipeline rank of each task status; _merge_task_status keeps whichever status
# ranks higher, so a recorded failure (rank 7/8) is never overwritten by a
# success status inferred from legacy flag files.
TASK_STATUS_ORDER = {
    "created": 0,
    "transcribed": 1,
    "songs_detected": 2,
    "split_done": 3,
    "published": 4,
    "commented": 5,
    "collection_synced": 6,
    "failed_retryable": 7,
    "failed_manual": 8,
}
class TaskRepository:
    """SQLite-backed persistence for tasks, their steps, artifacts, and
    publish/action records, plus a bootstrap path that reconstructs DB rows
    from legacy per-session folders and their ``*_done.flag`` files."""
    def __init__(self, db: Database):
        self.db = db
    def upsert_task(self, task: Task) -> None:
        """Insert *task*, or refresh all mutable columns when the id exists
        (created_at is intentionally not updated on conflict)."""
        with self.db.connect() as conn:
            conn.execute(
                """
                INSERT INTO tasks (id, source_type, source_path, title, status, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                source_type=excluded.source_type,
                source_path=excluded.source_path,
                title=excluded.title,
                status=excluded.status,
                updated_at=excluded.updated_at
                """,
                (
                    task.id,
                    task.source_type,
                    task.source_path,
                    task.title,
                    task.status,
                    task.created_at,
                    task.updated_at,
                ),
            )
            conn.commit()
    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        """Set the task's status and updated_at timestamp."""
        with self.db.connect() as conn:
            conn.execute(
                "UPDATE tasks SET status = ?, updated_at = ? WHERE id = ?",
                (status, updated_at, task_id),
            )
            conn.commit()
    def list_tasks(self, limit: int = 100) -> list[Task]:
        """Return up to *limit* tasks, most recently updated first."""
        with self.db.connect() as conn:
            rows = conn.execute(
                "SELECT id, source_type, source_path, title, status, created_at, updated_at "
                "FROM tasks ORDER BY updated_at DESC LIMIT ?",
                (limit,),
            ).fetchall()
            return [Task(**dict(row)) for row in rows]
    def get_task(self, task_id: str) -> Task | None:
        """Return the task by id, or None when it does not exist."""
        with self.db.connect() as conn:
            row = conn.execute(
                "SELECT id, source_type, source_path, title, status, created_at, updated_at "
                "FROM tasks WHERE id = ?",
                (task_id,),
            ).fetchone()
            return Task(**dict(row)) if row else None
    def delete_task(self, task_id: str) -> None:
        """Delete the task and all dependent rows (records, artifacts, steps)."""
        with self.db.connect() as conn:
            # Children first so the task row never dangles mid-delete.
            conn.execute("DELETE FROM action_records WHERE task_id = ?", (task_id,))
            conn.execute("DELETE FROM publish_records WHERE task_id = ?", (task_id,))
            conn.execute("DELETE FROM artifacts WHERE task_id = ?", (task_id,))
            conn.execute("DELETE FROM task_steps WHERE task_id = ?", (task_id,))
            conn.execute("DELETE FROM tasks WHERE id = ?", (task_id,))
            conn.commit()
    def replace_steps(self, task_id: str, steps: list[TaskStep]) -> None:
        """Atomically replace the task's step rows with *steps*."""
        with self.db.connect() as conn:
            conn.execute("DELETE FROM task_steps WHERE task_id = ?", (task_id,))
            conn.executemany(
                """
                INSERT INTO task_steps (
                    task_id, step_name, status, error_code, error_message,
                    retry_count, started_at, finished_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                [
                    (
                        step.task_id,
                        step.step_name,
                        step.status,
                        step.error_code,
                        step.error_message,
                        step.retry_count,
                        step.started_at,
                        step.finished_at,
                    )
                    for step in steps
                ],
            )
            conn.commit()
    def list_steps(self, task_id: str) -> list[TaskStep]:
        """Return the task's steps in insertion (id) order."""
        with self.db.connect() as conn:
            rows = conn.execute(
                """
                SELECT id, task_id, step_name, status, error_code, error_message,
                       retry_count, started_at, finished_at
                FROM task_steps
                WHERE task_id = ?
                ORDER BY id ASC
                """,
                (task_id,),
            ).fetchall()
            return [TaskStep(**dict(row)) for row in rows]
    def update_step_status(
        self,
        task_id: str,
        step_name: str,
        status: str,
        *,
        error_code: str | None = None,
        error_message: str | None = None,
        retry_count: int | None = None,
        started_at: str | None = None,
        finished_at: str | None = None,
    ) -> None:
        """Update one step's status; retry_count/started_at/finished_at keep
        their stored values unless explicitly provided.

        Raises:
            RuntimeError: when the (task_id, step_name) row does not exist.
        """
        with self.db.connect() as conn:
            current = conn.execute(
                """
                SELECT retry_count, started_at, finished_at
                FROM task_steps
                WHERE task_id = ? AND step_name = ?
                """,
                (task_id, step_name),
            ).fetchone()
            if current is None:
                raise RuntimeError(f"step not found: {task_id}.{step_name}")
            conn.execute(
                """
                UPDATE task_steps
                SET status = ?,
                    error_code = ?,
                    error_message = ?,
                    retry_count = ?,
                    started_at = ?,
                    finished_at = ?
                WHERE task_id = ? AND step_name = ?
                """,
                (
                    status,
                    error_code,
                    error_message,
                    retry_count if retry_count is not None else current["retry_count"],
                    started_at if started_at is not None else current["started_at"],
                    finished_at if finished_at is not None else current["finished_at"],
                    task_id,
                    step_name,
                ),
            )
            conn.commit()
    def add_artifact(self, artifact: Artifact) -> None:
        """Insert the artifact unless an identical (task, type, path) row exists."""
        with self.db.connect() as conn:
            existing = conn.execute(
                """
                SELECT 1
                FROM artifacts
                WHERE task_id = ? AND artifact_type = ? AND path = ?
                LIMIT 1
                """,
                (artifact.task_id, artifact.artifact_type, artifact.path),
            ).fetchone()
            if existing:
                # Idempotent: re-registering the same artifact is a no-op.
                return
            conn.execute(
                """
                INSERT INTO artifacts (task_id, artifact_type, path, metadata_json, created_at)
                VALUES (?, ?, ?, ?, ?)
                """,
                (
                    artifact.task_id,
                    artifact.artifact_type,
                    artifact.path,
                    artifact.metadata_json,
                    artifact.created_at,
                ),
            )
            conn.commit()
    def list_artifacts(self, task_id: str) -> list[Artifact]:
        """Return the task's artifacts in insertion (id) order."""
        with self.db.connect() as conn:
            rows = conn.execute(
                """
                SELECT id, task_id, artifact_type, path, metadata_json, created_at
                FROM artifacts
                WHERE task_id = ?
                ORDER BY id ASC
                """,
                (task_id,),
            ).fetchall()
            return [Artifact(**dict(row)) for row in rows]
    def delete_artifacts(self, task_id: str, artifact_type: str) -> None:
        """Delete all of the task's artifacts of the given type."""
        with self.db.connect() as conn:
            conn.execute(
                "DELETE FROM artifacts WHERE task_id = ? AND artifact_type = ?",
                (task_id, artifact_type),
            )
            conn.commit()
    def delete_artifact_by_path(self, task_id: str, path: str) -> None:
        """Delete the task's artifact rows matching *path* exactly."""
        with self.db.connect() as conn:
            conn.execute(
                "DELETE FROM artifacts WHERE task_id = ? AND path = ?",
                (task_id, path),
            )
            conn.commit()
    def add_publish_record(self, record: PublishRecord) -> None:
        """Append a publish record (no dedup — each publish is its own row)."""
        with self.db.connect() as conn:
            conn.execute(
                """
                INSERT INTO publish_records (task_id, platform, aid, bvid, title, published_at)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    record.task_id,
                    record.platform,
                    record.aid,
                    record.bvid,
                    record.title,
                    record.published_at,
                ),
            )
            conn.commit()
    def add_action_record(self, record: ActionRecord) -> None:
        """Append an action/audit record."""
        with self.db.connect() as conn:
            conn.execute(
                """
                INSERT INTO action_records (task_id, action_name, status, summary, details_json, created_at)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    record.task_id,
                    record.action_name,
                    record.status,
                    record.summary,
                    record.details_json,
                    record.created_at,
                ),
            )
            conn.commit()
    def list_action_records(
        self,
        task_id: str | None = None,
        limit: int = 100,
        *,
        action_name: str | None = None,
        status: str | None = None,
    ) -> list[ActionRecord]:
        """Return action records, newest first, optionally filtered by task,
        action name, and/or status."""
        with self.db.connect() as conn:
            conditions: list[str] = []
            params: list[object] = []
            if task_id is not None:
                conditions.append("task_id = ?")
                params.append(task_id)
            if action_name:
                conditions.append("action_name = ?")
                params.append(action_name)
            if status:
                conditions.append("status = ?")
                params.append(status)
            # Only the WHERE skeleton is built dynamically; every value goes
            # through a bound parameter.
            where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
            rows = conn.execute(
                f"""
                SELECT id, task_id, action_name, status, summary, details_json, created_at
                FROM action_records
                {where}
                ORDER BY id DESC
                LIMIT ?
                """,
                (*params, limit),
            ).fetchall()
            return [ActionRecord(**dict(row)) for row in rows]
    def bootstrap_from_legacy_sessions(self, session_dir: Path) -> int:
        """Scan legacy session folders and upsert a task per folder, merging
        flag-file-derived state with any existing DB state.

        Returns the number of folders processed.
        """
        synced = 0
        if not session_dir.exists():
            return synced
        for folder in sorted(p for p in session_dir.iterdir() if p.is_dir()):
            # Folder name doubles as the task id and title in the legacy layout.
            task_id = folder.name
            existing_task = self.get_task(task_id)
            # Later checks overwrite earlier ones, so the deepest flag present
            # determines the derived status.
            derived_status = "created"
            if (folder / "transcribe_done.flag").exists():
                derived_status = "transcribed"
            if (folder / "songs.json").exists():
                derived_status = "songs_detected"
            if (folder / "split_done.flag").exists():
                derived_status = "split_done"
            if (folder / "upload_done.flag").exists():
                derived_status = "published"
            if (folder / "comment_done.flag").exists():
                derived_status = "commented"
            if (folder / "collection_a_done.flag").exists() or (folder / "collection_b_done.flag").exists():
                derived_status = "collection_synced"
            effective_status = self._merge_task_status(existing_task.status if existing_task else None, derived_status)
            created_at = (
                existing_task.created_at
                if existing_task and existing_task.created_at
                else self._folder_time_iso(folder)
            )
            updated_at = (
                existing_task.updated_at
                if existing_task and existing_task.updated_at
                else created_at
            )
            task = Task(
                id=task_id,
                source_type=existing_task.source_type if existing_task else "legacy_session",
                source_path=existing_task.source_path if existing_task else str(folder),
                title=folder.name,
                status=effective_status,
                created_at=created_at,
                updated_at=updated_at,
            )
            self.upsert_task(task)
            steps = self._merge_steps(folder, task_id)
            self.replace_steps(task_id, steps)
            self._bootstrap_artifacts(folder, task_id)
            synced += 1
        return synced
    def _infer_steps(self, folder: Path, task_id: str) -> list[TaskStep]:
        """Derive step statuses purely from the folder's flag files; ingest is
        implicitly succeeded for any existing session folder."""
        flags = {
            "ingest": True,
            "transcribe": (folder / "transcribe_done.flag").exists(),
            "song_detect": (folder / "songs.json").exists(),
            "split": (folder / "split_done.flag").exists(),
            "publish": (folder / "upload_done.flag").exists(),
            "comment": (folder / "comment_done.flag").exists(),
            "collection_a": (folder / "collection_a_done.flag").exists(),
            "collection_b": (folder / "collection_b_done.flag").exists(),
        }
        steps: list[TaskStep] = []
        for name, done in flags.items():
            steps.append(
                TaskStep(
                    id=None,
                    task_id=task_id,
                    step_name=name,
                    status="succeeded" if done else "pending",
                    error_code=None,
                    error_message=None,
                    retry_count=0,
                    started_at=None,
                    finished_at=None,
                )
            )
        return steps
    def _merge_steps(self, folder: Path, task_id: str) -> list[TaskStep]:
        """Combine flag-file-inferred steps with current DB steps: a flag
        saying "succeeded" wins; otherwise any non-pending DB state (e.g. a
        recorded failure) is preserved; otherwise the inferred step is used."""
        inferred_steps = {step.step_name: step for step in self._infer_steps(folder, task_id)}
        current_steps = {step.step_name: step for step in self.list_steps(task_id)}
        merged: list[TaskStep] = []
        for step_name, inferred in inferred_steps.items():
            current = current_steps.get(step_name)
            if current is None:
                merged.append(inferred)
                continue
            if inferred.status == "succeeded":
                # Flag on disk proves completion; keep the DB row's timing and
                # retry bookkeeping but clear any stale error.
                merged.append(
                    TaskStep(
                        id=None,
                        task_id=task_id,
                        step_name=step_name,
                        status="succeeded",
                        error_code=None,
                        error_message=None,
                        retry_count=current.retry_count,
                        started_at=current.started_at,
                        finished_at=current.finished_at,
                    )
                )
                continue
            if current.status != "pending":
                # No flag, but the DB knows something (running/failed): keep it.
                merged.append(
                    TaskStep(
                        id=None,
                        task_id=task_id,
                        step_name=step_name,
                        status=current.status,
                        error_code=current.error_code,
                        error_message=current.error_message,
                        retry_count=current.retry_count,
                        started_at=current.started_at,
                        finished_at=current.finished_at,
                    )
                )
                continue
            merged.append(inferred)
        return merged
    @staticmethod
    def _merge_task_status(existing_status: str | None, derived_status: str) -> str:
        """Keep whichever status ranks higher in TASK_STATUS_ORDER (unknown
        statuses rank lowest)."""
        if not existing_status:
            return derived_status
        existing_rank = TASK_STATUS_ORDER.get(existing_status, -1)
        derived_rank = TASK_STATUS_ORDER.get(derived_status, -1)
        return existing_status if existing_rank >= derived_rank else derived_status
    @staticmethod
    def _folder_time_iso(folder: Path) -> str:
        """Folder mtime as an ISO-8601 UTC timestamp (legacy created_at proxy)."""
        return datetime.fromtimestamp(folder.stat().st_mtime, tz=timezone.utc).isoformat()
    def _bootstrap_artifacts(self, folder: Path, task_id: str) -> None:
        """Register the folder's well-known output files as artifacts,
        skipping any (type, path) pair already recorded."""
        artifacts = []
        if any(folder.glob("*.srt")):
            for srt in folder.glob("*.srt"):
                artifacts.append(("subtitle_srt", srt))
        for name in ("songs.json", "songs.txt", "bvid.txt"):
            path = folder / name
            if path.exists():
                artifact_type = {
                    "songs.json": "songs_json",
                    "songs.txt": "songs_txt",
                    "bvid.txt": "publish_bvid",
                }[name]
                artifacts.append((artifact_type, path))
        existing = {(a.artifact_type, a.path) for a in self.list_artifacts(task_id)}
        for artifact_type, path in artifacts:
            key = (artifact_type, str(path))
            if key in existing:
                continue
            self.add_artifact(
                Artifact(
                    id=None,
                    task_id=task_id,
                    artifact_type=artifact_type,
                    path=str(path),
                    metadata_json=json.dumps({}),
                    created_at="",
                )
            )

View File

@ -0,0 +1,163 @@
from __future__ import annotations
import shutil
from pathlib import Path
from biliup_next.core.models import utc_now_iso
from biliup_next.infra.task_repository import TaskRepository
# Pipeline steps in execution order; a reset clears the chosen step and
# everything after it.
STEP_ORDER = [
    "ingest",
    "transcribe",
    "song_detect",
    "split",
    "publish",
    "comment",
    "collection_a",
    "collection_b",
]
# Status the task held just before each step ran; used as the rollback target.
STATUS_BEFORE_STEP = {
    "transcribe": "created",
    "song_detect": "transcribed",
    "split": "songs_detected",
    "publish": "split_done",
    "comment": "published",
    "collection_a": "commented",
    "collection_b": "commented",
}
class TaskResetService:
    """Rewinds a task to just before a given pipeline step: deletes the
    step's on-disk outputs and completion flags, drops invalidated artifact
    rows, and marks the step (and all later steps) pending again."""
    def __init__(self, repo: TaskRepository):
        self.repo = repo
    def reset_to_step(self, task_id: str, step_name: str) -> dict[str, object]:
        """Reset *task_id* so that *step_name* will run again.

        Raises:
            RuntimeError: when the task does not exist or the step is unknown.
        """
        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")
        if step_name not in STEP_ORDER:
            raise RuntimeError(f"unsupported step: {step_name}")
        work_dir = self._resolve_work_dir(task)
        self._cleanup_files(work_dir, step_name)
        self._cleanup_artifacts(task_id, step_name)
        self._reset_steps(task_id, step_name)
        target_status = STATUS_BEFORE_STEP.get(step_name, "created")
        self.repo.update_task_status(task_id, target_status, utc_now_iso())
        return {"task_id": task_id, "reset_to": step_name, "work_dir": str(work_dir)}
    @staticmethod
    def _resolve_work_dir(task) -> Path:  # type: ignore[no-untyped-def]
        # The working directory is the folder containing the task's source
        # file (or the source path itself when it already is a directory).
        source = Path(task.source_path)
        return source.parent if source.is_file() else source
    @staticmethod
    def _remove_path(path: Path) -> None:
        # Best-effort delete that handles both files and directory trees.
        if path.is_dir():
            shutil.rmtree(path, ignore_errors=True)
        elif path.exists():
            path.unlink()
    def _cleanup_files(self, work_dir: Path, step_name: str) -> None:
        """Delete the flags and outputs produced by *step_name* and, since
        downstream results depend on it, by every later step as well."""
        cleanup_map = {
            "transcribe": [
                work_dir / "transcribe_done.flag",
                work_dir / "song_schema.json",
                work_dir / "songs.json",
                work_dir / "songs.txt",
                work_dir / "split_done.flag",
                work_dir / "upload_done.flag",
                work_dir / "comment_done.flag",
                work_dir / "comment_split_done.flag",
                work_dir / "comment_full_done.flag",
                work_dir / "collection_a_done.flag",
                work_dir / "collection_b_done.flag",
                work_dir / "bvid.txt",
                work_dir / "temp_audio",
                work_dir / "split_video",
            ],
            "song_detect": [
                work_dir / "song_schema.json",
                work_dir / "songs.json",
                work_dir / "songs.txt",
                work_dir / "split_done.flag",
                work_dir / "upload_done.flag",
                work_dir / "comment_done.flag",
                work_dir / "comment_split_done.flag",
                work_dir / "comment_full_done.flag",
                work_dir / "collection_a_done.flag",
                work_dir / "collection_b_done.flag",
                work_dir / "bvid.txt",
                work_dir / "split_video",
            ],
            "split": [
                work_dir / "split_done.flag",
                work_dir / "upload_done.flag",
                work_dir / "comment_done.flag",
                work_dir / "comment_split_done.flag",
                work_dir / "comment_full_done.flag",
                work_dir / "collection_a_done.flag",
                work_dir / "collection_b_done.flag",
                work_dir / "bvid.txt",
                work_dir / "split_video",
            ],
            "publish": [
                work_dir / "upload_done.flag",
                work_dir / "comment_done.flag",
                work_dir / "comment_split_done.flag",
                work_dir / "comment_full_done.flag",
                work_dir / "collection_a_done.flag",
                work_dir / "collection_b_done.flag",
                work_dir / "bvid.txt",
            ],
            "comment": [
                work_dir / "comment_done.flag",
                work_dir / "comment_split_done.flag",
                work_dir / "comment_full_done.flag",
                work_dir / "collection_a_done.flag",
                work_dir / "collection_b_done.flag",
            ],
            "collection_a": [
                work_dir / "collection_a_done.flag",
            ],
            "collection_b": [
                work_dir / "collection_b_done.flag",
            ],
        }
        for path in cleanup_map.get(step_name, []):
            self._remove_path(path)
        if step_name == "transcribe":
            # Subtitle files are named after the video, so match them by glob.
            for srt_file in work_dir.glob("*.srt"):
                self._remove_path(srt_file)
    def _cleanup_artifacts(self, task_id: str, step_name: str) -> None:
        """Drop the DB artifact rows invalidated by resetting *step_name*."""
        type_map = {
            "transcribe": {"subtitle_srt", "songs_json", "songs_txt", "clip_video", "publish_bvid"},
            "song_detect": {"songs_json", "songs_txt", "clip_video", "publish_bvid"},
            "split": {"clip_video", "publish_bvid"},
            "publish": {"publish_bvid"},
            "comment": set(),
            "collection_a": set(),
            "collection_b": set(),
        }
        for artifact_type in type_map.get(step_name, set()):
            self.repo.delete_artifacts(task_id, artifact_type)
    def _reset_steps(self, task_id: str, step_name: str) -> None:
        """Mark *step_name* and every later step pending; ingest is never reset."""
        reset_index = STEP_ORDER.index(step_name)
        for index, current_step in enumerate(STEP_ORDER):
            if current_step == "ingest":
                continue
            if index < reset_index:
                continue
            self.repo.update_step_status(
                task_id,
                current_step,
                "pending",
                error_code=None,
                error_message=None,
                retry_count=0,
                started_at=None,
                finished_at=None,
            )

View File

@ -0,0 +1,40 @@
from __future__ import annotations
import shutil
from pathlib import Path
from biliup_next.infra.task_repository import TaskRepository
class WorkspaceCleanupService:
    """Deletes a finished task's bulky outputs according to cleanup settings."""
    def __init__(self, repo: TaskRepository):
        self.repo = repo
    def cleanup_task_outputs(self, task_id: str, settings: dict[str, object]) -> dict[str, object]:
        """Remove the task's source video and/or split clips when the
        corresponding cleanup settings are enabled.

        Returns ``{"removed": [...], "skipped": [...]}`` where skipped paths
        were eligible for deletion but no longer exist on disk.

        Raises:
            RuntimeError: when the task does not exist.
        """
        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")
        session_dir = Path(str(settings["session_dir"])) / task.title
        removed: list[str] = []
        skipped: list[str] = []
        delete_source = settings.get("delete_source_video_after_collection_synced", False)
        delete_clips = settings.get("delete_split_videos_after_collection_synced", False)
        if delete_source:
            source_path = Path(task.source_path)
            if not source_path.exists():
                skipped.append(str(source_path))
            else:
                source_path.unlink()
                # Drop the matching artifact row so the DB does not point at a
                # deleted file.
                self.repo.delete_artifact_by_path(task_id, str(source_path.resolve()))
                removed.append(str(source_path))
        if delete_clips:
            split_dir = session_dir / "split_video"
            if not split_dir.exists():
                skipped.append(str(split_dir))
            else:
                shutil.rmtree(split_dir, ignore_errors=True)
                self.repo.delete_artifacts(task_id, "clip_video")
                removed.append(str(split_dir))
        return {"removed": removed, "skipped": skipped}