feat: professionalize control plane and standalone delivery

This commit is contained in:
theshy
2026-04-07 10:46:30 +08:00
parent d0cf1fd0df
commit 862db502b0
100 changed files with 8313 additions and 1483 deletions

View File

@ -0,0 +1,151 @@
from __future__ import annotations
import json
import random
import time
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Task
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.bilibili_api import BilibiliApiAdapter
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
class BilibiliCollectionProvider:
    """Adds a task's published videos to Bilibili collections (seasons/sections).

    Two targets are supported: target "a" uses the full-video BV id resolved
    from the session workspace, any other target ("b") uses the BV id recorded
    in the session's ``bvid.txt``.  Per-target flag files in the session dir
    make the sync idempotent across retries.
    """

    def __init__(self, bilibili_api: BilibiliApiAdapter | None = None) -> None:
        self.bilibili_api = bilibili_api or BilibiliApiAdapter()
        # Memoizes season_id -> first section id (None caches a "not found").
        self._section_cache: dict[int, int | None] = {}

    # Provider registration metadata consumed by the module registry.
    manifest = ProviderManifest(
        id="bilibili_collection",
        name="Bilibili Collection Provider",
        version="0.1.0",
        provider_type="collection_provider",
        entrypoint="biliup_next.modules.collection.providers.bilibili_collection:BilibiliCollectionProvider",
        capabilities=["collection"],
        enabled_by_default=True,
    )

    def sync(self, task: Task, target: str, settings: dict[str, Any]) -> dict[str, object]:
        """Add the task's video to the collection selected by *target*.

        Returns a status dict (``added`` / ``already_exists`` / ``skipped``).
        Raises ModuleError when the cookie lacks a CSRF token, required inputs
        are missing, the target section cannot be found, or the add call fails.
        """
        session_dir = Path(str(settings["session_dir"])) / task.title
        cookies = self.bilibili_api.load_cookies(Path(str(settings["cookies_file"])))
        csrf = cookies.get("bili_jct")
        if not csrf:
            raise ModuleError(code="COOKIE_CSRF_MISSING", message="Cookie 缺少 bili_jct", retryable=False)
        session = self.bilibili_api.build_session(
            cookies=cookies,
            referer="https://member.bilibili.com/platform/upload-manager/distribution",
        )
        if target == "a":
            season_id = int(settings["season_id_a"])
            bvid = resolve_full_video_bvid(task.title, session_dir, settings)
            if not bvid:
                # No full video available: mark target "a" done so the task
                # does not retry forever waiting for a BV id.
                (session_dir / "collection_a_done.flag").touch()
                return {"status": "skipped", "reason": "full_video_bvid_not_found"}
            flag_path = session_dir / "collection_a_done.flag"
        else:
            season_id = int(settings["season_id_b"])
            bvid_path = session_dir / "bvid.txt"
            if not bvid_path.exists():
                raise ModuleError(code="COLLECTION_BVID_MISSING", message=f"缺少 bvid.txt: {session_dir}", retryable=True)
            bvid = bvid_path.read_text(encoding="utf-8").strip()
            flag_path = session_dir / "collection_b_done.flag"
        if season_id <= 0:
            # A non-positive season id disables this collection target.
            flag_path.touch()
            return {"status": "skipped", "reason": "season_disabled"}
        section_id = self._resolve_section_id(session, season_id)
        if not section_id:
            raise ModuleError(code="COLLECTION_SECTION_NOT_FOUND", message=f"未找到合集 section: {season_id}", retryable=True)
        info = self._get_video_info(session, bvid)
        add_result = self._add_videos_batch(session, csrf, section_id, [info])
        if add_result["status"] == "failed":
            raise ModuleError(
                code="COLLECTION_ADD_FAILED",
                message=str(add_result["message"]),
                retryable=True,
                details=add_result,
            )
        flag_path.touch()
        if add_result["status"] == "added":
            append_key = "append_collection_a_new_to_end" if target == "a" else "append_collection_b_new_to_end"
            if settings.get(append_key, True):
                # Optionally re-sort the section so the new video sits last.
                self._move_videos_to_section_end(session, csrf, section_id, [int(info["aid"])])
        return {"status": add_result["status"], "target": target, "bvid": bvid, "season_id": season_id}

    def _resolve_section_id(self, session, season_id: int) -> int | None:  # type: ignore[no-untyped-def]
        """Return the first section id of *season_id*, memoized per instance."""
        if season_id in self._section_cache:
            return self._section_cache[season_id]
        result = self.bilibili_api.list_seasons(session)
        if result.get("code") != 0:
            # API error: not cached, so the next call can retry the listing.
            return None
        for season in result.get("data", {}).get("seasons", []):
            if season.get("season", {}).get("id") == season_id:
                sections = season.get("sections", {}).get("sections", [])
                section_id = sections[0]["id"] if sections else None
                self._section_cache[season_id] = section_id
                return section_id
        # Season not present in the listing: cache the miss.
        self._section_cache[season_id] = None
        return None

    def _get_video_info(self, session, bvid: str) -> dict[str, object]:  # type: ignore[no-untyped-def]
        """Fetch aid/cid/title for *bvid*, shaped as a section episode entry."""
        data = self.bilibili_api.get_video_view(
            session,
            bvid,
            error_code="COLLECTION_VIDEO_INFO_FAILED",
            error_message="获取视频信息失败",
        )
        return {"aid": data["aid"], "cid": data["cid"], "title": data["title"], "charging_pay": 0}

    def _add_videos_batch(self, session, csrf: str, section_id: int, episodes: list[dict[str, object]]) -> dict[str, object]:  # type: ignore[no-untyped-def]
        """Add *episodes* to the section; status is added/already_exists/failed."""
        # Random pacing before the write call to avoid hammering the API.
        time.sleep(random.uniform(5.0, 10.0))
        result = self.bilibili_api.add_section_episodes(
            session,
            csrf=csrf,
            section_id=section_id,
            episodes=episodes,
        )
        if result.get("code") == 0:
            return {"status": "added"}
        if result.get("code") == 20080:
            # Code 20080 is mapped to "episode already in the section".
            return {"status": "already_exists", "message": result.get("message", "")}
        return {"status": "failed", "message": result.get("message", "unknown error"), "code": result.get("code")}

    def _move_videos_to_section_end(self, session, csrf: str, section_id: int, added_aids: list[int]) -> bool:  # type: ignore[no-untyped-def]
        """Reorder the section so episodes whose aid is in *added_aids* come last.

        Returns True on success (or when the section has no episodes).
        """
        detail = self.bilibili_api.get_section_detail(session, section_id=section_id)
        if detail.get("code") != 0:
            return False
        section = detail.get("data", {}).get("section", {})
        episodes = detail.get("data", {}).get("episodes", []) or []
        if not episodes:
            return True
        target_aids = {int(aid) for aid in added_aids}
        existing = []
        appended = []
        for episode in episodes:
            item = {"id": episode.get("id")}
            if item["id"] is None:
                continue
            if episode.get("aid") in target_aids:
                appended.append(item)
            else:
                existing.append(item)
        # Stable order: untouched episodes first, the newly added ones last.
        ordered = existing + appended
        payload = {
            "section": {
                "id": section["id"],
                "seasonId": section["seasonId"],
                "title": section["title"],
                "type": section["type"],
            },
            "sorts": [{"id": item["id"], "sort": index + 1} for index, item in enumerate(ordered)],
        }
        result = self.bilibili_api.edit_section(session, csrf=csrf, payload=payload)
        return result.get("code") == 0

View File

@ -0,0 +1,161 @@
from __future__ import annotations
import json
import time
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Task
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.bilibili_api import BilibiliApiAdapter
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
class BilibiliTopCommentProvider:
    """Posts and pins song-list comments on published Bilibili videos.

    Two comments are managed per task: a "split" comment (numbered song list)
    on the clip video recorded in ``bvid.txt``, and a "full" timeline comment
    (raw ``songs.txt`` content) on the resolved full video.  Flag files in the
    session dir make each half idempotent across retries.
    """

    def __init__(self, bilibili_api: BilibiliApiAdapter | None = None) -> None:
        self.bilibili_api = bilibili_api or BilibiliApiAdapter()

    # Provider registration metadata consumed by the module registry.
    manifest = ProviderManifest(
        id="bilibili_top_comment",
        name="Bilibili Top Comment Provider",
        version="0.1.0",
        provider_type="comment_provider",
        entrypoint="biliup_next.modules.comment.providers.bilibili_top_comment:BilibiliTopCommentProvider",
        capabilities=["comment"],
        enabled_by_default=True,
    )

    def comment(self, task: Task, settings: dict[str, Any]) -> dict[str, object]:
        """Post/pin the split and full-video comments for *task*.

        Returns a dict with the outcome of each half; raises ModuleError when
        required input files or the cookie CSRF token are missing.
        """
        session_dir = Path(str(settings["session_dir"])) / task.title
        songs_path = session_dir / "songs.txt"
        songs_json_path = session_dir / "songs.json"
        bvid_path = session_dir / "bvid.txt"
        if not songs_path.exists() or not bvid_path.exists():
            raise ModuleError(
                code="COMMENT_INPUT_MISSING",
                message=f"缺少评论所需文件: {session_dir}",
                retryable=True,
            )
        timeline_content = songs_path.read_text(encoding="utf-8").strip()
        split_content = self._build_split_comment_content(songs_json_path, songs_path)
        if not timeline_content and not split_content:
            # Nothing to post at all: mark both halves done and bail out.
            self._touch_comment_flags(session_dir, split_done=True, full_done=True)
            return {"status": "skipped", "reason": "comment_content_empty"}
        cookies = self.bilibili_api.load_cookies(Path(str(settings["cookies_file"])))
        csrf = cookies.get("bili_jct")
        if not csrf:
            raise ModuleError(code="COOKIE_CSRF_MISSING", message="Cookie 缺少 bili_jct", retryable=False)
        session = self.bilibili_api.build_session(
            cookies=cookies,
            referer="https://www.bilibili.com/",
            origin="https://www.bilibili.com",
        )
        split_result = {"status": "skipped", "reason": "disabled"}
        full_result = {"status": "skipped", "reason": "disabled"}
        split_done = (session_dir / "comment_split_done.flag").exists()
        full_done = (session_dir / "comment_full_done.flag").exists()
        if settings.get("post_split_comment", True) and not split_done:
            split_bvid = bvid_path.read_text(encoding="utf-8").strip()
            if split_content:
                split_result = self._post_and_top_comment(session, csrf, split_bvid, split_content, "split")
            else:
                split_result = {"status": "skipped", "reason": "split_comment_empty"}
            split_done = True
            (session_dir / "comment_split_done.flag").touch()
        elif not split_done:
            # Split comment disabled by settings: mark it done anyway.
            split_done = True
            (session_dir / "comment_split_done.flag").touch()
        if settings.get("post_full_video_timeline_comment", True) and not full_done:
            full_bvid = resolve_full_video_bvid(task.title, session_dir, settings)
            if full_bvid and timeline_content:
                full_result = self._post_and_top_comment(session, csrf, full_bvid, timeline_content, "full")
            else:
                reason = "full_video_bvid_not_found" if not full_bvid else "timeline_comment_empty"
                full_result = {"status": "skipped", "reason": reason}
            full_done = True
            (session_dir / "comment_full_done.flag").touch()
        elif not full_done:
            # Full-video comment disabled by settings: mark it done anyway.
            full_done = True
            (session_dir / "comment_full_done.flag").touch()
        if split_done and full_done:
            (session_dir / "comment_done.flag").touch()
        return {"status": "ok", "split": split_result, "full": full_result}

    def _post_and_top_comment(
        self,
        session,
        csrf: str,
        bvid: str,
        content: str,
        target: str,
    ) -> dict[str, object]:
        """Post *content* as a comment on *bvid* and pin it to the top."""
        view = self.bilibili_api.get_video_view(
            session,
            bvid,
            error_code="COMMENT_VIEW_FAILED",
            error_message=f"获取{target}视频信息失败",
        )
        aid = int(view["aid"])
        add_res = self.bilibili_api.add_reply(
            session,
            csrf=csrf,
            aid=aid,
            content=content,
            error_message=f"发布{target}评论失败",
        )
        rpid = int(add_res["rpid"])
        # Brief pause between posting and pinning the freshly created reply.
        time.sleep(3)
        self.bilibili_api.top_reply(
            session,
            csrf=csrf,
            aid=aid,
            rpid=rpid,
            error_message=f"置顶{target}评论失败",
        )
        return {"status": "ok", "bvid": bvid, "aid": aid, "rpid": rpid}

    @staticmethod
    def _build_split_comment_content(songs_json_path: Path, songs_txt_path: Path) -> str:
        """Build the numbered song list for the split comment.

        Prefers structured ``songs.json``; falls back to parsing ``songs.txt``
        (dropping a leading "HH:MM:SS"-style token).  Returns "" when neither
        source yields any lines.
        """
        if songs_json_path.exists():
            try:
                data = json.loads(songs_json_path.read_text(encoding="utf-8"))
                lines = []
                for index, song in enumerate(data.get("songs", []), 1):
                    title = str(song.get("title", "")).strip()
                    artist = str(song.get("artist", "")).strip()
                    if not title:
                        continue
                    # NOTE(review): title and artist are concatenated with no
                    # separator here — confirm this is the intended format.
                    suffix = f"{artist}" if artist else ""
                    lines.append(f"{index}. {title}{suffix}")
                if lines:
                    return "\n".join(lines)
            except json.JSONDecodeError:
                # Corrupt songs.json: silently fall back to songs.txt below.
                pass
        if songs_txt_path.exists():
            lines = []
            for index, raw in enumerate(songs_txt_path.read_text(encoding="utf-8").splitlines(), 1):
                text = raw.strip()
                if not text:
                    continue
                parts = text.split(" ", 1)
                # A first token containing ":" is treated as a timestamp prefix.
                song_text = parts[1] if len(parts) == 2 and ":" in parts[0] else text
                lines.append(f"{index}. {song_text}")
            return "\n".join(lines)
        return ""

    @staticmethod
    def _touch_comment_flags(session_dir: Path, *, split_done: bool, full_done: bool) -> None:
        """Create the per-target flag files, plus the overall done flag."""
        if split_done:
            (session_dir / "comment_split_done.flag").touch()
        if full_done:
            (session_dir / "comment_full_done.flag").touch()
        if split_done and full_done:
            (session_dir / "comment_done.flag").touch()

View File

@ -1,26 +1,51 @@
from __future__ import annotations
import json
import re
import shutil
import subprocess
import time
from datetime import datetime, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, TaskStep, utc_now_iso
from biliup_next.core.models import Artifact, Task, TaskContext, TaskStep, utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.task_repository import TaskRepository
SHANGHAI_TZ = ZoneInfo("Asia/Shanghai")
TITLE_PATTERN = re.compile(
r"^(?P<streamer>.+?)\s+(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分"
)
class IngestService:
def __init__(self, registry: Registry, repo: TaskRepository):
self.registry = registry
self.repo = repo
def create_task_from_file(self, source_path: Path, settings: dict[str, object]) -> Task:
def create_task_from_file(
self,
source_path: Path,
settings: dict[str, object],
*,
context_payload: dict[str, object] | None = None,
) -> Task:
provider_id = str(settings.get("provider", "local_file"))
provider = self.registry.get("ingest_provider", provider_id)
provider.validate_source(source_path, settings)
source_path = source_path.resolve()
session_dir = Path(str(settings["session_dir"])).resolve()
try:
source_path.relative_to(session_dir)
except ValueError as exc:
raise ModuleError(
code="SOURCE_OUTSIDE_WORKSPACE",
message=f"源文件不在 session 工作区内: {source_path}",
retryable=False,
details={"session_dir": str(session_dir), "hint": "请先使用 stage/import 或 stage/upload 导入文件"},
) from exc
task_id = source_path.stem
if self.repo.get_task(task_id):
@ -31,10 +56,11 @@ class IngestService:
)
now = utc_now_iso()
context_payload = context_payload or {}
task = Task(
id=task_id,
source_type="local_file",
source_path=str(source_path.resolve()),
source_path=str(source_path),
title=source_path.stem,
status="created",
created_at=now,
@ -59,11 +85,22 @@ class IngestService:
id=None,
task_id=task_id,
artifact_type="source_video",
path=str(source_path.resolve()),
path=str(source_path),
metadata_json=json.dumps({"provider": provider_id}),
created_at=now,
)
)
context = self._build_task_context(
task,
context_payload,
created_at=now,
updated_at=now,
session_gap_minutes=int(settings.get("session_gap_minutes", 60)),
)
self.repo.upsert_task_context(context)
full_video_bvid = (context.full_video_bvid or "").strip()
if full_video_bvid.startswith("BV"):
(source_path.parent / "full_video_bvid.txt").write_text(full_video_bvid, encoding="utf-8")
return task
def scan_stage(self, settings: dict[str, object]) -> dict[str, object]:
@ -123,10 +160,27 @@ class IngestService:
)
continue
sidecar_meta = self._load_sidecar_metadata(
source_path,
enabled=bool(settings.get("meta_sidecar_enabled", True)),
suffix=str(settings.get("meta_sidecar_suffix", ".meta.json")),
)
task_dir = session_dir / task_id
task_dir.mkdir(parents=True, exist_ok=True)
target_source = self._move_to_directory(source_path, task_dir)
task = self.create_task_from_file(target_source, settings)
if sidecar_meta["meta_path"] is not None:
self._move_optional_metadata_file(sidecar_meta["meta_path"], task_dir)
context_payload = {
"source_title": source_path.stem,
"segment_duration_seconds": duration_seconds,
"segment_started_at": sidecar_meta["payload"].get("segment_started_at"),
"streamer": sidecar_meta["payload"].get("streamer"),
"room_id": sidecar_meta["payload"].get("room_id"),
"session_key": sidecar_meta["payload"].get("session_key"),
"full_video_bvid": sidecar_meta["payload"].get("full_video_bvid"),
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or source_path.stat().st_mtime,
}
task = self.create_task_from_file(target_source, settings, context_payload=context_payload)
accepted.append(
{
"task_id": task.id,
@ -199,3 +253,202 @@ class IngestService:
if not candidate.exists():
return candidate
index += 1
@staticmethod
def _load_sidecar_metadata(source_path: Path, *, enabled: bool, suffix: str) -> dict[str, object]:
if not enabled:
return {"meta_path": None, "payload": {}}
suffix = suffix.strip() or ".meta.json"
meta_path = source_path.with_name(f"{source_path.stem}{suffix}")
payload: dict[str, object] = {}
if meta_path.exists():
try:
payload = json.loads(meta_path.read_text(encoding="utf-8"))
except json.JSONDecodeError as exc:
raise ModuleError(
code="STAGE_META_INVALID",
message=f"元数据文件不是合法 JSON: {meta_path.name}",
retryable=False,
) from exc
if not isinstance(payload, dict):
raise ModuleError(
code="STAGE_META_INVALID",
message=f"元数据文件必须是对象: {meta_path.name}",
retryable=False,
)
return {"meta_path": meta_path if meta_path.exists() else None, "payload": payload}
def _move_optional_metadata_file(self, meta_path: Path, task_dir: Path) -> None:
if not meta_path.exists():
return
self._move_to_directory(meta_path, task_dir)
    def _build_task_context(
        self,
        task: Task,
        context_payload: dict[str, object],
        *,
        created_at: str,
        updated_at: str,
        session_gap_minutes: int,
    ) -> TaskContext:
        """Assemble the TaskContext for *task* from sidecar/scan payload data.

        Missing fields are filled in by inference: streamer/start-time from the
        title pattern, session key by matching recent contexts within
        *session_gap_minutes*, and the full-video BV id from sibling contexts
        or a stored session binding.  Falls back to ``task:<id>`` as the
        session key when nothing can be inferred.
        """
        source_title = self._clean_text(context_payload.get("source_title")) or task.title
        streamer = self._clean_text(context_payload.get("streamer"))
        room_id = self._clean_text(context_payload.get("room_id"))
        session_key = self._clean_text(context_payload.get("session_key"))
        full_video_bvid = self._clean_bvid(context_payload.get("full_video_bvid"))
        segment_duration = self._coerce_float(context_payload.get("segment_duration_seconds"))
        segment_started_at = self._coerce_iso_datetime(context_payload.get("segment_started_at"))
        if streamer is None or segment_started_at is None:
            # Fall back to parsing "<streamer> MM月DD日 HH时MM分" from the title.
            inferred = self._infer_from_title(
                source_title,
                reference_timestamp=context_payload.get("reference_timestamp"),
            )
            if streamer is None:
                streamer = inferred.get("streamer")
            if segment_started_at is None:
                segment_started_at = inferred.get("segment_started_at")
        if session_key is None:
            # Try to join an existing session; it may carry a BV id to inherit.
            session_key, inherited_bvid = self._infer_session_key(
                streamer=streamer,
                room_id=room_id,
                segment_started_at=segment_started_at,
                segment_duration_seconds=segment_duration,
                fallback_task_id=task.id,
                gap_minutes=session_gap_minutes,
            )
            if full_video_bvid is None:
                full_video_bvid = inherited_bvid
        elif full_video_bvid is None:
            # Explicit session key but no BV id: look it up in sibling contexts.
            full_video_bvid = self._find_full_video_bvid_by_session_key(session_key)
        if full_video_bvid is None:
            # Last resort: consult the persisted session binding table.
            binding = self.repo.get_session_binding(session_key=session_key, source_title=source_title)
            if binding is not None:
                if session_key is None and binding.session_key:
                    session_key = binding.session_key
                full_video_bvid = self._clean_bvid(binding.full_video_bvid)
        if session_key is None:
            session_key = f"task:{task.id}"
        return TaskContext(
            id=None,
            task_id=task.id,
            session_key=session_key,
            streamer=streamer,
            room_id=room_id,
            source_title=source_title,
            segment_started_at=segment_started_at,
            segment_duration_seconds=segment_duration,
            full_video_bvid=full_video_bvid,
            created_at=created_at,
            updated_at=updated_at,
        )
@staticmethod
def _clean_text(value: object) -> str | None:
if value is None:
return None
text = str(value).strip()
return text or None
@staticmethod
def _clean_bvid(value: object) -> str | None:
text = IngestService._clean_text(value)
if text and text.startswith("BV"):
return text
return None
@staticmethod
def _coerce_float(value: object) -> float | None:
if value is None or value == "":
return None
try:
return float(value)
except (TypeError, ValueError):
return None
@staticmethod
def _coerce_iso_datetime(value: object) -> str | None:
if value is None:
return None
text = str(value).strip()
if not text:
return None
try:
return datetime.fromisoformat(text).astimezone(SHANGHAI_TZ).isoformat()
except ValueError:
return None
    def _infer_from_title(self, title: str, *, reference_timestamp: object) -> dict[str, str | None]:
        """Extract streamer name and start time from a recording title.

        Titles matching TITLE_PATTERN look like "<streamer> MM月DD日 HH时MM分".
        Both values are None when the title does not match.
        """
        match = TITLE_PATTERN.match(title)
        if not match:
            return {"streamer": None, "segment_started_at": None}
        reference_dt = self._reference_datetime(reference_timestamp)
        month = int(match.group("month"))
        day = int(match.group("day"))
        hour = int(match.group("hour"))
        minute = int(match.group("minute"))
        year = reference_dt.year
        # Titles carry no year: a month/day later than the reference date must
        # belong to the previous year (e.g. a December title seen in January).
        if (month, day) > (reference_dt.month, reference_dt.day):
            year -= 1
        started_at = datetime(year, month, day, hour, minute, tzinfo=SHANGHAI_TZ)
        return {
            "streamer": match.group("streamer").strip(),
            "segment_started_at": started_at.isoformat(),
        }
@staticmethod
def _reference_datetime(reference_timestamp: object) -> datetime:
if isinstance(reference_timestamp, (int, float)):
return datetime.fromtimestamp(float(reference_timestamp), tz=SHANGHAI_TZ)
return datetime.now(tz=SHANGHAI_TZ)
    def _infer_session_key(
        self,
        *,
        streamer: str | None,
        room_id: str | None,
        segment_started_at: str | None,
        segment_duration_seconds: float | None,
        fallback_task_id: str,
        gap_minutes: int,
    ) -> tuple[str | None, str | None]:
        """Attach this segment to an existing recording session when possible.

        Returns ``(session_key, full_video_bvid)``.  A recent context of the
        same streamer (and room, when both sides know it) whose end time is at
        most *gap_minutes* before this segment's start donates its session key
        and BV id; otherwise a fresh "<streamer>:<timestamp>" key is created.
        ``(None, None)`` is returned when streamer or start time are unknown.

        NOTE(review): *fallback_task_id* is currently unused here — the caller
        applies its own ``task:<id>`` fallback.  Confirm whether it can go.
        """
        if not streamer or not segment_started_at:
            return None, None
        try:
            segment_start = datetime.fromisoformat(segment_started_at)
        except ValueError:
            return None, None
        tolerance = timedelta(minutes=max(gap_minutes, 0))
        for context in self.repo.find_recent_task_contexts(streamer):
            # Skip contexts from a different room when both room ids are known.
            if room_id and context.room_id and room_id != context.room_id:
                continue
            candidate_end = self._context_end_time(context)
            if candidate_end is None:
                continue
            # NOTE(review): comparing these assumes both datetimes are tz-aware
            # (mixing naive and aware raises TypeError) — confirm upstream
            # always normalizes via _coerce_iso_datetime.
            if segment_start >= candidate_end and segment_start - candidate_end <= tolerance:
                return context.session_key, context.full_video_bvid
        date_tag = segment_start.astimezone(SHANGHAI_TZ).strftime("%Y%m%dT%H%M")
        return f"{streamer}:{date_tag}", None
@staticmethod
def _context_end_time(context: TaskContext) -> datetime | None:
if not context.segment_started_at or context.segment_duration_seconds is None:
return None
try:
started_at = datetime.fromisoformat(context.segment_started_at)
except ValueError:
return None
return started_at + timedelta(seconds=float(context.segment_duration_seconds))
def _find_full_video_bvid_by_session_key(self, session_key: str) -> str | None:
for context in self.repo.list_task_contexts_by_session_key(session_key):
bvid = self._clean_bvid(context.full_video_bvid)
if bvid:
return bvid
return None

View File

@ -0,0 +1,247 @@
from __future__ import annotations
import json
import random
import re
import time
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import PublishRecord, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.biliup_cli import BiliupCliAdapter
class BiliupCliPublishProvider:
    """Publishes song clips to Bilibili through the ``biliup`` CLI tool.

    Clips are uploaded in batches of five: the first batch creates the video
    (yielding a BV id), subsequent batches are appended to it.  ``bvid.txt``
    and ``upload_done.flag`` in the work dir make the whole flow resumable.
    """

    def __init__(self, adapter: BiliupCliAdapter | None = None) -> None:
        self.adapter = adapter or BiliupCliAdapter()

    # Provider registration metadata consumed by the module registry.
    manifest = ProviderManifest(
        id="biliup_cli",
        name="biliup CLI Publish Provider",
        version="0.1.0",
        provider_type="publish_provider",
        entrypoint="biliup_next.modules.publish.providers.biliup_cli:BiliupCliPublishProvider",
        capabilities=["publish"],
        enabled_by_default=True,
    )

    def publish(self, task: Task, clip_videos: list, settings: dict[str, Any]) -> PublishRecord:
        """Upload *clip_videos* for *task* and return the PublishRecord.

        Title/description/dynamic/tags are rendered from the upload-config
        template with variables derived from the task title and the detected
        song list.  Raises ModuleError when there are no clips or an upload
        or append fails after all retries.
        """
        work_dir = Path(str(settings["session_dir"])) / task.title
        bvid_file = work_dir / "bvid.txt"
        upload_done = work_dir / "upload_done.flag"
        config = self._load_upload_config(Path(str(settings["upload_config_file"])))
        video_files = [artifact.path for artifact in clip_videos]
        if not video_files:
            raise ModuleError(
                code="PUBLISH_NO_CLIPS",
                message=f"没有可上传的切片: {task.id}",
                retryable=False,
            )
        parsed = self._parse_filename(task.title, config)
        streamer = parsed.get("streamer", task.title)
        date = parsed.get("date", "")
        songs_txt = work_dir / "songs.txt"
        songs_json = work_dir / "songs.json"
        songs_list = songs_txt.read_text(encoding="utf-8").strip() if songs_txt.exists() else ""
        song_count = 0
        if songs_json.exists():
            song_count = len(json.loads(songs_json.read_text(encoding="utf-8")).get("songs", []))
        quote = self._get_random_quote(config)
        # Variables available to the title/description/dynamic/tag templates.
        template_vars = {
            "streamer": streamer,
            "date": date,
            "song_count": song_count,
            "songs_list": songs_list,
            "daily_quote": quote.get("text", ""),
            "quote_author": quote.get("author", ""),
        }
        template = config.get("template", {})
        title = template.get("title", "{streamer}_{date}").format(**template_vars)
        description = template.get("description", "{songs_list}").format(**template_vars)
        dynamic = template.get("dynamic", "").format(**template_vars)
        tags = template.get("tag", "翻唱,唱歌,音乐").format(**template_vars)
        streamer_cfg = config.get("streamers", {})
        if streamer in streamer_cfg:
            # Per-streamer tag override takes precedence over the template.
            tags = streamer_cfg[streamer].get("tags", tags)
        upload_settings = config.get("upload_settings", {})
        tid = upload_settings.get("tid", 31)
        biliup_path = str(settings["biliup_path"])
        cookie_file = str(settings["cookie_file"])
        retry_count = max(1, int(settings.get("retry_count", 5)))
        # Best-effort credential refresh before uploading.
        self.adapter.run_optional([biliup_path, "-u", cookie_file, "renew"])
        first_batch = video_files[:5]
        remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
        existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
        if upload_done.exists() and existing_bvid.startswith("BV"):
            # A previous run already finished: reuse the recorded BV id.
            return PublishRecord(
                id=None,
                task_id=task.id,
                platform="bilibili",
                aid=None,
                bvid=existing_bvid,
                title=title,
                published_at=utc_now_iso(),
            )
        # Resume from a recorded BV id (first batch done, appends pending),
        # otherwise create the video by uploading the first batch.
        bvid = existing_bvid if existing_bvid.startswith("BV") else self._upload_first_batch(
            biliup_path=biliup_path,
            cookie_file=cookie_file,
            first_batch=first_batch,
            title=title,
            tid=tid,
            tags=tags,
            description=description,
            dynamic=dynamic,
            upload_settings=upload_settings,
            retry_count=retry_count,
        )
        bvid_file.write_text(bvid, encoding="utf-8")
        for batch_index, batch in enumerate(remaining_batches, start=2):
            self._append_batch(
                biliup_path=biliup_path,
                cookie_file=cookie_file,
                bvid=bvid,
                batch=batch,
                batch_index=batch_index,
                retry_count=retry_count,
            )
        upload_done.touch()
        return PublishRecord(
            id=None,
            task_id=task.id,
            platform="bilibili",
            aid=None,
            bvid=bvid,
            title=title,
            published_at=utc_now_iso(),
        )

    def _upload_first_batch(
        self,
        *,
        biliup_path: str,
        cookie_file: str,
        first_batch: list[str],
        title: str,
        tid: int,
        tags: str,
        description: str,
        dynamic: str,
        upload_settings: dict[str, Any],
        retry_count: int,
    ) -> str:
        """Run ``biliup upload`` for the first batch and return the new BV id.

        Retries up to *retry_count* times with exponential backoff; raises
        ModuleError(PUBLISH_UPLOAD_FAILED) when every attempt fails or no BV
        id can be parsed from the CLI output.
        """
        upload_cmd = [
            biliup_path,
            "-u",
            cookie_file,
            "upload",
            *first_batch,
            "--title",
            title,
            "--tid",
            str(tid),
            "--tag",
            tags,
            "--copyright",
            str(upload_settings.get("copyright", 2)),
            "--source",
            str(upload_settings.get("source", "直播回放")),
            "--desc",
            description,
        ]
        if dynamic:
            upload_cmd.extend(["--dynamic", dynamic])
        cover = str(upload_settings.get("cover", "")).strip()
        if cover and Path(cover).exists():
            upload_cmd.extend(["--cover", cover])
        for attempt in range(1, retry_count + 1):
            result = self.adapter.run(upload_cmd, label=f"首批上传[{attempt}/{retry_count}]")
            if result.returncode == 0:
                # Prefer the JSON-shaped bvid field; fall back to any BV token.
                match = re.search(r'"bvid":"(BV[A-Za-z0-9]+)"', result.stdout) or re.search(r"(BV[A-Za-z0-9]+)", result.stdout)
                if match:
                    return match.group(1)
            if attempt < retry_count:
                time.sleep(self._wait_seconds(attempt - 1))
                continue
            raise ModuleError(
                code="PUBLISH_UPLOAD_FAILED",
                message="首批上传失败",
                retryable=True,
                details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
            )
        raise AssertionError("unreachable")

    def _append_batch(
        self,
        *,
        biliup_path: str,
        cookie_file: str,
        bvid: str,
        batch: list[str],
        batch_index: int,
        retry_count: int,
    ) -> None:
        """Append *batch* to the existing video *bvid* via ``biliup append``.

        Retries with exponential backoff; raises ModuleError on final failure.
        """
        # Fixed pacing between batches before hitting the append endpoint.
        time.sleep(45)
        append_cmd = [biliup_path, "-u", cookie_file, "append", "--vid", bvid, *batch]
        for attempt in range(1, retry_count + 1):
            result = self.adapter.run(append_cmd, label=f"追加第{batch_index}批[{attempt}/{retry_count}]")
            if result.returncode == 0:
                return
            if attempt < retry_count:
                time.sleep(self._wait_seconds(attempt - 1))
                continue
            raise ModuleError(
                code="PUBLISH_APPEND_FAILED",
                message=f"追加第 {batch_index} 批失败",
                retryable=True,
                details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
            )

    @staticmethod
    def _wait_seconds(retry_index: int) -> int:
        """Exponential backoff: 300s doubling per retry, capped at 3600s."""
        return min(300 * (2**retry_index), 3600)

    @staticmethod
    def _load_upload_config(path: Path) -> dict[str, Any]:
        """Parse the JSON upload config; empty dict when the file is absent."""
        if not path.exists():
            return {}
        return json.loads(path.read_text(encoding="utf-8"))

    @staticmethod
    def _parse_filename(filename: str, config: dict[str, Any] | None = None) -> dict[str, str]:
        """Match *filename* against the configured regex patterns.

        Returns the first matching pattern's named groups (with "date"
        rendered through the pattern's ``date_format``), or a fallback of the
        whole filename as streamer and an empty date.
        """
        config = config or {}
        patterns = config.get("filename_patterns", {}).get("patterns", [])
        for pattern_config in patterns:
            regex = pattern_config.get("regex")
            if not regex:
                continue
            match = re.match(regex, filename)
            if match:
                data = match.groupdict()
                date_format = pattern_config.get("date_format", "{date}")
                try:
                    data["date"] = date_format.format(**data)
                except KeyError:
                    # date_format references a group the regex did not capture:
                    # keep whatever "date" value the groupdict already holds.
                    pass
                return data
        return {"streamer": filename, "date": ""}

    @staticmethod
    def _get_random_quote(config: dict[str, Any]) -> dict[str, str]:
        """Pick a random quote entry from config; empty strings when none exist."""
        quotes = config.get("quotes", [])
        if not quotes:
            return {"text": "", "author": ""}
        return random.choice(quotes)

View File

@ -0,0 +1,138 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.codex_cli import CodexCliAdapter
# Strict JSON Schema the codex CLI must follow for its song-list output:
# every field required, no extra properties allowed.
SONG_SCHEMA = {
    "type": "object",
    "properties": {
        "songs": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "start": {"type": "string"},
                    "end": {"type": "string"},
                    "title": {"type": "string"},
                    "artist": {"type": "string"},
                    "confidence": {"type": "number"},
                    "evidence": {"type": "string"},
                },
                "required": ["start", "end", "title", "artist", "confidence", "evidence"],
                "additionalProperties": False,
            },
        }
    },
    "required": ["songs"],
    "additionalProperties": False,
}
# Chinese-language prompt handed to `codex exec`: identify every song sung in
# the livestream subtitles, with timing, merging, filtering, and labeling
# rules, and emit JSON matching SONG_SCHEMA.  (Runtime string — do not edit
# casually; the model's behavior depends on this exact wording.)
TASK_PROMPT = """你是音乐片段识别助手。当前目录下有一个字幕文件。
任务:
1. 结合字幕内容并允许联网搜索进行纠错(识别同音字、唱错等)。
2. 识别出直播中唱过的所有歌曲,给出精确的开始和结束时间。歌曲开始时间规则:
- 歌曲开始时间应使用“上一句字幕的结束时间”作为 start_time。
- 这样可以尽量保留歌曲可能存在的前奏。
3. 同一首歌间隔 ≤160s 合并,>160s 分开。若连续识别出相同歌曲,且中间只有短暂对白、空白、转场或无歌词段,应合并为同一首歌.
4. 忽略纯聊天片段。
5. 无法确认的歌曲丢弃,宁缺毋滥:你的输出将直接面向最终用户。
6. 忽略短片段:如果一段演唱持续时间总和少于 15 秒,视为随口哼唱,请直接忽略,不计入列表。
7. 仔细分析每一句歌词,识别出相关歌曲后, 使用该歌曲歌词上下文对比字幕上下文,确定歌曲起始与停止时间
8.歌曲标注规则:
- 可以在歌曲名称后使用括号 () 添加补充说明。
- 常见标注示例:
- (片段):歌曲演唱时间较短,例如 < 60 秒
- (清唱):无伴奏演唱
- (副歌):只演唱副歌部分
- 标注应简洁,仅在确有必要时使用。
9. 通过歌曲起始和结束时间自检, 一般歌曲长度在5分钟以内, 1分钟以上, 可疑片段重新联网搜索检查.
最后请严格按照 Schema 生成 JSON 数据。"""
class CodexSongDetector:
    """Detects sung songs in a subtitle file by driving the codex CLI.

    Writes SONG_SCHEMA to ``song_schema.json`` in the subtitle's directory,
    runs ``codex exec`` with TASK_PROMPT, and expects ``songs.json`` /
    ``songs.txt`` to appear there.
    """

    def __init__(self, adapter: CodexCliAdapter | None = None) -> None:
        self.adapter = adapter or CodexCliAdapter()

    # Provider registration metadata consumed by the module registry.
    manifest = ProviderManifest(
        id="codex",
        name="Codex Song Detector",
        version="0.1.0",
        provider_type="song_detector",
        entrypoint="biliup_next.modules.song_detect.providers.codex:CodexSongDetector",
        capabilities=["song_detect"],
        enabled_by_default=True,
    )

    def detect(self, task: Task, subtitle_srt: Artifact, settings: dict[str, Any]) -> tuple[Artifact, Artifact]:
        """Run song detection for *task* on *subtitle_srt*.

        Returns ``(songs_json, songs_txt)`` artifacts.  Raises ModuleError
        when the codex invocation fails or the expected output files are not
        produced.
        """
        work_dir = Path(subtitle_srt.path).resolve().parent
        schema_path = work_dir / "song_schema.json"
        songs_json_path = work_dir / "songs.json"
        songs_txt_path = work_dir / "songs.txt"
        schema_path.write_text(json.dumps(SONG_SCHEMA, ensure_ascii=False, indent=2), encoding="utf-8")
        codex_cmd = str(settings.get("codex_cmd", "codex"))
        result = self.adapter.run_song_detect(
            codex_cmd=codex_cmd,
            work_dir=work_dir,
            prompt=TASK_PROMPT,
        )
        if result.returncode != 0:
            raise ModuleError(
                code="SONG_DETECT_FAILED",
                message="codex exec 执行失败",
                retryable=True,
                details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
            )
        if songs_json_path.exists() and not songs_txt_path.exists():
            # The model wrote only the JSON: derive the text variant from it.
            self._generate_txt_fallback(songs_json_path, songs_txt_path)
        if not songs_json_path.exists() or not songs_txt_path.exists():
            raise ModuleError(
                code="SONG_DETECT_OUTPUT_MISSING",
                message=f"未生成 songs.json/songs.txt: {work_dir}",
                retryable=True,
                details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
            )
        return (
            Artifact(
                id=None,
                task_id=task.id,
                artifact_type="songs_json",
                path=str(songs_json_path.resolve()),
                metadata_json=json.dumps({"provider": "codex"}),
                created_at=utc_now_iso(),
            ),
            Artifact(
                id=None,
                task_id=task.id,
                artifact_type="songs_txt",
                path=str(songs_txt_path.resolve()),
                metadata_json=json.dumps({"provider": "codex"}),
                created_at=utc_now_iso(),
            ),
        )

    def _generate_txt_fallback(self, songs_json_path: Path, songs_txt_path: Path) -> None:
        """Derive songs.txt (one "<HH:MM:SS> <title><artist>" line per song) from songs.json."""
        try:
            data = json.loads(songs_json_path.read_text(encoding="utf-8"))
            songs = data.get("songs", [])
            with songs_txt_path.open("w", encoding="utf-8") as file_handle:
                for song in songs:
                    # Trim "HH:MM:SS,mmm" / "HH:MM:SS.mmm" down to "HH:MM:SS".
                    start_time = str(song["start"]).split(",")[0].split(".")[0]
                    # NOTE(review): title and artist are concatenated with no
                    # separator — confirm this matches the desired format.
                    file_handle.write(f"{start_time} {song['title']}{song['artist']}\n")
        except Exception as exc:  # noqa: BLE001
            raise ModuleError(
                code="SONGS_TXT_GENERATE_FAILED",
                message=f"生成 songs.txt 失败: {songs_txt_path}",
                retryable=False,
                details={"error": str(exc)},
            ) from exc

View File

@ -0,0 +1,101 @@
from __future__ import annotations
import json
import subprocess
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
class FfmpegCopySplitProvider:
    """Split a source video into per-song clips with stream-copy ffmpeg.

    Uses ``-c copy`` so no re-encoding happens; cut points therefore snap to
    the nearest keyframes. A ``split_done.flag`` file next to songs.json marks
    a fully completed run so re-runs are idempotent.
    """

    manifest = ProviderManifest(
        id="ffmpeg_copy",
        name="FFmpeg Copy Split Provider",
        version="0.1.0",
        provider_type="split_provider",
        entrypoint="biliup_next.modules.split.providers.ffmpeg_copy:FfmpegCopySplitProvider",
        capabilities=["split"],
        enabled_by_default=True,
    )

    def split(self, task: Task, songs_json: Artifact, source_video: Artifact, settings: dict[str, Any]) -> list[Artifact]:
        """Cut ``source_video`` into one clip per entry of ``songs_json``.

        Returns clip artifacts sorted by filename. Raises ModuleError when
        songs.json has no songs, ffmpeg is missing, or a cut fails.
        """
        work_dir = Path(songs_json.path).resolve().parent
        split_dir = work_dir / "split_video"
        split_done = work_dir / "split_done.flag"
        # Fast path: a previous run completed every cut — reuse its output.
        if split_done.exists() and split_dir.exists():
            return self._collect_existing_clips(task.id, split_dir)
        with Path(songs_json.path).open("r", encoding="utf-8") as file_handle:
            data = json.load(file_handle)
        songs = data.get("songs", [])
        if not songs:
            raise ModuleError(
                code="SPLIT_SONGS_EMPTY",
                message=f"songs.json 中没有歌曲: {songs_json.path}",
                retryable=False,
            )
        split_dir.mkdir(parents=True, exist_ok=True)
        ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
        video_path = Path(source_video.path).resolve()
        for index, song in enumerate(songs, 1):
            # ffmpeg wants "HH:MM:SS.mmm"; songs.json stores SRT-style commas.
            start = str(song.get("start", "00:00:00,000")).replace(",", ".")
            end = str(song.get("end", "00:00:00,000")).replace(",", ".")
            # Strip path separators so a title cannot escape split_dir.
            title = str(song.get("title", "UNKNOWN")).replace("/", "_").replace("\\", "_")
            output_path = split_dir / f"{index:02d}_{title}{video_path.suffix}"
            # Resume support: skip clips a previous (interrupted) run finished.
            if output_path.exists():
                continue
            cmd = [
                ffmpeg_bin,
                "-y",
                "-ss",
                start,
                "-to",
                end,
                "-i",
                str(video_path),
                "-c",
                "copy",
                "-map_metadata",
                "0",
                str(output_path),
            ]
            try:
                subprocess.run(cmd, capture_output=True, text=True, check=True)
            except FileNotFoundError as exc:
                raise ModuleError(
                    code="FFMPEG_NOT_FOUND",
                    message=f"找不到 ffmpeg: {ffmpeg_bin}",
                    retryable=False,
                ) from exc
            except subprocess.CalledProcessError as exc:
                # Remove the partial clip so a retry does not see it via the
                # exists() check above and accept a truncated file as done.
                output_path.unlink(missing_ok=True)
                raise ModuleError(
                    code="SPLIT_FFMPEG_FAILED",
                    message=f"ffmpeg 切割失败: {output_path.name}",
                    retryable=True,
                    details={"stderr": exc.stderr[-2000:], "stdout": exc.stdout[-2000:]},
                ) from exc
        split_done.touch()
        return self._collect_existing_clips(task.id, split_dir)

    def _collect_existing_clips(self, task_id: str, split_dir: Path) -> list[Artifact]:
        """Return one clip artifact per regular file in ``split_dir``, sorted by name."""
        artifacts: list[Artifact] = []
        for path in sorted(split_dir.iterdir()):
            if not path.is_file():
                continue
            artifacts.append(
                Artifact(
                    id=None,
                    task_id=task_id,
                    artifact_type="clip_video",
                    path=str(path.resolve()),
                    metadata_json=json.dumps({"provider": "ffmpeg_copy"}),
                    created_at=utc_now_iso(),
                )
            )
        return artifacts

View File

@ -0,0 +1,191 @@
from __future__ import annotations
import json
import math
import shutil
import subprocess
import time
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
# Transcription language hint passed to Whisper ("zh" = Chinese).
LANGUAGE = "zh"
# Mono MP3 bitrate used for audio extraction; also drives segment sizing.
BITRATE_KBPS = 64
# Groq-hosted Whisper model used for transcription.
MODEL_NAME = "whisper-large-v3-turbo"
class GroqTranscribeProvider:
    """Transcribe a source video into an SRT subtitle file via Groq Whisper.

    The video's audio is extracted as mono MP3, sliced into segments small
    enough for Groq's upload limit, transcribed segment by segment, and merged
    into a single SRT with each segment's timestamps shifted by its offset.
    """

    # Cap rate-limit retries so a persistent 429 cannot loop forever; the
    # error raised after the cap is marked retryable for the orchestrator.
    MAX_RATE_LIMIT_RETRIES = 40

    manifest = ProviderManifest(
        id="groq",
        name="Groq Transcribe Provider",
        version="0.1.0",
        provider_type="transcribe_provider",
        entrypoint="biliup_next.modules.transcribe.providers.groq:GroqTranscribeProvider",
        capabilities=["transcribe"],
        enabled_by_default=True,
    )

    def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
        """Return a ``subtitle_srt`` artifact for ``source_video``.

        Raises ModuleError when the API key or groq package is missing, the
        source file does not exist, or extraction/transcription fails.
        """
        groq_api_key = str(settings.get("groq_api_key", "")).strip()
        if not groq_api_key:
            raise ModuleError(
                code="GROQ_API_KEY_MISSING",
                message="未配置 transcribe.groq_api_key",
                retryable=False,
            )
        try:
            from groq import Groq
        except ModuleNotFoundError as exc:
            raise ModuleError(
                code="GROQ_DEPENDENCY_MISSING",
                message="未安装 groq 依赖,请在 biliup-next 环境中执行 pip install -e .",
                retryable=False,
            ) from exc
        source_path = Path(source_video.path).resolve()
        if not source_path.exists():
            raise ModuleError(
                code="TRANSCRIBE_SOURCE_MISSING",
                message=f"源视频不存在: {source_path}",
                retryable=False,
            )
        ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
        max_file_size_mb = int(settings.get("max_file_size_mb", 23))
        work_dir = source_path.parent
        temp_audio_dir = work_dir / "temp_audio"
        temp_audio_dir.mkdir(parents=True, exist_ok=True)
        # Seconds of audio that fit in max_file_size_mb at BITRATE_KBPS:
        # MB * 8 * 1024 kilobits / kbps = seconds (floored, at least 1).
        segment_duration = max(1, math.floor((max_file_size_mb * 8 * 1024) / BITRATE_KBPS))
        output_pattern = temp_audio_dir / "part_%03d.mp3"
        self._extract_audio_segments(
            ffmpeg_bin=ffmpeg_bin,
            source_path=source_path,
            output_pattern=output_pattern,
            segment_duration=segment_duration,
        )
        segments = sorted(temp_audio_dir.glob("part_*.mp3"))
        if not segments:
            raise ModuleError(
                code="TRANSCRIBE_AUDIO_SEGMENTS_MISSING",
                message=f"未生成音频分片: {source_path.name}",
                retryable=False,
            )
        client = Groq(api_key=groq_api_key)
        # NOTE(review): task.title is used verbatim as a filename — confirm it
        # is sanitized upstream (no path separators).
        srt_path = work_dir / f"{task.title}.srt"
        global_idx = 1
        try:
            with srt_path.open("w", encoding="utf-8") as srt_file:
                for index, segment in enumerate(segments):
                    # Segments are fixed-length (-reset_timestamps 1), so the
                    # absolute offset is simply index * segment_duration.
                    offset_seconds = index * segment_duration
                    segment_data = self._transcribe_with_retry(client, segment)
                    for chunk in segment_data:
                        start = self._format_srt_time(float(chunk["start"]) + offset_seconds)
                        end = self._format_srt_time(float(chunk["end"]) + offset_seconds)
                        text = str(chunk["text"]).strip()
                        srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
                        global_idx += 1
        finally:
            # Always drop the temporary audio, even on failure mid-transcribe.
            shutil.rmtree(temp_audio_dir, ignore_errors=True)
        return Artifact(
            id=None,
            task_id=task.id,
            artifact_type="subtitle_srt",
            path=str(srt_path.resolve()),
            metadata_json=json.dumps(
                {
                    "provider": "groq",
                    "model": MODEL_NAME,
                    "segment_duration_seconds": segment_duration,
                }
            ),
            created_at=utc_now_iso(),
        )

    def _extract_audio_segments(
        self,
        *,
        ffmpeg_bin: str,
        source_path: Path,
        output_pattern: Path,
        segment_duration: int,
    ) -> None:
        """Extract mono MP3 audio from ``source_path`` into fixed-length parts.

        Raises ModuleError when ffmpeg is missing or the extraction fails.
        """
        cmd = [
            ffmpeg_bin,
            "-y",
            "-i",
            str(source_path),
            "-vn",
            "-acodec",
            "libmp3lame",
            "-b:a",
            f"{BITRATE_KBPS}k",
            "-ac",
            "1",
            "-ar",
            "22050",
            "-f",
            "segment",
            "-segment_time",
            str(segment_duration),
            "-reset_timestamps",
            "1",
            str(output_pattern),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except FileNotFoundError as exc:
            raise ModuleError(
                code="FFMPEG_NOT_FOUND",
                message=f"找不到 ffmpeg: {ffmpeg_bin}",
                retryable=False,
            ) from exc
        except subprocess.CalledProcessError as exc:
            raise ModuleError(
                code="FFMPEG_AUDIO_EXTRACT_FAILED",
                message=f"音频提取失败: {source_path.name}",
                retryable=True,
                details={"stderr": exc.stderr[-2000:], "stdout": exc.stdout[-2000:]},
            ) from exc

    def _transcribe_with_retry(self, client: Any, audio_file: Path) -> list[dict[str, Any]]:
        """Transcribe one audio segment, retrying on rate limits.

        429/rate-limit errors are retried with a fixed pause, at most
        MAX_RATE_LIMIT_RETRIES times (previously this looped forever); any
        other failure raises immediately.
        """
        retry_count = 0
        while True:
            try:
                with audio_file.open("rb") as file_handle:
                    response = client.audio.transcriptions.create(
                        file=(audio_file.name, file_handle.read()),
                        model=MODEL_NAME,
                        response_format="verbose_json",
                        language=LANGUAGE,
                        temperature=0.0,
                    )
                return [dict(segment) for segment in response.segments]
            except Exception as exc:  # noqa: BLE001
                retry_count += 1
                err_str = str(exc)
                rate_limited = "429" in err_str or "rate_limit" in err_str.lower()
                if rate_limited and retry_count < self.MAX_RATE_LIMIT_RETRIES:
                    time.sleep(25)
                    continue
                raise ModuleError(
                    code="GROQ_TRANSCRIBE_FAILED",
                    message=f"Groq 转录失败: {audio_file.name}",
                    retryable=True,
                    details={"error": err_str, "retry_count": retry_count},
                ) from exc

    @staticmethod
    def _format_srt_time(seconds: float) -> str:
        """Format ``seconds`` as an SRT timestamp ``HH:MM:SS,mmm``.

        Rounds to the nearest millisecond first so float noise (e.g.
        2.9999999) does not truncate into the wrong second.
        """
        total_millis = max(0, round(seconds * 1000))
        total_secs, millis = divmod(total_millis, 1000)
        hours, remainder = divmod(total_secs, 3600)
        minutes, secs = divmod(remainder, 60)
        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"