init biliup-next

This commit is contained in:
theshy
2026-04-01 00:44:58 +08:00
commit d0cf1fd0df
127 changed files with 15582 additions and 0 deletions

View File

@ -0,0 +1,34 @@
from __future__ import annotations
from biliup_next.core.models import utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.workspace_cleanup import WorkspaceCleanupService
from biliup_next.infra.task_repository import TaskRepository
class CollectionService:
    """Run one collection sync step ("a" or "b") for a task.

    When both collection steps are recorded as succeeded, the task is marked
    ``collection_synced`` and the workspace cleanup service is invoked.
    """

    def __init__(self, registry: Registry, repo: TaskRepository):
        self.registry = registry
        self.repo = repo
        self.cleanup = WorkspaceCleanupService(repo)

    def _both_collections_succeeded(self, task_id: str) -> bool:
        """Return True when collection_a and collection_b are both succeeded."""
        by_name = {entry.step_name: entry for entry in self.repo.list_steps(task_id)}
        for name in ("collection_a", "collection_b"):
            entry = by_name.get(name)
            if not entry or entry.status != "succeeded":
                return False
        return True

    def run(self, task_id: str, target: str, settings: dict[str, object]) -> dict[str, object]:
        """Sync the task into collection ``target`` and return the provider result.

        Raises:
            RuntimeError: if ``target`` is not "a" or "b", or the task is unknown.
        """
        if target not in {"a", "b"}:
            raise RuntimeError(f"unsupported collection target: {target}")

        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")

        step_name = f"collection_{target}"
        provider_id = str(settings.get("provider", "bilibili_collection"))
        provider = self.registry.get("collection_provider", provider_id)

        self.repo.update_step_status(task_id, step_name, "running", started_at=utc_now_iso())
        outcome = provider.sync(task, target, settings)
        finished_at = utc_now_iso()
        self.repo.update_step_status(task_id, step_name, "succeeded", finished_at=finished_at)

        if not self._both_collections_succeeded(task_id):
            return outcome

        # Both collections are done: finalize the task, then clean its outputs.
        self.repo.update_task_status(task_id, "collection_synced", finished_at)
        cleanup_outcome = self.cleanup.cleanup_task_outputs(task_id, settings)
        return {**outcome, "cleanup": cleanup_outcome}

View File

@ -0,0 +1,24 @@
from __future__ import annotations
from biliup_next.core.models import utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.task_repository import TaskRepository
class CommentService:
    """Run the ``comment`` step of a task through the configured provider."""

    def __init__(self, registry: Registry, repo: TaskRepository):
        self.registry = registry
        self.repo = repo

    def run(self, task_id: str, settings: dict[str, object]) -> dict[str, object]:
        """Post the comment for ``task_id`` and return the provider's result.

        The step is marked ``running`` before the provider call and
        ``succeeded`` afterwards; the task status then becomes ``commented``.

        Raises:
            RuntimeError: if the task does not exist in the repository.
        """
        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")

        provider_id = str(settings.get("provider", "bilibili_top_comment"))
        provider = self.registry.get("comment_provider", provider_id)

        self.repo.update_step_status(task_id, "comment", "running", started_at=utc_now_iso())
        outcome = provider.comment(task, settings)

        # One timestamp is shared by the step and the task status update.
        done_at = utc_now_iso()
        self.repo.update_step_status(task_id, "comment", "succeeded", finished_at=done_at)
        self.repo.update_task_status(task_id, "commented", done_at)
        return outcome

View File

@ -0,0 +1,42 @@
from __future__ import annotations
from pathlib import Path
from typing import Any
from biliup_next.core.errors import ModuleError
from biliup_next.core.providers import ProviderManifest
class LocalFileIngestProvider:
    """Ingest provider that validates a source file on the local filesystem."""

    manifest = ProviderManifest(
        id="local_file",
        name="Local File Ingest",
        version="0.1.0",
        provider_type="ingest_provider",
        entrypoint="biliup_next.modules.ingest.providers.local_file:LocalFileIngestProvider",
        capabilities=["ingest"],
        enabled_by_default=True,
    )

    def validate_source(self, source_path: Path, settings: dict[str, Any]) -> None:
        """Check that ``source_path`` is an existing file with an allowed extension.

        The extension comparison is case-insensitive and is made against
        ``settings["allowed_extensions"]`` (an empty list when the key is absent).

        Raises:
            ModuleError: ``SOURCE_NOT_FOUND``, ``SOURCE_NOT_FILE`` or
                ``SOURCE_EXTENSION_NOT_ALLOWED``; none of them is retryable.
        """
        if not source_path.exists():
            raise ModuleError(
                code="SOURCE_NOT_FOUND",
                message=f"源文件不存在: {source_path}",
                retryable=False,
            )
        if not source_path.is_file():
            raise ModuleError(
                code="SOURCE_NOT_FILE",
                message=f"源路径不是文件: {source_path}",
                retryable=False,
            )

        extension = source_path.suffix.lower()
        configured = settings.get("allowed_extensions", [])
        permitted = [str(entry).lower() for entry in configured]
        if extension in permitted:
            return

        raise ModuleError(
            code="SOURCE_EXTENSION_NOT_ALLOWED",
            message=f"文件扩展名不受支持: {extension}",
            retryable=False,
            details={"allowed_extensions": permitted},
        )

View File

@ -0,0 +1,201 @@
from __future__ import annotations
import json
import shutil
import subprocess
import time
from pathlib import Path
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task, TaskStep, utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.task_repository import TaskRepository
class IngestService:
    """Create tasks from source video files and scan the staging directory."""

    def __init__(self, registry: Registry, repo: TaskRepository):
        self.registry = registry
        self.repo = repo

    def create_task_from_file(
        self,
        source_path: Path,
        settings: dict[str, object],
        *,
        task_id: str | None = None,
    ) -> Task:
        """Validate ``source_path`` and register a new task for it.

        Args:
            source_path: Video file the task is created from.
            settings: Ingest settings; ``provider`` selects the ingest provider
                (default ``"local_file"``).
            task_id: Optional explicit task id/title. Defaults to the file stem.
                ``scan_stage`` passes the id it derived *before* moving the
                file, so a collision-renamed file (``foo.1.mp4``) still yields
                task ``foo``, matching its session directory.

        Returns:
            The newly stored task.

        Raises:
            ModuleError: ``TASK_ALREADY_EXISTS`` when the id is already taken,
                or whatever the provider's ``validate_source`` raises.
        """
        provider_id = str(settings.get("provider", "local_file"))
        provider = self.registry.get("ingest_provider", provider_id)
        provider.validate_source(source_path, settings)

        if task_id is None:
            task_id = source_path.stem
        if self.repo.get_task(task_id):
            raise ModuleError(
                code="TASK_ALREADY_EXISTS",
                message=f"任务已存在: {task_id}",
                retryable=False,
            )

        now = utc_now_iso()
        resolved_source = str(source_path.resolve())
        task = Task(
            id=task_id,
            source_type="local_file",
            source_path=resolved_source,
            title=task_id,
            status="created",
            created_at=now,
            updated_at=now,
        )
        self.repo.upsert_task(task)

        # Ingest is already complete at this point; every later step starts pending.
        pending_steps = (
            "transcribe",
            "song_detect",
            "split",
            "publish",
            "comment",
            "collection_a",
            "collection_b",
        )
        steps = [TaskStep(None, task_id, "ingest", "succeeded", None, None, 0, now, now)]
        steps.extend(
            TaskStep(None, task_id, name, "pending", None, None, 0, None, None)
            for name in pending_steps
        )
        self.repo.replace_steps(task_id, steps)

        self.repo.add_artifact(
            Artifact(
                id=None,
                task_id=task_id,
                artifact_type="source_video",
                path=resolved_source,
                metadata_json=json.dumps({"provider": provider_id}),
                created_at=now,
            )
        )
        return task

    def scan_stage(self, settings: dict[str, object]) -> dict[str, object]:
        """Scan ``stage_dir`` and turn eligible files into tasks.

        Per file (sorted by path):
          * names starting with "." or ending with ".part", and files whose
            extension is not in ``allowed_extensions``, are ignored silently;
          * files modified more recently than ``stability_wait_seconds`` are
            reported under ``skipped`` and left in place;
          * files whose stem is an existing task id are moved to ``backup_dir``
            and reported under ``skipped``;
          * files shorter than ``min_duration_seconds`` are moved to
            ``backup_dir`` and reported under ``rejected``;
          * everything else is moved to ``session_dir/<task_id>/`` and a task
            is created for it (reported under ``accepted``).

        Returns:
            ``{"accepted": [...], "rejected": [...], "skipped": [...]}``.

        Raises:
            KeyError: if ``stage_dir``, ``backup_dir`` or ``session_dir`` is
                missing from ``settings``.
            ModuleError: propagated from ffprobe probing or task creation.
        """
        stage_dir = Path(str(settings["stage_dir"])).resolve()
        backup_dir = Path(str(settings["backup_dir"])).resolve()
        session_dir = Path(str(settings["session_dir"])).resolve()
        ffprobe_bin = str(settings.get("ffprobe_bin", "ffprobe"))
        min_duration = int(settings.get("min_duration_seconds", 0))
        stability_wait_seconds = int(settings.get("stability_wait_seconds", 30))

        stage_dir.mkdir(parents=True, exist_ok=True)
        backup_dir.mkdir(parents=True, exist_ok=True)
        session_dir.mkdir(parents=True, exist_ok=True)

        accepted: list[dict[str, object]] = []
        rejected: list[dict[str, object]] = []
        skipped: list[dict[str, object]] = []
        allowed = {str(item).lower() for item in settings.get("allowed_extensions", [])}

        for source_path in sorted(p for p in stage_dir.iterdir() if p.is_file()):
            if source_path.name.startswith(".") or source_path.name.endswith(".part"):
                continue
            if source_path.suffix.lower() not in allowed:
                continue

            if not self._is_stable_enough(source_path, stability_wait_seconds):
                skipped.append(
                    {
                        "source_path": str(source_path),
                        "reason": "file_not_stable_yet",
                        "stability_wait_seconds": stability_wait_seconds,
                    }
                )
                continue

            # The task id is fixed here, before any move can rename the file.
            task_id = source_path.stem
            if self.repo.get_task(task_id):
                target = self._move_to_directory(source_path, backup_dir)
                skipped.append(
                    {
                        "source_path": str(source_path),
                        "reason": "task_exists",
                        "moved_to": str(target),
                    }
                )
                continue

            duration_seconds = self._probe_duration_seconds(source_path, ffprobe_bin)
            if duration_seconds < min_duration:
                target = self._move_to_directory(source_path, backup_dir)
                rejected.append(
                    {
                        "source_path": str(source_path),
                        "reason": "duration_too_short",
                        "duration_seconds": duration_seconds,
                        "min_duration_seconds": min_duration,
                        "moved_to": str(target),
                    }
                )
                continue

            task_dir = session_dir / task_id
            task_dir.mkdir(parents=True, exist_ok=True)
            target_source = self._move_to_directory(source_path, task_dir)
            # Pass the id explicitly: if the move had to rename the file to
            # avoid a collision, its stem no longer equals the directory name.
            task = self.create_task_from_file(target_source, settings, task_id=task_id)
            accepted.append(
                {
                    "task_id": task.id,
                    "title": task.title,
                    "source_path": str(target_source),
                    "duration_seconds": duration_seconds,
                }
            )

        return {"accepted": accepted, "rejected": rejected, "skipped": skipped}

    @staticmethod
    def _is_stable_enough(source_path: Path, stability_wait_seconds: int) -> bool:
        """Return True when the file's mtime is at least the given seconds old.

        A non-positive wait disables the check.
        """
        if stability_wait_seconds <= 0:
            return True
        age_seconds = time.time() - source_path.stat().st_mtime
        return age_seconds >= stability_wait_seconds

    def _probe_duration_seconds(self, source_path: Path, ffprobe_bin: str) -> float:
        """Return the container duration in seconds as reported by ffprobe.

        Raises:
            ModuleError: ``FFPROBE_NOT_FOUND`` if the binary cannot be executed,
                ``FFPROBE_FAILED`` on a non-zero exit status, or
                ``FFPROBE_INVALID_DURATION`` if the output is not a float.
        """
        cmd = [
            ffprobe_bin,
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(source_path),
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        except FileNotFoundError as exc:
            raise ModuleError(
                code="FFPROBE_NOT_FOUND",
                message=f"找不到 ffprobe: {ffprobe_bin}",
                retryable=False,
            ) from exc
        except subprocess.CalledProcessError as exc:
            raise ModuleError(
                code="FFPROBE_FAILED",
                message=f"ffprobe 获取时长失败: {source_path.name}",
                retryable=False,
                details={"stderr": exc.stderr.strip()},
            ) from exc

        raw_duration = result.stdout.strip()
        try:
            return float(raw_duration)
        except ValueError as exc:
            raise ModuleError(
                code="FFPROBE_INVALID_DURATION",
                message=f"ffprobe 返回非法时长: {source_path.name}",
                retryable=False,
                details={"stdout": raw_duration},
            ) from exc

    def _move_to_directory(self, source_path: Path, target_dir: Path) -> Path:
        """Move the file into ``target_dir`` and return its resolved new path.

        An existing file of the same name is never overwritten; the moved file
        gets a numbered name instead (see ``_unique_target_path``).
        """
        target_dir.mkdir(parents=True, exist_ok=True)
        target_path = target_dir / source_path.name
        if target_path.exists():
            target_path = self._unique_target_path(target_dir, source_path.name)
        shutil.move(str(source_path), str(target_path))
        return target_path.resolve()

    @staticmethod
    def _unique_target_path(target_dir: Path, filename: str) -> Path:
        """Return the first non-existing ``<stem>.<n><suffix>`` path, n = 1, 2, ..."""
        base = Path(filename).stem
        suffix = Path(filename).suffix
        index = 1
        while True:
            candidate = target_dir / f"{base}.{index}{suffix}"
            if not candidate.exists():
                return candidate
            index += 1

View File

@ -0,0 +1,43 @@
from __future__ import annotations
import json
from pathlib import Path
from biliup_next.core.models import Artifact, PublishRecord, utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.task_repository import TaskRepository
class PublishService:
    """Run the ``publish`` step: hand the task's clip videos to a publish provider."""

    def __init__(self, registry: Registry, repo: TaskRepository):
        self.registry = registry
        self.repo = repo

    def _register_bvid_artifact(self, task_id: str, task_title: str, settings: dict[str, object]) -> None:
        """Store a ``publish_bvid`` artifact pointing at ``<session_dir>/<title>/bvid.txt``.

        Only the path is recorded here; this method does not write the file.
        """
        session_root = Path(str(settings.get("session_dir", "session")))
        bvid_file = (session_root / task_title / "bvid.txt").resolve()
        artifact = Artifact(
            id=None,
            task_id=task_id,
            artifact_type="publish_bvid",
            path=str(bvid_file),
            metadata_json=json.dumps({}),
            created_at=utc_now_iso(),
        )
        self.repo.add_artifact(artifact)

    def run(self, task_id: str, settings: dict[str, object]) -> PublishRecord:
        """Publish every ``clip_video`` artifact of the task and return the record.

        The publish record is always stored; a ``publish_bvid`` artifact is
        added only when the record carries a bvid. Afterwards the step is
        marked ``succeeded`` and the task status becomes ``published``.

        Raises:
            RuntimeError: if the task does not exist in the repository.
        """
        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")

        clips = []
        for item in self.repo.list_artifacts(task_id):
            if item.artifact_type == "clip_video":
                clips.append(item)

        provider_id = str(settings.get("provider", "biliup_cli"))
        provider = self.registry.get("publish_provider", provider_id)

        self.repo.update_step_status(task_id, "publish", "running", started_at=utc_now_iso())
        record = provider.publish(task, clips, settings)
        self.repo.add_publish_record(record)

        if record.bvid:
            self._register_bvid_artifact(task_id, task.title, settings)

        done_at = utc_now_iso()
        self.repo.update_step_status(task_id, "publish", "succeeded", finished_at=done_at)
        self.repo.update_task_status(task_id, "published", done_at)
        return record

View File

@ -0,0 +1,28 @@
from __future__ import annotations
from biliup_next.core.models import Artifact, utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.task_repository import TaskRepository
class SongDetectService:
    """Run the ``song_detect`` step: derive song lists from the task's subtitles."""

    def __init__(self, registry: Registry, repo: TaskRepository):
        self.registry = registry
        self.repo = repo

    def run(self, task_id: str, settings: dict[str, object]) -> tuple[Artifact, Artifact]:
        """Detect songs for ``task_id`` and store both resulting artifacts.

        Args:
            task_id: Id of the task to process.
            settings: Step settings; ``provider`` selects the detector
                (default ``"codex"``).

        Returns:
            The ``(songs_json, songs_txt)`` artifacts returned by the provider.

        Raises:
            RuntimeError: if the task does not exist, or it has no
                ``subtitle_srt`` artifact yet.
        """
        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")

        artifacts = self.repo.list_artifacts(task_id)
        # Use a default with next(): a bare StopIteration carries no message
        # and can be swallowed silently if it escapes through iteration code.
        subtitle_srt = next((a for a in artifacts if a.artifact_type == "subtitle_srt"), None)
        if subtitle_srt is None:
            raise RuntimeError(f"subtitle_srt artifact not found: {task_id}")

        provider = self.registry.get("song_detector", str(settings.get("provider", "codex")))
        started_at = utc_now_iso()
        self.repo.update_step_status(task_id, "song_detect", "running", started_at=started_at)

        songs_json, songs_txt = provider.detect(task, subtitle_srt, settings)
        self.repo.add_artifact(songs_json)
        self.repo.add_artifact(songs_txt)

        finished_at = utc_now_iso()
        self.repo.update_step_status(task_id, "song_detect", "succeeded", finished_at=finished_at)
        self.repo.update_task_status(task_id, "songs_detected", finished_at)
        return songs_json, songs_txt

View File

@ -0,0 +1,45 @@
from __future__ import annotations
from pathlib import Path
from biliup_next.core.models import Artifact, utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.storage_guard import ensure_free_space, mb_to_bytes
from biliup_next.infra.task_repository import TaskRepository
class SplitService:
    """Run the ``split`` step: cut the source video into per-song clips."""

    def __init__(self, registry: Registry, repo: TaskRepository):
        self.registry = registry
        self.repo = repo

    def _require_artifact(self, artifacts: list[Artifact], artifact_type: str, task_id: str) -> Artifact:
        """Return the first artifact of ``artifact_type`` or raise RuntimeError."""
        # Use a default with next(): a bare StopIteration carries no message
        # and can be swallowed silently if it escapes through iteration code.
        found = next((a for a in artifacts if a.artifact_type == artifact_type), None)
        if found is None:
            raise RuntimeError(f"{artifact_type} artifact not found: {task_id}")
        return found

    def run(self, task_id: str, settings: dict[str, object]) -> list[Artifact]:
        """Split the task's source video and store any new clip artifacts.

        Before the step is marked ``running``, free space next to the source
        file is checked: the source size plus ``min_free_space_mb`` must be
        available.

        Args:
            task_id: Id of the task to process.
            settings: Step settings; ``provider`` selects the splitter
                (default ``"ffmpeg_copy"``).

        Returns:
            All clip artifacts returned by the provider, including ones that
            were already stored.

        Raises:
            RuntimeError: if the task does not exist, or it lacks a
                ``songs_json`` or ``source_video`` artifact.
        """
        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")

        artifacts = self.repo.list_artifacts(task_id)
        songs_json = self._require_artifact(artifacts, "songs_json", task_id)
        source_video = self._require_artifact(artifacts, "source_video", task_id)

        source_path = Path(source_video.path)
        source_size = source_path.stat().st_size
        reserve_bytes = mb_to_bytes(settings.get("min_free_space_mb", 0))
        ensure_free_space(
            source_path.parent,
            source_size + reserve_bytes,
            code="SPLIT_NO_SPACE",
            message=f"剩余空间不足,无法开始切歌: {source_path.name}",
            retryable=True,
            details={"source_size_bytes": source_size, "reserve_bytes": reserve_bytes},
        )

        provider = self.registry.get("split_provider", str(settings.get("provider", "ffmpeg_copy")))
        started_at = utc_now_iso()
        self.repo.update_step_status(task_id, "split", "running", started_at=started_at)
        clip_artifacts = provider.split(task, songs_json, source_video, settings)

        # Skip clips that are already registered so a re-run does not duplicate them.
        existing = {(a.artifact_type, a.path) for a in artifacts}
        for artifact in clip_artifacts:
            if (artifact.artifact_type, artifact.path) not in existing:
                self.repo.add_artifact(artifact)

        finished_at = utc_now_iso()
        self.repo.update_step_status(task_id, "split", "succeeded", finished_at=finished_at)
        self.repo.update_task_status(task_id, "split_done", finished_at)
        return clip_artifacts

View File

@ -0,0 +1,27 @@
from __future__ import annotations
from biliup_next.core.models import Artifact, utc_now_iso
from biliup_next.core.registry import Registry
from biliup_next.infra.task_repository import TaskRepository
class TranscribeService:
    """Run the ``transcribe`` step: produce a transcript artifact for the source video."""

    def __init__(self, registry: Registry, repo: TaskRepository):
        self.registry = registry
        self.repo = repo

    def run(self, task_id: str, settings: dict[str, object]) -> Artifact:
        """Transcribe the task's source video and store the resulting artifact.

        Args:
            task_id: Id of the task to process.
            settings: Step settings; ``provider`` selects the transcriber
                (default ``"groq"``).

        Returns:
            The artifact returned by the provider.

        Raises:
            RuntimeError: if the task does not exist, or it has no
                ``source_video`` artifact.
        """
        task = self.repo.get_task(task_id)
        if task is None:
            raise RuntimeError(f"task not found: {task_id}")

        artifacts = self.repo.list_artifacts(task_id)
        # Use a default with next(): a bare StopIteration carries no message
        # and can be swallowed silently if it escapes through iteration code.
        source_video = next((a for a in artifacts if a.artifact_type == "source_video"), None)
        if source_video is None:
            raise RuntimeError(f"source_video artifact not found: {task_id}")

        provider = self.registry.get("transcribe_provider", str(settings.get("provider", "groq")))
        started_at = utc_now_iso()
        self.repo.update_step_status(task_id, "transcribe", "running", started_at=started_at)

        artifact = provider.transcribe(task, source_video, settings)
        self.repo.add_artifact(artifact)

        finished_at = utc_now_iso()
        self.repo.update_step_status(task_id, "transcribe", "succeeded", finished_at=finished_at)
        self.repo.update_task_status(task_id, "transcribed", finished_at)
        return artifact