feat: package docker deployment and publish flow

This commit is contained in:
theshy
2026-04-22 16:20:03 +08:00
parent 055474360e
commit 2146687dc6
178 changed files with 24318 additions and 20855 deletions

19
.dockerignore Normal file
View File

@ -0,0 +1,19 @@
.git
.venv
.pytest_cache
__pycache__
*.pyc
data/
runtime/cookies.json
runtime/upload_config.json
runtime/biliup
runtime/codex/
runtime/logs/
frontend/node_modules/
frontend/dist/
.env
config/settings.json
config/settings.staged.json

49
.env.example Normal file
View File

@ -0,0 +1,49 @@
# Web/API port exposed on the host.
BILIUP_NEXT_PORT=8000
# Image used by both api and worker. Override this when using a versioned tag
# or a private registry image, for example 192.168.1.100:25490/biliup-next:20260420.
BILIUP_NEXT_IMAGE=biliup-next:local
# Worker polling interval in seconds.
WORKER_INTERVAL=5
# Container timezone.
TZ=Asia/Shanghai
# Optional container outbound proxy. In Docker Desktop/WSL, host.docker.internal
# points to the Windows host; set this to your local proxy port.
# These values are also passed as Docker build args for apt/pip/npm.
# HTTP_PROXY=http://host.docker.internal:7897
# HTTPS_PROXY=http://host.docker.internal:7897
# ALL_PROXY=http://host.docker.internal:7897
# NO_PROXY=localhost,127.0.0.1,api,worker
#
# Docker build-time proxy. Separate names avoid being overridden by host
# HTTP_PROXY/HTTPS_PROXY when Compose interpolates build args.
# DOCKER_BUILD_HTTP_PROXY=http://host.docker.internal:7897
# DOCKER_BUILD_HTTPS_PROXY=http://host.docker.internal:7897
# DOCKER_BUILD_ALL_PROXY=http://host.docker.internal:7897
# DOCKER_BUILD_NO_PROXY=localhost,127.0.0.1,api,worker
# Required for Groq transcription. Prefer this env var over writing the key
# directly into config/settings.json.
GROQ_API_KEY=
# Optional key pool. Use a JSON array; keys here are tried before GROQ_API_KEY.
# GROQ_API_KEYS=["gsk_xxx","gsk_yyy"]
# Optional for the Codex song detector when you do not mount an existing
# Codex login state into runtime/codex.
OPENAI_API_KEY=
# Bilibili collection IDs.
# A: live full-video collection
# B: live split/pure-song collection
COLLECTION_SEASON_ID_A=7196643
COLLECTION_SEASON_ID_B=7196624
# Optional explicit config overrides. The generic format is:
# BILIUP_NEXT__GROUP__FIELD=value
#
# BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
# BILIUP_NEXT__PUBLISH__RATE_LIMIT_RETRY_SCHEDULE_MINUTES=[15,30,60]

5
.gitignore vendored
View File

@ -1,4 +1,8 @@
.venv/
.codex
.codex/
.env
.tmp-tests/
__pycache__/
*.pyc
*.pyo
@ -12,6 +16,7 @@ systemd/rendered/
runtime/cookies.json
runtime/upload_config.json
runtime/biliup
runtime/codex/
runtime/logs/
frontend/node_modules/

61
Dockerfile Normal file
View File

@ -0,0 +1,61 @@
# --- Stage 1: build the frontend bundle with Node ---
FROM node:24-bookworm-slim AS frontend-builder
# Build-time proxy settings. Both upper- and lower-case forms are declared
# because different tools (npm, apt, pip) read different casings.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG ALL_PROXY
ARG NO_PROXY
ARG http_proxy
ARG https_proxy
ARG all_proxy
ARG no_proxy
WORKDIR /build/frontend
# Copy only the package manifests first so `npm ci` is cached unless
# dependencies actually change.
COPY frontend/package*.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build

# --- Stage 2: runtime image with Python, ffmpeg, and the built frontend ---
FROM python:3.12-slim AS app
# Same proxy args re-declared: ARGs do not carry across build stages.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG ALL_PROXY
ARG NO_PROXY
ARG http_proxy
ARG https_proxy
ARG all_proxy
ARG no_proxy
# BILIUP_NEXT_CONTAINER=1 lets the app detect it is running inside Docker.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    BILIUP_NEXT_CONTAINER=1
WORKDIR /app
# ffmpeg is needed for transcribe/split steps; curl/ca-certificates for
# outbound HTTPS calls.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*
COPY pyproject.toml README.md ./
COPY src ./src
COPY config ./config
# Only example/README runtime files are baked in; real credentials come from
# the host bind mount at runtime (see docker-compose.yml).
COPY runtime/README.md runtime/cookies.example.json runtime/upload_config.example.json ./runtime/
COPY --from=frontend-builder /build/frontend/dist ./frontend/dist
# Reuse the node binary and global modules from the builder stage so the
# runtime image gets node/npm without a second install.
# NOTE(review): assumes the python:3.12-slim base is binary-compatible with
# node:24-bookworm-slim (both Debian bookworm today) — confirm if either tag
# moves to a different Debian release.
COPY --from=frontend-builder /usr/local/bin/node /usr/local/bin/node
COPY --from=frontend-builder /usr/local/lib/node_modules /usr/local/lib/node_modules
# Install the app itself, yt-dlp for URL ingest, recreate the npm/npx
# symlinks for the copied node tree, then install the codex CLI used by
# song_detect.provider=codex.
RUN pip install --editable . \
    && pip install yt-dlp \
    && ln -sf ../lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm \
    && ln -sf ../lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx \
    && npm install -g @openai/codex
# Pre-create workspace/runtime directories so the app can start even before
# the host bind mounts are populated.
RUN mkdir -p /app/data/workspace/stage /app/data/workspace/session /app/data/workspace/backup /app/runtime/logs /root/.codex
EXPOSE 8000
# Default command serves the API; the compose worker service overrides this.
CMD ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]

View File

@ -59,6 +59,10 @@ bash setup.sh
- `docs/cold-start-checklist.md`
发布流程、输出文案和评论示例见:
- `docs/publish-output-examples.md`
浏览器访问:
```text
@ -192,6 +196,29 @@ cd /home/theshy/biliup/biliup-next
- 内容按 `P1/P2/P3` 分组
- 依赖 `full_video_bvid.txt` 或通过标题匹配解析到完整版 BV
评论格式和投稿文案一样,优先从 `runtime/upload_config.json` 读取。可编辑字段:
```json
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
```
常用变量:
- 链接:`{current_full_video_link}``{current_pure_video_link}``{previous_full_video_link}``{previous_pure_video_link}`
- 分段与序号:`{part_index}``{song_index}`
- 纯享歌单:`{title}``{artist}``{artist_suffix}``{song_text}`
- 完整版时间轴:`{line_text}`
如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过。
清理默认关闭:
- `cleanup.delete_source_video_after_collection_synced = false`
@ -201,11 +228,14 @@ cd /home/theshy/biliup/biliup-next
## Full Video BV Input
完整版 `BV` 目前支持 3 种来源:
完整版 `BV` 目前支持 4 种来源:
- `stage/*.meta.json` 中的 `full_video_bvid`
- 前端 / API 手工绑定
- webhook`POST /webhooks/full-video-uploaded`
- `biliup list` 标题匹配,包含 `开放浏览``审核中` 状态
只要完整版上传后已经生成 BV即使仍在审核中也可以被用于纯享版简介、动态和评论互链。
推荐 webhook 负载:
@ -320,3 +350,14 @@ curl -X POST http://127.0.0.1:8787/tasks \
- `ingest.provider = bilibili_url`
- `ingest.yt_dlp_cmd = yt-dlp`
## Docker Compose Deployment
如果希望用容器方式一键运行 API 和 worker请参考 [README_DEPLOY.md](README_DEPLOY.md)。
快速入口:
```bash
./scripts/init-docker-config.sh
docker compose up -d --build
```

176
README_DEPLOY.md Normal file
View File

@ -0,0 +1,176 @@
# Docker Compose Deployment
This deployment runs the API and worker as two services from the same image.
Runtime state, credentials, staged videos, generated sessions, and the SQLite
database stay on the host through bind mounts.
## 1. Initialize Local Files
```bash
chmod +x scripts/init-docker-config.sh
./scripts/init-docker-config.sh
```
This creates these files if they do not already exist:
```text
.env
config/settings.json
runtime/cookies.json
runtime/upload_config.json
data/workspace/
```
## 2. Edit Required Secrets And IDs
Edit `.env`:
```env
GROQ_API_KEY=your_groq_key
OPENAI_API_KEY=your_openai_key_if_using_codex
COLLECTION_SEASON_ID_A=7196643
COLLECTION_SEASON_ID_B=7196624
```
Edit `runtime/cookies.json` and `runtime/upload_config.json` with real Bilibili
credentials and upload metadata.
`runtime/upload_config.json` also controls pure-video title, description,
dynamic text, and top-comment formatting. Existing deployments mount
`./runtime` from the host, so updating the image does not overwrite this file.
When you want to change output text, edit the host file directly.
Common output templates:
```json
{
"template": {
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}\n\n本视频为歌曲纯享切片适合只听歌曲。",
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}"
},
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次直播{previous_full_video_link} (上一场完整录播)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次直播{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
}
```
Supported comment variables:
- `{current_full_video_link}` / `{current_pure_video_link}`
- `{previous_full_video_link}` / `{previous_pure_video_link}`
- `{part_index}` / `{song_index}`
- `{title}` / `{artist}` / `{artist_suffix}` / `{song_text}` / `{line_text}`
If a comment header line contains an empty link variable, that whole line is
omitted. This prevents comments from showing blank `上次直播:` lines when the
previous live video cannot be found.
Provide the `biliup` binary at:
```text
runtime/biliup
```
It must be executable inside the container:
```bash
chmod +x runtime/biliup
```
The image installs the `codex` CLI for `song_detect.provider=codex`. Provide
Codex auth in one of these ways:
```text
OPENAI_API_KEY in .env
runtime/codex mounted to /root/.codex
```
## 3. Start
```bash
docker compose up -d --build
```
Open:
```text
http://127.0.0.1:8000
```
Drop videos into:
```text
data/workspace/stage/
```
## Common Commands
```bash
docker compose logs -f api
docker compose logs -f worker
docker compose restart worker
docker compose down
```
Run one scheduler cycle manually:
```bash
docker compose run --rm worker biliup-next run-once
```
Run doctor:
```bash
docker compose run --rm api biliup-next doctor
```
## Environment Overrides
`config/settings.json` is still the base configuration. Environment variables
override selected values at runtime.
The Compose file already forces container-safe paths such as
`/app/data/workspace` and `/app/runtime/cookies.json`, so an existing local
`config/settings.json` with host paths can still be mounted safely.
Generic format:
```text
BILIUP_NEXT__GROUP__FIELD=value
```
Examples:
```env
BILIUP_NEXT__PATHS__STAGE_DIR=/app/data/workspace/stage
BILIUP_NEXT__PUBLISH__BILIUP_PATH=/app/runtime/biliup
BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
```
Convenience aliases:
```env
GROQ_API_KEY=...
COLLECTION_SEASON_ID_A=7196643
COLLECTION_SEASON_ID_B=7196624
```
## Data Persistence
These host paths are mounted into the containers:
```text
./config -> /app/config
./runtime -> /app/runtime
./data/workspace -> /app/data/workspace
```
Do not store `cookies.json`, Groq keys, or generated workspace data in the image.
They should stay in the mounted host directories.

View File

@ -0,0 +1,127 @@
{
"runtime": {
"database_path": "/app/data/workspace/biliup_next.db",
"control_token": "",
"log_level": "INFO"
},
"paths": {
"stage_dir": "/app/data/workspace/stage",
"backup_dir": "/app/data/workspace/backup",
"session_dir": "/app/data/workspace/session",
"cookies_file": "/app/runtime/cookies.json",
"upload_config_file": "/app/runtime/upload_config.json"
},
"scheduler": {
"candidate_scan_limit": 500,
"max_tasks_per_cycle": 50,
"prioritize_retry_due": true,
"oldest_first": true,
"status_priority": [
"failed_retryable",
"created",
"transcribed",
"songs_detected",
"split_done",
"published",
"commented",
"collection_synced"
]
},
"ingest": {
"provider": "local_file",
"min_duration_seconds": 900,
"ffprobe_bin": "ffprobe",
"yt_dlp_cmd": "yt-dlp",
"yt_dlp_format": "",
"allowed_extensions": [
".mp4",
".flv",
".mkv",
".mov"
],
"stage_min_free_space_mb": 1024,
"stability_wait_seconds": 30,
"session_gap_minutes": 60,
"meta_sidecar_enabled": true,
"meta_sidecar_suffix": ".meta.json"
},
"transcribe": {
"provider": "groq",
"groq_api_key": "",
"groq_api_keys": [],
"ffmpeg_bin": "ffmpeg",
"max_file_size_mb": 12,
"request_timeout_seconds": 180,
"request_max_retries": 1,
"request_retry_backoff_seconds": 30,
"serialize_groq_requests": true,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"song_detect": {
"provider": "codex",
"codex_cmd": "codex",
"qwen_cmd": "qwen",
"poll_interval_seconds": 2,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"split": {
"provider": "ffmpeg_copy",
"ffmpeg_bin": "ffmpeg",
"poll_interval_seconds": 2,
"min_free_space_mb": 2048
},
"publish": {
"provider": "biliup_cli",
"biliup_path": "/app/runtime/biliup",
"cookie_file": "/app/runtime/cookies.json",
"retry_count": 5,
"retry_schedule_minutes": [
15,
5,
5,
5,
5
],
"retry_backoff_seconds": 300,
"command_timeout_seconds": 1800,
"rate_limit_retry_schedule_minutes": [
15,
30,
60
]
},
"comment": {
"provider": "bilibili_top_comment",
"enabled": true,
"max_retries": 5,
"base_delay_seconds": 180,
"poll_interval_seconds": 10,
"post_split_comment": true,
"post_full_video_timeline_comment": true
},
"collection": {
"provider": "bilibili_collection",
"enabled": true,
"season_id_a": 7196643,
"season_id_b": 7196624,
"allow_fuzzy_full_video_match": false,
"append_collection_a_new_to_end": true,
"append_collection_b_new_to_end": true
},
"cleanup": {
"delete_source_video_after_collection_synced": false,
"delete_split_videos_after_collection_synced": false
}
}

View File

@ -1,15 +1,15 @@
{
"runtime": {
"database_path": "data/workspace/biliup_next.db",
"database_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/biliup_next.db",
"control_token": "",
"log_level": "INFO"
},
"paths": {
"stage_dir": "data/workspace/stage",
"backup_dir": "data/workspace/backup",
"session_dir": "data/workspace/session",
"cookies_file": "runtime/cookies.json",
"upload_config_file": "runtime/upload_config.json"
"stage_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/stage",
"backup_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/backup",
"session_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/session",
"cookies_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
"upload_config_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/upload_config.json"
},
"scheduler": {
"candidate_scan_limit": 500,
@ -31,7 +31,7 @@
"provider": "local_file",
"min_duration_seconds": 900,
"ffprobe_bin": "ffprobe",
"yt_dlp_cmd": "yt-dlp",
"yt_dlp_cmd": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/.venv/bin/yt-dlp",
"yt_dlp_format": "",
"allowed_extensions": [
".mp4",
@ -47,15 +47,34 @@
},
"transcribe": {
"provider": "groq",
"groq_api_key": "",
"groq_api_key": "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",
"groq_api_keys": [],
"ffmpeg_bin": "ffmpeg",
"max_file_size_mb": 23
"max_file_size_mb": 12,
"request_timeout_seconds": 180,
"request_max_retries": 1,
"request_retry_backoff_seconds": 30,
"serialize_groq_requests": true,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"song_detect": {
"provider": "qwen_cli",
"provider": "codex",
"codex_cmd": "codex",
"qwen_cmd": "qwen",
"poll_interval_seconds": 2
"poll_interval_seconds": 2,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"split": {
"provider": "ffmpeg_copy",
@ -65,8 +84,8 @@
},
"publish": {
"provider": "biliup_cli",
"biliup_path": "runtime/biliup",
"cookie_file": "runtime/cookies.json",
"biliup_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/biliup",
"cookie_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
"retry_count": 5,
"retry_schedule_minutes": [
15,
@ -78,9 +97,9 @@
"retry_backoff_seconds": 300,
"command_timeout_seconds": 1800,
"rate_limit_retry_schedule_minutes": [
15,
30,
60,
120
60
]
},
"comment": {
@ -95,8 +114,8 @@
"collection": {
"provider": "bilibili_collection",
"enabled": true,
"season_id_a": 0,
"season_id_b": 0,
"season_id_a": 7196643,
"season_id_b": 7196624,
"allow_fuzzy_full_video_match": false,
"append_collection_a_new_to_end": true,
"append_collection_b_new_to_end": true

View File

@ -229,6 +229,16 @@
"description": "用于调用 Groq 转录 API。",
"sensitive": true
},
"groq_api_keys": {
"type": "array",
"default": [],
"title": "Groq API Keys",
"ui_order": 12,
"ui_widget": "secret_list",
"items": { "type": "string" },
"description": "可选 Groq API Key 池。遇到单个 key 限流时会自动切换下一个 key为空时使用 groq_api_key。",
"sensitive": true
},
"ffmpeg_bin": {
"type": "string",
"default": "ffmpeg",
@ -238,10 +248,66 @@
},
"max_file_size_mb": {
"type": "integer",
"default": 23,
"default": 12,
"title": "Max File Size MB",
"ui_order": 40,
"minimum": 1
"minimum": 1,
"description": "Groq 音频分片目标上限。实际切分会额外保留安全余量,避免贴近上传限制。"
},
"request_timeout_seconds": {
"type": "integer",
"default": 180,
"title": "Request Timeout Seconds",
"ui_order": 50,
"minimum": 1,
"description": "单个 Groq 转录请求的超时时间。"
},
"request_max_retries": {
"type": "integer",
"default": 1,
"title": "Request Max Retries",
"ui_order": 60,
"minimum": 0,
"description": "单个音频分片在超时、限流或连接错误时的请求级重试次数。"
},
"request_retry_backoff_seconds": {
"type": "integer",
"default": 30,
"title": "Request Retry Backoff Seconds",
"ui_order": 70,
"minimum": 0,
"description": "Groq 请求级重试之间的等待时间。"
},
"serialize_groq_requests": {
"type": "boolean",
"default": true,
"title": "Serialize Groq Requests",
"ui_order": 75,
"description": "是否串行化 Groq 分片上传请求,避免多个 worker 或多个任务同时上传导致超时。"
},
"retry_count": {
"type": "integer",
"default": 3,
"title": "Task Retry Count",
"ui_order": 80,
"minimum": 0,
"description": "transcribe 步骤允许的任务级失败重试次数。"
},
"retry_backoff_seconds": {
"type": "integer",
"default": 300,
"title": "Task Retry Backoff Seconds",
"ui_order": 90,
"minimum": 0,
"description": "未配置 retry_schedule_minutes 时transcribe 任务级重试的等待时间。"
},
"retry_schedule_minutes": {
"type": "array",
"default": [5, 10, 15],
"title": "Task Retry Schedule Minutes",
"ui_order": 100,
"items": { "type": "integer", "minimum": 0 },
"description": "transcribe 任务级失败后的自动重试等待时间。"
}
},
"song_detect": {
@ -275,6 +341,30 @@
"title": "Poll Interval Seconds",
"ui_order": 30,
"minimum": 1
},
"retry_count": {
"type": "integer",
"default": 3,
"title": "Task Retry Count",
"ui_order": 40,
"minimum": 0,
"description": "song_detect 步骤允许的任务级失败重试次数。认证失败会直接进入人工失败,不会重试。"
},
"retry_backoff_seconds": {
"type": "integer",
"default": 300,
"title": "Task Retry Backoff Seconds",
"ui_order": 50,
"minimum": 0,
"description": "未配置 retry_schedule_minutes 时song_detect 任务级重试的等待时间。"
},
"retry_schedule_minutes": {
"type": "array",
"default": [5, 10, 15],
"title": "Task Retry Schedule Minutes",
"ui_order": 60,
"items": { "type": "integer", "minimum": 0 },
"description": "song_detect 任务级失败后的自动重试等待时间。"
}
},
"split": {
@ -375,9 +465,9 @@
"rate_limit_retry_schedule_minutes": {
"type": "array",
"default": [
15,
30,
60,
120
60
],
"title": "Rate Limit Retry Schedule Minutes",
"ui_order": 70,

View File

@ -27,6 +27,7 @@
"transcribe": {
"provider": "groq",
"groq_api_key": "",
"groq_api_keys": [],
"ffmpeg_bin": "ffmpeg",
"max_file_size_mb": 23
},

74
docker-compose.yml Normal file
View File

@ -0,0 +1,74 @@
# Two services share one image: `api` serves the web/API, `worker` runs the
# pipeline loop. State lives on the host through the bind mounts below.
services:
  api:
    build:
      context: .
      # DOCKER_BUILD_* names are used on the host side so Compose does not
      # accidentally interpolate the host's own HTTP_PROXY/HTTPS_PROXY into
      # the build (see .env.example).
      args:
        HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
        HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
        ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
        NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
        http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
        https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
        all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
        no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
    image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
    command: ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]
    # .env is optional so `docker compose config` works before init.
    env_file:
      - path: .env
        required: false
    # Force container-safe paths so a mounted config/settings.json written
    # for a host layout still works (BILIUP_NEXT__GROUP__FIELD overrides).
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
      BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
      BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
      BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
      BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
      BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
      BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
      BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
      BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
    ports:
      - "${BILIUP_NEXT_PORT:-8000}:8000"
    # Host bind mounts keep credentials, config, and workspace data out of
    # the image; runtime/codex holds Codex CLI login state.
    volumes:
      - ./config:/app/config
      - ./runtime:/app/runtime
      - ./data/workspace:/app/data/workspace
      - ./runtime/codex:/root/.codex
    restart: unless-stopped
  worker:
    image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
    # Same build definition as api so either service can build the shared
    # image when it is missing.
    build:
      context: .
      args:
        HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
        HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
        ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
        NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
        http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
        https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
        all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
        no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
    # Shell form so ${WORKER_INTERVAL} is expanded inside the container.
    command: ["sh", "-c", "biliup-next worker --interval ${WORKER_INTERVAL:-5}"]
    env_file:
      - path: .env
        required: false
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
      BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
      BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
      BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
      BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
      BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
      BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
      BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
      BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
    volumes:
      - ./config:/app/config
      - ./runtime:/app/runtime
      - ./data/workspace:/app/data/workspace
      - ./runtime/codex:/root/.codex
    restart: unless-stopped
    depends_on:
      - api

View File

@ -155,6 +155,60 @@ User edits config
- `base_delay_seconds`
- `poll_interval_seconds`
## Upload And Comment Templates
`paths.upload_config_file` 指向 `runtime/upload_config.json`。这个文件不只控制 `biliup upload` 的标题、简介、动态和标签,也控制 B 站置顶评论格式。
投稿字段在 `template` 中:
```json
{
"template": {
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}",
"tag": "可爱,王海颖,唱歌,音乐",
"dynamic": "{streamer} {date} 歌曲纯享版已发布。\n直播完整版{current_full_video_link}"
}
}
```
评论字段在 `comment_template` 中:
```json
{
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
}
```
可用变量:
- `streamer`:主播名。
- `date`:从文件名解析出来的日期和时间。
- `song_count`:识别到的歌曲数量。
- `songs_list``songs.txt` 原始歌单内容。
- `daily_quote` / `quote_author`:随机引用文本。
- `current_full_video_bvid` / `current_full_video_link`:本场直播完整版 BV 和链接。
- `current_pure_video_bvid` / `current_pure_video_link`:本场歌曲纯享版 BV 和链接。
- `previous_full_video_bvid` / `previous_full_video_link`:上一场直播完整版 BV 和链接。
- `previous_pure_video_bvid` / `previous_pure_video_link`:上一场歌曲纯享版 BV 和链接。
- `part_index`:评论中的 `P1/P2/P3` 分段序号。
- `song_index`:全局歌曲序号。
- `title` / `artist` / `artist_suffix`:从 `songs.json` 生成纯享歌单时使用。
- `song_text`:从 `songs.txt` 兜底生成纯享歌单时使用,通常不含时间戳。
- `line_text`:完整版时间轴的原始行,通常包含时间戳。
评论头部模板有一条额外规则:如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过,避免发出空链接提示。
Docker 部署时 `./runtime` 是宿主机挂载目录。镜像更新不会覆盖已有 `runtime/upload_config.json`,因此调整文案或评论格式时应修改宿主机上的这个文件,然后重启容器。
### collection
- `enabled`

View File

@ -75,7 +75,7 @@
"platform": "bilibili",
"aid": 123456,
"bvid": "BV1xxxx",
"title": "【王海颖 (歌曲纯享版)】_03月29日 22时02分 共18首歌",
"title": "【王海颖 (歌曲纯享版)】 03月29日 22时02分 共18首歌",
"published_at": "2026-03-30T07:56:13+08:00"
}
```

View File

@ -0,0 +1,321 @@
# 发布输出示例与流程说明
本文档面向使用者说明 `biliup-next` 的主流程、输入输出、当前已实现功能,以及一次多段同场直播发布后的示例文案。
## 项目功能
`biliup-next` 将一场直播录播拆成两个最终发布目标:
- 直播完整版:由外部流程或人工上传到 B 站,本项目负责记录/绑定它的 BV 号,并给它补充置顶时间轴评论、加入完整版合集。
- 歌曲纯享版:由本项目从直播录播中识别歌曲、切出歌曲片段、合并发布为一个分 P 视频,并给它补充置顶歌单评论、加入纯享版合集。
当前主链路:
```text
stage 输入视频
-> ingest 导入并归并 session
-> transcribe 语音转字幕
-> song_detect 识别歌曲
-> split 切出歌曲片段
-> publish 发布歌曲纯享版
-> comment 发布/置顶评论
-> collection 加入合集
```
## 输入
最常见输入是把录播视频放入 `data/workspace/stage/`
支持的形式:
- 单个视频文件:一场直播只有一个录播文件。
- 多个视频文件:同一场直播被分成多段录播文件。
- 浏览器上传:通过控制台上传到 stage。
- 本机复制:通过控制台把服务器上的文件复制到 stage。
输入文件名会用于推测主播和直播开始时间,例如:
```text
王海颖唱歌录播 04月19日 22时10分.mp4
王海颖唱歌录播 04月19日 23时05分.mp4
王海颖唱歌录播 04月20日 00时01分.mp4
```
## Session 归并
同一主播、时间接近的多个录播片段会归入同一个 session。
同一 session 的行为:
- 只发布一个歌曲纯享版 BV。
- 多段录播的歌曲会按时间顺序聚合。
- 评论按 `P1``P2``P3` 分段展示。
- 歌曲序号全局递增,不在每个 P 内重新从 1 开始。
示例:
```text
P1:
1. 程艾影 — 赵雷
2. 钟无艳 — 谢安琪
P2:
3. 慢慢喜欢你 — 莫文蔚
P3:
4. 空白格 — 蔡健雅
```
## BV 获取
### 歌曲纯享版 BV
歌曲纯享版由本项目调用 `biliup upload` 发布。
发布成功后,项目会从 `biliup` 输出中提取 BV 号,并写入当前 session 目录:
```text
bvid.txt
```
这个 BV 会用于:
- 纯享版评论发布。
- 完整版评论顶部反向链接。
- 纯享版合集同步。
### 直播完整版 BV
完整版 BV 可以来自四种方式:
- `stage/*.meta.json` 中的 `full_video_bvid`。
- 控制台手动绑定。
- API/webhook 传入。
- `biliup list` 标题匹配。
`biliup list` 会同时接受 `开放浏览``审核中` 状态。完整版视频只要上传后生成了 BV即使仍在审核中也可以被写入纯享版简介、动态和评论互链。
成功解析后会写入:
```text
full_video_bvid.txt
```
默认标题匹配是保守的精确匹配:会先去掉空格、标点、括号、冒号等,只保留中文、英文、数字,再比较标题是否相等。
如果 `allow_fuzzy_full_video_match=false`,不会做包含式模糊匹配。为了避免误匹配,推荐在完整版上传完成后手动绑定 BV。
## 示例场景
假设本次直播由三段录播组成:
```text
王海颖唱歌录播 04月19日 22时10分
王海颖唱歌录播 04月19日 23时05分
王海颖唱歌录播 04月20日 00时01分
```
假设 BV 绑定结果如下:
```text
本次直播完整版BVFULLCURR
本次歌曲纯享版BVPURECURR
上次直播完整版BVFULLPREV
```
假设识别出的歌曲如下:
```text
P1:
00:06:32 程艾影 — 赵雷
00:14:45 钟无艳 — 谢安琪
P2:
00:20:57 慢慢喜欢你 — 莫文蔚
P3:
00:27:16 空白格 — 蔡健雅
```
## 歌曲纯享版标题
当前模板:
```text
【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌
```
示例:
```text
【王海颖 (歌曲纯享版)】 04月19日 22时10分 共4首歌
```
## 歌曲纯享版简介
当前模板会保持简介较短,完整歌单放到置顶评论中,避免 B 站简介截断。
示例:
```text
王海颖 04月19日 22时10分 歌曲纯享版。
完整歌单与时间轴见置顶评论。
直播完整版https://www.bilibili.com/video/BVFULLCURR
上次直播https://www.bilibili.com/video/BVFULLPREV
本视频为歌曲纯享切片,适合只听歌曲。
```
如果某个链接暂时没有 BV项目会自动移除对应的空链接行。
## 歌曲纯享版动态
示例:
```text
王海颖 04月19日 22时10分 歌曲纯享版已发布。完整歌单见置顶评论。
直播完整版https://www.bilibili.com/video/BVFULLCURR
上次直播https://www.bilibili.com/video/BVFULLPREV
```
## 歌曲纯享版置顶评论
纯享版评论主要给听歌用户看,不带歌曲时间轴,只展示歌名、歌手和互链。
默认由 `runtime/upload_config.json``comment_template.split_header``comment_template.split_part_header``comment_template.split_song_line` 生成。
示例:
```text
当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。
直播完整版https://www.bilibili.com/video/BVFULLCURR (完整录播,含聊天/互动/完整流程)
上次纯享https://www.bilibili.com/video/BVPUREPREV (上一场歌曲纯享版)
P1:
1. 程艾影 — 赵雷
2. 钟无艳 — 谢安琪
P2:
3. 慢慢喜欢你 — 莫文蔚
P3:
4. 空白格 — 蔡健雅
```
## 直播完整版置顶评论
完整版评论主要给看完整录播的用户跳转歌曲纯享版,并提供完整时间轴。
默认由 `runtime/upload_config.json``comment_template.full_header``comment_template.full_part_header``comment_template.full_timeline_line` 生成。
示例:
```text
当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。
歌曲纯享版https://www.bilibili.com/video/BVPURECURR (只听歌曲看这里)
上次完整版https://www.bilibili.com/video/BVFULLPREV (上一场完整录播)
P1:
1. 00:06:32 程艾影 — 赵雷
2. 00:14:45 钟无艳 — 谢安琪
P2:
3. 00:20:57 慢慢喜欢你 — 莫文蔚
P3:
4. 00:27:16 空白格 — 蔡健雅
```
## 评论格式配置
评论格式可以像标题、简介、动态一样通过 `runtime/upload_config.json` 修改:
```json
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
```
字段含义:
- `split_header`:纯享版评论顶部说明。
- `full_header`:完整版评论顶部说明。
- `split_part_header` / `full_part_header`:多片段 session 的分段标题,例如 `P1:`
- `split_song_line`:从 `songs.json` 生成纯享歌单时的单行格式。
- `split_text_song_line``songs.json` 不可用时,从 `songs.txt` 兜底生成纯享歌单的单行格式。
- `full_timeline_line`:完整版时间轴评论的单行格式。
常用变量:
- `{current_full_video_link}`:本场直播完整版链接。
- `{current_pure_video_link}`:本场歌曲纯享版链接。
- `{previous_full_video_link}`:上一场直播完整版链接。
- `{previous_pure_video_link}`:上一场歌曲纯享版链接。
- `{part_index}`P 分段序号。
- `{song_index}`:歌曲全局序号。
- `{title}` / `{artist}` / `{artist_suffix}`:歌曲标题、歌手、带分隔符的歌手后缀。
- `{song_text}`:不带时间戳的歌曲文本。
- `{line_text}`:原始时间轴行,通常包含时间戳。
如果评论头部某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动省略。
## 合集同步
项目维护两个合集目标:
- 合集 A直播完整版。
- 合集 B歌曲纯享版。
当前配置中的示例 ID
```text
直播完整版合集7196643
歌曲纯享版合集7196624
```
合集同步完成后,如果启用了清理策略,项目可以删除本地原视频或切片视频以节省空间。当前默认不删除。
## 幂等与重试
项目会在 session 目录写入标记文件,避免重复上传和重复评论。
常见标记:
```text
bvid.txt
full_video_bvid.txt
upload_done.flag
comment_split_done.flag
comment_full_done.flag
collection_a_done.flag
collection_b_done.flag
```
发布阶段的关键行为:
- 首批最多上传 5 个分 P。
- 超过 5 个分 P 时,后续通过 append 追加。
- 已经写入 `bvid.txt` 后,重试会优先 append 到已有视频,而不是重新发布。
- `publish_progress.json` 记录 append 进度,避免重试时重复追加已完成批次。
评论阶段的关键行为:
- 同一 session 只由最早片段负责聚合评论。
- 非 anchor 片段进入评论步骤时会跳过实际发评。
- 这样可以避免同一场直播的多个片段重复发布相同评论。
## 使用建议
发布前建议确认:
- stage 中的视频文件名能解析出主播和时间。
- `runtime/upload_config.json` 中标题、简介、动态符合预期。
- 完整版上传完成后,尽量手动绑定 `full_video_bvid`
- worker 重启前确认已有 `bvid.txt``publish_progress.json` 是否符合当前发布进度。
- 如需自动匹配完整版 BV确认 `biliup list` 中完整视频标题与任务标题标准化后相等。

View File

@ -25,3 +25,11 @@ cd /home/theshy/biliup/biliup-next
- `upload_config.json` <- `upload_config.example.json`
它们只用于占位能保证项目进入可配置 doctor的状态但不代表上传链路已经可用
`upload_config.json` 同时控制
- 纯享版投稿标题简介动态标签`template`
- 纯享版和完整版置顶评论格式`comment_template`
- 文件名解析规则`filename_patterns`
Docker 部署时这个目录通常会作为 `./runtime:/app/runtime` 挂载到容器内镜像更新不会覆盖已有 `upload_config.json`所以修改评论动态简介格式时应直接改宿主机上的 `runtime/upload_config.json`

View File

@ -1,5 +1,95 @@
{
"line": "AUTO",
"limit": 3,
"threads": 3
"comment": "B站投稿配置文件 - 根据您的需要修改模板内容",
"upload_settings": {
"tid": 31,
"copyright": 1,
"source": "王海颖好听的歌声分享",
"cover": ""
},
"template": {
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}\n\n本视频为歌曲纯享切片适合只听歌曲。",
"tag": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐",
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}"
},
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
},
"streamers": {
"王海颖": {
"display_name": "王海颖",
"tags": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐"
},
"示例主播": {
"display_name": "示例主播",
"tags": "示例,标签1,标签2,唱歌,音乐"
}
},
"quotes": [
{
"text": "此心安处是吾乡。",
"author": "苏轼《定风波·南海归赠王定国侍人寓娘》"
},
{
"text": "山重水复疑无路,柳暗花明又一村。",
"author": "陆游《游山西村》"
},
{
"text": "长风破浪会有时,直挂云帆济沧海。",
"author": "李白《行路难·其一》"
}
],
"filename_patterns": {
"comment": "从文件名提取信息的正则表达式模式 - 按优先级从高到低排列",
"patterns": [
{
"name": "主播名唱歌录播 日期 时间",
"regex": "^(?P<streamer>.+?)唱歌录播 (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分",
"date_format": "{month}月{day}日 {hour}时{minute}分",
"example": "王海颖唱歌录播 01月28日 22时06分"
},
{
"name": "日期 时间 主播名 唱歌录播",
"regex": "^(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 (?P<streamer>.+?)唱歌录播",
"date_format": "{month}月{day}日 {hour}时{minute}分",
"example": "01月25日 09时20分 王海颖唱歌录播"
},
{
"name": "主播名唱歌录播: 年月日 时分 [BV号]",
"regex": "^(?P<streamer>.+?)唱歌录播[:] (?P<year>\\d{4})年(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
"date_format": "{month}月{day}日 {hour}时{minute}分",
"example": "王海颖唱歌录播: 2026年01月22日 22时09分 [BV1wEzcBqEhW]"
},
{
"name": "主播名 日期 时分 [BV号]",
"regex": "^(?P<streamer>.+?) (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})点(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
"date_format": "{month}月{day}日 {hour}点{minute}分",
"example": "王海颖 01月25日 02点24分 [BV1KCzQBpEXC]"
},
{
"name": "主播名_日期",
"regex": "^(?P<streamer>.+?)_(?P<date>\\d{1,2}月\\d{1,2}日)",
"date_format": "{date}",
"example": "王海颖_1月20日"
},
{
"name": "主播名_完整日期",
"regex": "^(?P<streamer>.+?)_(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})",
"date_format": "{month}月{day}日",
"example": "王海颖_2026-01-20"
},
{
"name": "主播名_描述",
"regex": "^(?P<streamer>.+?)_(?P<desc>.+)",
"date_format": "{desc}",
"example": "测试搬运_前15分钟"
}
]
}
}

View File

@ -0,0 +1,28 @@
#!/usr/bin/env sh
# Bootstrap local config/runtime files for the docker deployment.
# Idempotent: existing files are never overwritten.
set -eu

mkdir -p config runtime/codex data/workspace/stage data/workspace/session data/workspace/backup

# seed TARGET SOURCE MESSAGE
# Copy SOURCE to TARGET unless TARGET already exists; announce via MESSAGE.
seed() {
    if [ ! -f "$1" ]; then
        cp "$2" "$1"
        echo "$3"
    fi
}

seed .env .env.example "created .env from .env.example"
seed config/settings.json config/settings.docker.example.json "created config/settings.json from config/settings.docker.example.json"
seed runtime/cookies.json runtime/cookies.example.json "created runtime/cookies.json placeholder"
seed runtime/upload_config.json runtime/upload_config.example.json "created runtime/upload_config.json placeholder"

# The biliup uploader binary must be provided by the user; warn early.
if [ ! -x runtime/biliup ]; then
    echo "warning: runtime/biliup is missing or not executable; publish will fail until you provide it" >&2
fi

View File

@ -4,3 +4,4 @@ Version: 0.1.0
Summary: Next-generation control-plane-first biliup pipeline
Requires-Python: >=3.11
Requires-Dist: requests>=2.32.0
Requires-Dist: groq>=0.18.0

View File

@ -10,7 +10,19 @@ src/biliup_next.egg-info/top_level.txt
src/biliup_next/app/api_server.py
src/biliup_next/app/bootstrap.py
src/biliup_next/app/cli.py
src/biliup_next/app/control_plane_get_dispatcher.py
src/biliup_next/app/control_plane_post_dispatcher.py
src/biliup_next/app/dashboard.py
src/biliup_next/app/retry_meta.py
src/biliup_next/app/scheduler.py
src/biliup_next/app/serializers.py
src/biliup_next/app/session_delivery_service.py
src/biliup_next/app/task_actions.py
src/biliup_next/app/task_audit.py
src/biliup_next/app/task_control_service.py
src/biliup_next/app/task_engine.py
src/biliup_next/app/task_policies.py
src/biliup_next/app/task_runner.py
src/biliup_next/app/worker.py
src/biliup_next/core/config.py
src/biliup_next/core/errors.py
@ -18,25 +30,56 @@ src/biliup_next/core/models.py
src/biliup_next/core/providers.py
src/biliup_next/core/registry.py
src/biliup_next/infra/db.py
src/biliup_next/infra/legacy_asset_sync.py
src/biliup_next/infra/log_reader.py
src/biliup_next/infra/plugin_loader.py
src/biliup_next/infra/runtime_doctor.py
src/biliup_next/infra/stage_importer.py
src/biliup_next/infra/storage_guard.py
src/biliup_next/infra/systemd_runtime.py
src/biliup_next/infra/task_repository.py
src/biliup_next/infra/task_reset.py
src/biliup_next/infra/workspace_cleanup.py
src/biliup_next/infra/workspace_paths.py
src/biliup_next/infra/adapters/bilibili_api.py
src/biliup_next/infra/adapters/biliup_cli.py
src/biliup_next/infra/adapters/codex_cli.py
src/biliup_next/infra/adapters/full_video_locator.py
src/biliup_next/infra/adapters/qwen_cli.py
src/biliup_next/infra/adapters/yt_dlp.py
src/biliup_next/modules/collection/service.py
src/biliup_next/modules/collection/providers/bilibili_collection.py
src/biliup_next/modules/comment/service.py
src/biliup_next/modules/comment/providers/bilibili_top_comment.py
src/biliup_next/modules/ingest/service.py
src/biliup_next/modules/ingest/providers/bilibili_url.py
src/biliup_next/modules/ingest/providers/local_file.py
src/biliup_next/modules/publish/service.py
src/biliup_next/modules/publish/providers/biliup_cli.py
src/biliup_next/modules/song_detect/service.py
src/biliup_next/modules/song_detect/providers/codex.py
src/biliup_next/modules/song_detect/providers/common.py
src/biliup_next/modules/song_detect/providers/qwen_cli.py
src/biliup_next/modules/split/service.py
src/biliup_next/modules/split/providers/ffmpeg_copy.py
src/biliup_next/modules/transcribe/service.py
src/biliup_next/modules/transcribe/providers/groq.py
tests/test_api_server.py
tests/test_bilibili_top_comment_provider.py
tests/test_biliup_cli_publish_provider.py
tests/test_control_plane_get_dispatcher.py
tests/test_control_plane_post_dispatcher.py
tests/test_ingest_bilibili_url.py
tests/test_ingest_session_grouping.py
tests/test_publish_service.py
tests/test_retry_meta.py
tests/test_serializers.py
tests/test_session_delivery_service.py
tests/test_settings_service.py
tests/test_song_detect_providers.py
tests/test_task_actions.py
tests/test_task_control_service.py
tests/test_task_engine.py
tests/test_task_policies.py
tests/test_task_repository_sqlite.py
tests/test_task_runner.py

View File

@ -1 +1,2 @@
requests>=2.32.0
groq>=0.18.0

View File

@ -3,6 +3,8 @@ from __future__ import annotations
from datetime import datetime, timedelta, timezone
STEP_SETTINGS_GROUP = {
"transcribe": "transcribe",
"song_detect": "song_detect",
"publish": "publish",
"comment": "comment",
}
@ -54,6 +56,26 @@ def publish_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
)
def transcribe_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
    """Backoff schedule (seconds between attempts) for the transcribe step."""
    # Defaults mirror the other steps: 3 attempts, 300s apart, overridable
    # through the step's settings group.
    return retry_schedule_seconds(
        settings,
        count_key="retry_count",
        backoff_key="retry_backoff_seconds",
        default_backoff=300,
        default_count=3,
    )
def song_detect_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
    """Backoff schedule (seconds between attempts) for the song_detect step."""
    # Defaults mirror the other steps: 3 attempts, 300s apart, overridable
    # through the step's settings group.
    return retry_schedule_seconds(
        settings,
        count_key="retry_count",
        backoff_key="retry_backoff_seconds",
        default_backoff=300,
        default_count=3,
    )
def comment_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
return retry_schedule_seconds(
settings,
@ -77,7 +99,11 @@ def retry_meta_for_step(step, settings_by_group: dict[str, object]) -> dict[str,
if not isinstance(group_settings, dict):
group_settings = {}
if step_name == "publish":
if step_name == "transcribe":
schedule = transcribe_retry_schedule_seconds(group_settings)
elif step_name == "song_detect":
schedule = song_detect_retry_schedule_seconds(group_settings)
elif step_name == "publish":
schedule = publish_retry_schedule_seconds(group_settings)
elif step_name == "comment":
schedule = comment_retry_schedule_seconds(group_settings)

View File

@ -52,7 +52,16 @@ def infer_error_step_name(task, steps: dict[str, object]) -> str: # type: ignor
def retry_wait_payload(task_id: str, step, state: dict[str, object]) -> dict[str, object] | None: # type: ignore[no-untyped-def]
if step.status != "failed_retryable":
return None
meta = retry_meta_for_step(step, {"publish": settings_for(state, "publish")})
step_settings_group = {
"transcribe": "transcribe",
"song_detect": "song_detect",
"publish": "publish",
"comment": "comment",
}.get(step.step_name)
settings_by_group = {}
if step_settings_group is not None and step_settings_group in state["settings"]:
settings_by_group[step_settings_group] = settings_for(state, step_settings_group)
meta = retry_meta_for_step(step, settings_by_group)
if meta is None or meta["retry_due"]:
return None
return {

View File

@ -2,6 +2,8 @@ from __future__ import annotations
from biliup_next.app.retry_meta import comment_retry_schedule_seconds
from biliup_next.app.retry_meta import publish_retry_schedule_seconds
from biliup_next.app.retry_meta import song_detect_retry_schedule_seconds
from biliup_next.app.retry_meta import transcribe_retry_schedule_seconds
from biliup_next.app.task_engine import infer_error_step_name, settings_for as task_engine_settings_for
from biliup_next.core.models import utc_now_iso
@ -35,6 +37,18 @@ def resolve_failure(task, repo, state: dict[str, object], exc) -> dict[str, obje
next_retry_count = current_retry + 1
next_status = "failed_retryable" if exc.retryable else "failed_manual"
next_retry_delay_seconds: int | None = None
if exc.retryable and step_name == "transcribe":
schedule = transcribe_retry_schedule_seconds(settings_for(state, "transcribe"))
if next_retry_count > len(schedule):
next_status = "failed_manual"
else:
next_retry_delay_seconds = schedule[next_retry_count - 1]
if exc.retryable and step_name == "song_detect":
schedule = song_detect_retry_schedule_seconds(settings_for(state, "song_detect"))
if next_retry_count > len(schedule):
next_status = "failed_manual"
else:
next_retry_delay_seconds = schedule[next_retry_count - 1]
if exc.retryable and step_name == "publish":
publish_settings = settings_for(state, "publish")
if exc.code == "PUBLISH_RATE_LIMITED":

View File

@ -1,6 +1,7 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any
@ -32,6 +33,7 @@ class SettingsService:
schema = self._read_json(self.schema_path)
settings = self._read_json(self.settings_path)
settings = self._apply_schema_defaults(settings, schema)
settings = self._apply_env_overrides(settings, schema)
settings = self._normalize_paths(settings)
self.validate(settings, schema)
return SettingsBundle(schema=schema, settings=settings)
@ -125,6 +127,57 @@ class SettingsService:
group_value[field_name] = self._clone_default(field_schema["default"])
return merged
def _apply_env_overrides(self, settings: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
merged = json.loads(json.dumps(settings))
aliases = {
("transcribe", "groq_api_key"): ["GROQ_API_KEY"],
("transcribe", "groq_api_keys"): ["GROQ_API_KEYS"],
("collection", "season_id_a"): ["COLLECTION_SEASON_ID_A"],
("collection", "season_id_b"): ["COLLECTION_SEASON_ID_B"],
}
for group_name, fields in schema.get("groups", {}).items():
group_value = merged.setdefault(group_name, {})
if not isinstance(group_value, dict):
continue
for field_name, field_schema in fields.items():
env_names = [
f"BILIUP_NEXT__{group_name}__{field_name}".upper(),
f"BILIUP_NEXT_{group_name}_{field_name}".upper(),
*aliases.get((group_name, field_name), []),
]
raw_value = self._first_env_value(env_names)
if raw_value is None:
continue
group_value[field_name] = self._parse_env_value(raw_value, field_schema)
return merged
@staticmethod
def _first_env_value(names: list[str]) -> str | None:
for name in names:
value = os.environ.get(name)
if value:
return value
return None
@staticmethod
def _parse_env_value(value: str, field_schema: dict[str, Any]) -> Any:
expected = field_schema.get("type")
if expected == "integer":
return int(value)
if expected == "boolean":
normalized = value.strip().lower()
if normalized in {"1", "true", "yes", "on"}:
return True
if normalized in {"0", "false", "no", "off"}:
return False
raise ConfigError(f"无法解析布尔环境变量值: {value}")
if expected == "array":
stripped = value.strip()
if stripped.startswith("["):
return json.loads(stripped)
return [item.strip() for item in value.split(",") if item.strip()]
return value
@staticmethod
def _clone_default(value: Any) -> Any:
return json.loads(json.dumps(value))

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import os
import subprocess
from pathlib import Path
@ -18,9 +19,7 @@ class CodexCliAdapter:
codex_cmd,
"exec",
prompt.replace("\n", " "),
"--full-auto",
"--sandbox",
"workspace-write",
"--dangerously-bypass-approvals-and-sandbox",
"--output-schema",
"./song_schema.json",
"-o",
@ -35,6 +34,7 @@ class CodexCliAdapter:
capture_output=True,
text=True,
check=False,
env=self._subprocess_env(),
)
except FileNotFoundError as exc:
raise ModuleError(
@ -42,3 +42,12 @@ class CodexCliAdapter:
message=f"找不到 codex 命令: {codex_cmd}",
retryable=False,
) from exc
@staticmethod
def _subprocess_env() -> dict[str, str]:
env = os.environ.copy()
for key in ("HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"):
value = env.get(key)
if value and "://" not in value:
env[key] = f"http://{value}"
return env

View File

@ -8,6 +8,9 @@ from typing import Any
from biliup_next.core.errors import ModuleError
VISIBLE_BILIUP_LIST_STATES = {"开放浏览", "审核中"}
def normalize_title(text: str) -> str:
    """Lowercase *text*, keeping only CJK ideographs and ASCII alphanumerics."""
    # Keep matching characters rather than deleting non-matching ones;
    # the result is identical to a re.sub with the negated class.
    kept = re.findall(r"[\u4e00-\u9fa5a-zA-Z0-9]", text)
    return "".join(kept).lower()
@ -38,7 +41,7 @@ def fetch_biliup_list(settings: dict[str, Any], *, max_pages: int = 5) -> list[d
if not line.startswith("BV"):
continue
parts = line.split("\t")
if len(parts) >= 3 and "开放浏览" not in parts[2]:
if len(parts) >= 3 and not any(state in parts[2] for state in VISIBLE_BILIUP_LIST_STATES):
continue
if len(parts) >= 2:
videos.append({"bvid": parts[0].strip(), "title": parts[1].strip()})

View File

@ -115,7 +115,6 @@ class TaskResetService:
work_dir / "comment_full_done.flag",
work_dir / "collection_a_done.flag",
work_dir / "collection_b_done.flag",
work_dir / "bvid.txt",
],
"comment": [
work_dir / "comment_done.flag",

View File

@ -0,0 +1,199 @@
from __future__ import annotations
from datetime import datetime
from pathlib import Path
import re
from typing import Any
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list, resolve_full_video_bvid
from biliup_next.infra.workspace_paths import resolve_task_work_dir
def bilibili_video_url(bvid: str | None) -> str:
bvid = (bvid or "").strip()
return f"https://www.bilibili.com/video/{bvid}" if bvid.startswith("BV") else ""
def read_task_split_bvid(task: Any) -> str:
    """Read the split (pure-song) video BV id recorded in the task work dir."""
    work_dir = resolve_task_work_dir(task)
    return _read_bvid(work_dir / "bvid.txt")
def read_task_full_bvid(task: Any, context: Any | None = None) -> str:
    """Full-live BV id: prefer the task context, else full_video_bvid.txt."""
    context_bvid = getattr(context, "full_video_bvid", None) if context is not None else None
    if context_bvid:
        return str(context_bvid).strip()
    return _read_bvid(resolve_task_work_dir(task) / "full_video_bvid.txt")
def link_context_for_task(task: Any, repo: Any | None, settings: dict[str, Any] | None = None) -> dict[str, str]:
    """Assemble current/previous full & pure video BV ids and links for *task*.

    Every key is always present; unknown values are empty strings.
    """
    context = _get_context(repo, task.id)
    # Context/file first, then a best-effort biliup-list lookup.
    full_bvid = read_task_full_bvid(task, context) or resolve_current_full_video_bvid(task, settings)
    split_bvid = read_task_split_bvid(task)
    previous = previous_live_links(task, repo, context, settings)
    links = {
        "current_full_video_bvid": full_bvid,
        "current_full_video_link": bilibili_video_url(full_bvid),
        "current_pure_video_bvid": split_bvid,
        "current_pure_video_link": bilibili_video_url(split_bvid),
    }
    for key in (
        "previous_full_video_bvid",
        "previous_full_video_link",
        "previous_pure_video_bvid",
        "previous_pure_video_link",
    ):
        links[key] = previous.get(key, "")
    return links
def resolve_current_full_video_bvid(task: Any, settings: dict[str, Any] | None = None) -> str:
    """Best-effort lookup of the full-live BV id via the biliup CLI list.

    Returns "" when settings are incomplete or the lookup fails; this is a
    deliberately best-effort enrichment path, so lookup errors are swallowed.
    """
    if not settings:
        return ""
    if not (settings.get("biliup_path") and settings.get("cookie_file")):
        return ""
    try:
        resolved = resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings)
    except Exception:  # best-effort only; never propagate lookup failures
        return ""
    return resolved or ""
def previous_live_links(
    task: Any,
    repo: Any | None,
    context: Any | None = None,
    settings: dict[str, Any] | None = None,
) -> dict[str, str]:
    """Locate the previous live session's full/pure BV ids and links.

    Repository task contexts are consulted first; any link still missing is
    filled in from the biliup CLI upload list as a fallback.
    """
    context = context or _get_context(repo, task.id)
    streamer = _context_streamer(context) or _parse_streamer_from_title(task.title)
    if not streamer:
        return {}

    # Reference time for "previous": context start time, else title time.
    started_at = None
    if context is not None:
        started_at = _parse_datetime(getattr(context, "segment_started_at", None))
    if started_at is None:
        started_at = _parse_title_datetime(task.title)
    session_key = getattr(context, "session_key", None) if context is not None else None

    previous: dict[str, str] = {}
    repo_supported = (
        repo is not None
        and hasattr(repo, "find_recent_task_contexts")
        and hasattr(repo, "get_task")
    )
    if repo_supported:
        for ctx in repo.find_recent_task_contexts(streamer, limit=50):
            if ctx.task_id == task.id:
                continue
            # Segments of the same live session are not "previous" lives.
            if session_key and getattr(ctx, "session_key", None) == session_key:
                continue
            ctx_started = _parse_datetime(getattr(ctx, "segment_started_at", None))
            if started_at is not None and ctx_started is not None and ctx_started >= started_at:
                continue
            ctx_task = repo.get_task(ctx.task_id)
            if ctx_task is None:
                continue
            full_bvid = read_task_full_bvid(ctx_task, ctx)
            split_bvid = read_task_split_bvid(ctx_task)
            if full_bvid or split_bvid:
                previous = {
                    "previous_full_video_bvid": full_bvid,
                    "previous_full_video_link": bilibili_video_url(full_bvid),
                    "previous_pure_video_bvid": split_bvid,
                    "previous_pure_video_link": bilibili_video_url(split_bvid),
                }
                break

    # Fill gaps (but never overwrite) from the uploaded-video list.
    if not previous.get("previous_full_video_bvid") or not previous.get("previous_pure_video_bvid"):
        for key, value in _previous_live_from_biliup_list(streamer, started_at, settings).items():
            if value and not previous.get(key):
                previous[key] = value
    return previous
def _get_context(repo: Any | None, task_id: str) -> Any | None:
    """Fetch the task context from *repo* when the repo supports contexts."""
    if repo is not None and hasattr(repo, "get_task_context"):
        return repo.get_task_context(task_id)
    return None
def _context_streamer(context: Any | None) -> str:
    """Streamer name recorded on *context*, normalized to a stripped string."""
    if context is None:
        return ""
    raw = getattr(context, "streamer", "") or ""
    return str(raw).strip()
def _read_bvid(path: Path) -> str:
    """Read a BV id from *path*; "" when the file is absent or not a BV id."""
    if not path.exists():
        return ""
    content = path.read_text(encoding="utf-8").strip()
    return content if content.startswith("BV") else ""
def _parse_datetime(value: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp; None for empty or unparseable input."""
    if not value:
        return None
    try:
        parsed = datetime.fromisoformat(value)
    except ValueError:
        parsed = None
    return parsed
def _parse_title_datetime(title: str) -> datetime | None:
    """Extract a start datetime from a live-recording title.

    Titles use "YYYY年MM月DD日 HH时MM分" or the year-less "MM月DD日 HH时MM分"
    (时/点 both accepted); the year-less form assumes the current year.
    Returns None when no pattern matches or the date is invalid.
    """
    patterns = (
        r"(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
        r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
    )
    for pattern in patterns:
        match = re.search(pattern, title)
        if match is None:
            continue
        parts = match.groupdict()
        year = int(parts.get("year") or datetime.now().year)
        try:
            return datetime(
                year,
                int(parts["month"]),
                int(parts["day"]),
                int(parts["hour"]),
                int(parts["minute"]),
            )
        except ValueError:
            # e.g. 02月30日 — an impossible calendar date.
            return None
    return None
def _parse_streamer_from_title(title: str) -> str:
    """Streamer name is whatever precedes the "唱歌录播" marker in the title."""
    marker = "唱歌录播"
    if marker not in title:
        return ""
    prefix, _, _ = title.partition(marker)
    return prefix.strip()
def _previous_live_from_biliup_list(
    streamer: str,
    current_started: datetime | None,
    settings: dict[str, Any] | None,
) -> dict[str, str]:
    """Fallback: derive the previous live's BV ids from the biliup upload list.

    Scans the uploaded videos for *streamer*, classifies each title as a
    pure-song cut ("纯享") or a full recording ("唱歌录播"), and returns the
    most recent of each strictly before *current_started*. Best-effort: any
    CLI failure yields {}.
    """
    if current_started is None or not settings or not settings.get("biliup_path") or not settings.get("cookie_file"):
        return {}
    try:
        videos = fetch_biliup_list(settings)
    except Exception:  # best-effort fallback; a CLI failure yields no links
        return {}
    current_compare = current_started.replace(tzinfo=None)
    full_candidates: list[tuple[datetime, str]] = []
    pure_candidates: list[tuple[datetime, str]] = []
    for video in videos:
        title = video.get("title", "")
        bvid = video.get("bvid", "")
        if not bvid.startswith("BV"):
            continue
        if streamer not in title:
            continue
        started = _parse_title_datetime(title)
        # Year-less titles parse into the current year, so a "future"
        # timestamp means the video is actually from last year.
        # Fix: the original tested `"" not in title`, which is always False
        # (the empty string is a substring of every string), so this rollback
        # never ran; the intended guard is "no explicit 年 year marker".
        if started is not None and started > current_compare and "年" not in title:
            started = started.replace(year=started.year - 1)
        if started is None or started >= current_compare:
            continue
        if "纯享" in title:
            pure_candidates.append((started, bvid))
        elif "唱歌录播" in title:
            full_candidates.append((started, bvid))
    if not full_candidates and not pure_candidates:
        return {}
    full_bvid = max(full_candidates, key=lambda item: item[0])[1] if full_candidates else ""
    pure_bvid = max(pure_candidates, key=lambda item: item[0])[1] if pure_candidates else ""
    return {
        "previous_full_video_bvid": full_bvid,
        "previous_full_video_link": bilibili_video_url(full_bvid),
        "previous_pure_video_bvid": pure_bvid,
        "previous_pure_video_link": bilibili_video_url(pure_bvid),
    }

View File

@ -1,6 +1,8 @@
from __future__ import annotations
import shutil
from pathlib import Path
from typing import Any
from biliup_next.infra.task_repository import TaskRepository
from biliup_next.infra.workspace_paths import resolve_task_work_dir
@ -11,35 +13,59 @@ class WorkspaceCleanupService:
self.repo = repo
def cleanup_task_outputs(self, task_id: str, settings: dict[str, object]) -> dict[str, object]:
task = self.repo.get_task(task_id)
if task is None:
cleanup_tasks = self._cleanup_tasks(task_id)
if not cleanup_tasks:
raise RuntimeError(f"task not found: {task_id}")
session_dir = resolve_task_work_dir(task)
removed: list[str] = []
skipped: list[str] = []
cleaned_task_ids: list[str] = []
if settings.get("delete_source_video_after_collection_synced", False):
source_path = Path(task.source_path).resolve()
try:
source_path.relative_to(session_dir)
source_managed = True
except ValueError:
source_managed = False
if source_path.exists() and source_managed:
source_path.unlink()
self.repo.delete_artifact_by_path(task_id, str(source_path.resolve()))
removed.append(str(source_path))
else:
skipped.append(str(source_path))
for task in cleanup_tasks:
session_dir = resolve_task_work_dir(task)
cleaned_task_ids.append(task.id)
if settings.get("delete_split_videos_after_collection_synced", False):
split_dir = session_dir / "split_video"
if split_dir.exists():
shutil.rmtree(split_dir, ignore_errors=True)
self.repo.delete_artifacts(task_id, "clip_video")
removed.append(str(split_dir))
else:
skipped.append(str(split_dir))
if settings.get("delete_source_video_after_collection_synced", False):
source_path = Path(task.source_path).resolve()
try:
source_path.relative_to(session_dir)
source_managed = True
except ValueError:
source_managed = False
if source_path.exists() and source_managed:
source_path.unlink()
self.repo.delete_artifact_by_path(task.id, str(source_path.resolve()))
removed.append(str(source_path))
else:
skipped.append(str(source_path))
return {"removed": removed, "skipped": skipped}
if settings.get("delete_split_videos_after_collection_synced", False):
for video_dir_name in ("split_video", "publish_video"):
video_dir = session_dir / video_dir_name
if video_dir.exists():
shutil.rmtree(video_dir, ignore_errors=True)
removed.append(str(video_dir))
else:
skipped.append(str(video_dir))
self.repo.delete_artifacts(task.id, "clip_video")
return {"removed": removed, "skipped": skipped, "task_ids": cleaned_task_ids}
def _cleanup_tasks(self, task_id: str) -> list[Any]:
task = self.repo.get_task(task_id)
if task is None:
return []
if not hasattr(self.repo, "get_task_context") or not hasattr(self.repo, "list_task_contexts_by_session_key"):
return [task]
context = self.repo.get_task_context(task_id)
if context is None or not context.session_key or context.session_key.startswith("task:"):
return [task]
tasks = []
for session_context in self.repo.list_task_contexts_by_session_key(context.session_key):
session_task = self.repo.get_task(session_context.task_id)
if session_task is not None:
tasks.append(session_task)
return tasks or [task]

View File

@ -31,4 +31,5 @@ class CollectionService:
self.repo.update_task_status(task_id, "collection_synced", finished_at)
cleanup_result = self.cleanup.cleanup_task_outputs(task_id, settings)
return {**result, "cleanup": cleanup_result}
self.repo.update_task_status(task_id, "commented", finished_at)
return result

View File

@ -11,9 +11,34 @@ from biliup_next.core.models import Task
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.bilibili_api import BilibiliApiAdapter
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
from biliup_next.infra.video_links import bilibili_video_url, link_context_for_task
from biliup_next.infra.workspace_paths import resolve_task_work_dir
DEFAULT_COMMENT_TEMPLATE = {
"split_header": (
"当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n"
"直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n"
"上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)"
),
"full_header": (
"当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n"
"歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n"
"上次完整版:{previous_full_video_link} (上一场完整录播)"
),
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}",
}
class _SafeFormatDict(dict):
    """dict for str.format_map that yields "" for unknown placeholders.

    Lets comment templates reference optional variables without raising
    KeyError when a value is unavailable.
    """

    def __missing__(self, key: str) -> str:
        return ""
class BilibiliTopCommentProvider:
def __init__(self, bilibili_api: BilibiliApiAdapter | None = None) -> None:
self.bilibili_api = bilibili_api or BilibiliApiAdapter()
@ -41,7 +66,8 @@ class BilibiliTopCommentProvider:
)
timeline_content = songs_path.read_text(encoding="utf-8").strip()
split_content, split_reason = self._build_split_comment(task, settings)
comment_template = self._load_comment_template(settings)
split_content, split_reason = self._build_split_comment(task, settings, comment_template)
if not timeline_content and not split_content:
self._touch_comment_flags(session_dir, split_done=True, full_done=True)
return {"status": "skipped", "reason": "comment_content_empty"}
@ -78,7 +104,7 @@ class BilibiliTopCommentProvider:
if settings.get("post_full_video_timeline_comment", True) and not full_done:
full_bvid = resolve_full_video_bvid(task.title, session_dir, settings)
full_content, full_reason = self._build_full_comment_content(task, settings)
full_content, full_reason = self._build_full_comment_content(task, settings, comment_template)
if full_reason is not None:
full_result = {"status": "skipped", "reason": full_reason}
elif full_bvid and full_content:
@ -135,44 +161,116 @@ class BilibiliTopCommentProvider:
return {"status": "ok", "bvid": bvid, "aid": aid, "rpid": rpid}
@staticmethod
def _build_split_comment_content(songs_json_path: Path, songs_txt_path: Path) -> str:
def _build_split_comment_content(
songs_json_path: Path,
songs_txt_path: Path,
*,
start_index: int = 1,
comment_template: dict[str, str] | None = None,
) -> tuple[str, int]:
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
next_index = start_index
if songs_json_path.exists():
try:
data = json.loads(songs_json_path.read_text(encoding="utf-8"))
lines = []
for index, song in enumerate(data.get("songs", []), 1):
for song in data.get("songs", []):
title = str(song.get("title", "")).strip()
artist = str(song.get("artist", "")).strip()
if not title:
continue
suffix = f"{artist}" if artist else ""
lines.append(f"{index}. {title}{suffix}")
lines.append(
BilibiliTopCommentProvider._format_template(
comment_template.get("split_song_line", DEFAULT_COMMENT_TEMPLATE["split_song_line"]),
{
"song_index": str(next_index),
"title": title,
"artist": artist,
"artist_suffix": suffix,
},
)
)
next_index += 1
if lines:
return "\n".join(lines)
return "\n".join(lines), next_index
except json.JSONDecodeError:
pass
if songs_txt_path.exists():
lines = []
for index, raw in enumerate(songs_txt_path.read_text(encoding="utf-8").splitlines(), 1):
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
text = raw.strip()
if not text:
continue
parts = text.split(" ", 1)
song_text = parts[1] if len(parts) == 2 and ":" in parts[0] else text
lines.append(f"{index}. {song_text}")
return "\n".join(lines)
return ""
lines.append(
BilibiliTopCommentProvider._format_template(
comment_template.get("split_text_song_line", DEFAULT_COMMENT_TEMPLATE["split_text_song_line"]),
{
"song_index": str(next_index),
"song_text": song_text,
"line_text": text,
},
)
)
next_index += 1
return "\n".join(lines), next_index
return "", next_index
def _build_split_comment(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
@staticmethod
def _build_full_timeline_content(
songs_txt_path: Path,
*,
start_index: int = 1,
comment_template: dict[str, str] | None = None,
) -> tuple[str, int]:
if not songs_txt_path.exists():
return "", start_index
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
next_index = start_index
lines = []
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
text = raw.strip()
if not text:
continue
lines.append(
BilibiliTopCommentProvider._format_template(
comment_template.get("full_timeline_line", DEFAULT_COMMENT_TEMPLATE["full_timeline_line"]),
{
"song_index": str(next_index),
"line_text": text,
},
)
)
next_index += 1
return "\n".join(lines), next_index
def _build_split_comment(
self,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> tuple[str, str | None]:
repo = settings.get("__repo")
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
session_dir = resolve_task_work_dir(task)
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
content, _ = self._build_split_comment_content(
session_dir / "songs.json",
session_dir / "songs.txt",
comment_template=comment_template,
)
return self._with_split_footer(content, task, settings, comment_template), None
context = repo.get_task_context(task.id)
if context is None or not context.session_key or context.session_key.startswith("task:"):
session_dir = resolve_task_work_dir(task)
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
content, _ = self._build_split_comment_content(
session_dir / "songs.json",
session_dir / "songs.txt",
comment_template=comment_template,
)
return self._with_split_footer(content, task, settings, comment_template), None
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
if not ordered_contexts:
@ -182,31 +280,42 @@ class BilibiliTopCommentProvider:
return "", "session_split_comment_owned_by_anchor"
blocks: list[str] = []
next_song_index = 1
for index, session_context in enumerate(ordered_contexts, start=1):
session_task = repo.get_task(session_context.task_id)
if session_task is None:
continue
task_dir = resolve_task_work_dir(session_task)
content = self._build_split_comment_content(task_dir / "songs.json", task_dir / "songs.txt")
content, next_song_index = self._build_split_comment_content(
task_dir / "songs.json",
task_dir / "songs.txt",
start_index=next_song_index,
comment_template=comment_template,
)
if not content:
continue
blocks.append(f"P{index}:\n{content}")
blocks.append(f"{self._part_header(comment_template, 'split_part_header', index)}\n{content}")
if not blocks:
return "", "split_comment_empty"
return "\n\n".join(blocks), None
return self._with_split_footer("\n\n".join(blocks), task, settings, comment_template), None
def _build_full_comment_content(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
def _build_full_comment_content(
self,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> tuple[str, str | None]:
repo = settings.get("__repo")
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
session_dir = resolve_task_work_dir(task)
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
return content, None if content else "timeline_comment_empty"
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
context = repo.get_task_context(task.id)
if context is None or not context.session_key or context.session_key.startswith("task:"):
session_dir = resolve_task_work_dir(task)
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
return content, None if content else "timeline_comment_empty"
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
if not ordered_contexts:
@ -216,21 +325,109 @@ class BilibiliTopCommentProvider:
return "", "session_full_comment_owned_by_anchor"
blocks: list[str] = []
next_song_index = 1
for index, session_context in enumerate(ordered_contexts, start=1):
session_task = repo.get_task(session_context.task_id)
if session_task is None:
continue
task_dir = resolve_task_work_dir(session_task)
songs_path = task_dir / "songs.txt"
if not songs_path.exists():
continue
content = songs_path.read_text(encoding="utf-8").strip()
content, next_song_index = self._build_full_timeline_content(
songs_path,
start_index=next_song_index,
comment_template=comment_template,
)
if not content:
continue
blocks.append(f"P{index}:\n{content}")
blocks.append(f"{self._part_header(comment_template, 'full_part_header', index)}\n{content}")
if not blocks:
return "", "timeline_comment_empty"
return "\n\n".join(blocks), None
return self._with_full_footer("\n\n".join(blocks), task, settings, comment_template), None
def _with_split_footer(
self,
content: str,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> str:
links = link_context_for_task(task, settings.get("__repo"), settings)
current_full_link = links.get("current_full_video_link", "")
if not current_full_link and settings.get("biliup_path") and settings.get("cookie_file"):
full_bvid = resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings)
current_full_link = bilibili_video_url(full_bvid)
header_vars = dict(links)
header_vars["current_full_video_link"] = current_full_link
header = self._format_header_template(
comment_template.get("split_header", DEFAULT_COMMENT_TEMPLATE["split_header"]),
header_vars,
)
return self._prepend_header(content, header)
def _with_full_footer(
self,
content: str,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> str:
links = link_context_for_task(task, settings.get("__repo"), settings)
header = self._format_header_template(
comment_template.get("full_header", DEFAULT_COMMENT_TEMPLATE["full_header"]),
links,
)
return self._prepend_header(content, header)
@staticmethod
def _prepend_header(content: str, header: str) -> str:
content = content.strip()
lines = [line.rstrip() for line in header.splitlines() if line.strip()]
if not content:
return "\n".join(lines)
if not lines:
return content
return "\n".join(lines) + f"\n\n{content}"
@staticmethod
def _part_header(comment_template: dict[str, str], key: str, part_index: int) -> str:
return BilibiliTopCommentProvider._format_template(
comment_template.get(key, DEFAULT_COMMENT_TEMPLATE[key]),
{"part_index": str(part_index)},
)
@staticmethod
def _format_template(template: str, values: dict[str, str]) -> str:
return template.format_map(_SafeFormatDict(values)).strip()
@staticmethod
def _format_header_template(template: str, values: dict[str, str]) -> str:
lines = []
for raw_line in template.splitlines():
if any(f"{{{key}}}" in raw_line and not value for key, value in values.items()):
continue
lines.append(BilibiliTopCommentProvider._format_template(raw_line, values))
return "\n".join(line for line in lines if line.strip()).strip()
@staticmethod
def _load_comment_template(settings: dict[str, Any]) -> dict[str, str]:
merged = dict(DEFAULT_COMMENT_TEMPLATE)
path_value = settings.get("upload_config_file")
if not path_value:
return merged
path = Path(str(path_value))
if not path.exists():
return merged
try:
config = json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return merged
template = config.get("comment_template", {})
if not isinstance(template, dict):
return merged
for key, value in template.items():
if key in merged and isinstance(value, str):
merged[key] = value
return merged
def _ordered_session_contexts(self, repo, session_key: str) -> list[object]: # type: ignore[no-untyped-def]
contexts = list(repo.list_task_contexts_by_session_key(session_key))

View File

@ -217,7 +217,7 @@ class IngestService:
"room_id": sidecar_meta["payload"].get("room_id"),
"session_key": sidecar_meta["payload"].get("session_key"),
"full_video_bvid": sidecar_meta["payload"].get("full_video_bvid"),
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or source_path.stat().st_mtime,
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or target_source.stat().st_mtime,
}
task = self.create_task_from_file(target_source, settings, context_payload=context_payload)
accepted.append(

View File

@ -3,6 +3,7 @@ from __future__ import annotations
import json
import random
import re
import shutil
import time
from pathlib import Path
from typing import Any
@ -11,9 +12,13 @@ from biliup_next.core.errors import ModuleError
from biliup_next.core.models import PublishRecord, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.biliup_cli import BiliupCliAdapter
from biliup_next.infra.video_links import link_context_for_task
from biliup_next.infra.workspace_paths import resolve_task_work_dir
DESC_MAX_CHARS = 1900
class BiliupCliPublishProvider:
def __init__(self, adapter: BiliupCliAdapter | None = None) -> None:
self.adapter = adapter or BiliupCliAdapter()
@ -36,7 +41,7 @@ class BiliupCliPublishProvider:
publish_progress = work_dir / "publish_progress.json"
config = self._load_upload_config(Path(str(settings["upload_config_file"])))
video_files = [artifact.path for artifact in clip_videos]
video_files = self._prepare_publish_video_files(work_dir, [artifact.path for artifact in clip_videos])
if not video_files:
raise ModuleError(
code="PUBLISH_NO_CLIPS",
@ -64,10 +69,13 @@ class BiliupCliPublishProvider:
"daily_quote": quote.get("text", ""),
"quote_author": quote.get("author", ""),
}
template_vars.update(link_context_for_task(task, settings.get("__repo"), settings))
template = config.get("template", {})
title = template.get("title", "{streamer}_{date}").format(**template_vars)
description = template.get("description", "{songs_list}").format(**template_vars)
dynamic = template.get("dynamic", "").format(**template_vars)
description = self._fit_bilibili_desc(
self._drop_empty_link_lines(template.get("description", "{songs_list}").format(**template_vars))
)
dynamic = self._drop_empty_link_lines(template.get("dynamic", "").format(**template_vars))
tags = template.get("tag", "翻唱,唱歌,音乐").format(**template_vars)
streamer_cfg = config.get("streamers", {})
if streamer in streamer_cfg:
@ -90,8 +98,12 @@ class BiliupCliPublishProvider:
first_batch = video_files[:5]
remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
progress = self._load_publish_progress(publish_progress)
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
progress_bvid = str(progress.get("bvid", "")).strip()
if not existing_bvid.startswith("BV") and progress_bvid.startswith("BV"):
existing_bvid = progress_bvid
bvid_file.write_text(existing_bvid, encoding="utf-8")
if upload_done.exists() and existing_bvid.startswith("BV"):
return PublishRecord(
id=None,
@ -201,6 +213,7 @@ class BiliupCliPublishProvider:
upload_cmd.extend(["--cover", cover])
for attempt in range(1, retry_count + 1):
self._append_description_summary(publish_log, description)
result = self.adapter.run(
upload_cmd,
label=f"首批上传[{attempt}/{retry_count}]",
@ -253,6 +266,29 @@ class BiliupCliPublishProvider:
def _wait_seconds(retry_index: int) -> int:
return min(300 * (2**retry_index), 3600)
@staticmethod
def _prepare_publish_video_files(work_dir: Path, video_files: list[str]) -> list[str]:
publish_dir = work_dir / "publish_video"
if publish_dir.exists():
shutil.rmtree(publish_dir)
publish_dir.mkdir(parents=True, exist_ok=True)
prepared: list[str] = []
for index, video_file in enumerate(video_files, start=1):
source = Path(video_file)
name = BiliupCliPublishProvider._strip_clip_number_prefix(source.name)
target = publish_dir / f"{index:02d}_{name}"
try:
target.hardlink_to(source)
except OSError:
shutil.copy2(source, target)
prepared.append(str(target))
return prepared
@staticmethod
def _strip_clip_number_prefix(filename: str) -> str:
return re.sub(r"^\d+[_-]+", "", filename, count=1)
@staticmethod
def _load_upload_config(path: Path) -> dict[str, Any]:
if not path.exists():
@ -262,6 +298,9 @@ class BiliupCliPublishProvider:
@staticmethod
def _parse_filename(filename: str, config: dict[str, Any] | None = None) -> dict[str, str]:
config = config or {}
builtin = BiliupCliPublishProvider._parse_builtin_filename(filename)
if builtin:
return builtin
patterns = config.get("filename_patterns", {}).get("patterns", [])
for pattern_config in patterns:
regex = pattern_config.get("regex")
@ -278,6 +317,48 @@ class BiliupCliPublishProvider:
return data
return {"streamer": filename, "date": ""}
@staticmethod
def _parse_builtin_filename(filename: str) -> dict[str, str]:
patterns = (
r"^(?P<streamer>.+?)唱歌录播\s+(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
r"^(?P<streamer>.+?)唱歌录播[:]\s*(?P<year>\d{4})年(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
)
for pattern in patterns:
match = re.match(pattern, filename)
if not match:
continue
data = match.groupdict()
data["date"] = f"{data['month']}{data['day']}{data['hour']}{data['minute']}"
return data
return {}
@staticmethod
def _drop_empty_link_lines(text: str) -> str:
lines = []
for line in text.splitlines():
stripped = line.strip()
if stripped in {"直播完整版:", "歌曲纯享版:", "上次直播:", "上次纯享:", "上次完整版:"}:
continue
lines.append(line.rstrip())
return "\n".join(lines).strip()
@staticmethod
def _fit_bilibili_desc(text: str, max_chars: int = DESC_MAX_CHARS) -> str:
text = text.strip()
if len(text) <= max_chars:
return text
suffix = "\n\n完整歌单见置顶评论。"
return text[: max(0, max_chars - len(suffix))].rstrip() + suffix
@staticmethod
def _append_description_summary(log_path: Path, description: str) -> None:
log_path.parent.mkdir(parents=True, exist_ok=True)
line = f"description_chars: {len(description)}\n"
if log_path.exists():
log_path.write_text(log_path.read_text(encoding="utf-8") + line, encoding="utf-8")
else:
log_path.write_text(line, encoding="utf-8")
@staticmethod
def _get_random_quote(config: dict[str, Any]) -> dict[str, str]:
quotes = config.get("quotes", [])

View File

@ -26,7 +26,9 @@ class PublishService:
session_contexts = self._session_contexts(task_id)
if len(session_contexts) <= 1:
clip_videos = self._clip_videos_for_task(task_id)
record = provider.publish(task, clip_videos, settings)
provider_settings = dict(settings)
provider_settings["__repo"] = self.repo
record = provider.publish(task, clip_videos, provider_settings)
self._persist_publish_success(task, record)
return record
@ -50,6 +52,7 @@ class PublishService:
if anchor_task is None:
raise RuntimeError(f"anchor task not found: {anchor_context.task_id}")
session_settings = dict(settings)
session_settings["__repo"] = self.repo
session_settings.update(self._session_publish_metadata(anchor_task, session_contexts, settings))
record = provider.publish(anchor_task, clip_videos, session_settings)
for context in session_contexts:

View File

@ -37,13 +37,17 @@ class CodexSongDetector:
work_dir=work_dir,
prompt=TASK_PROMPT,
)
self._write_codex_log(work_dir, result)
if result.returncode != 0:
stderr = result.stderr[-2000:]
stdout = result.stdout[-2000:]
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
raise ModuleError(
code="SONG_DETECT_FAILED",
message="codex exec 执行失败",
retryable=True,
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
retryable=retryable,
details={"stdout": stdout, "stderr": stderr},
)
ensure_song_outputs(
@ -72,3 +76,37 @@ class CodexSongDetector:
created_at=utc_now_iso(),
),
)
@staticmethod
def _write_codex_log(work_dir: Path, result) -> None: # noqa: ANN001
log_path = work_dir / "codex.log"
log_path.write_text(
"\n".join(
[
"codex song_detect",
f"returncode: {result.returncode}",
"",
"stdout:",
result.stdout,
"",
"stderr:",
result.stderr,
"",
]
),
encoding="utf-8",
)
@staticmethod
def _is_auth_error(text: str) -> bool:
lowered = text.lower()
return any(
needle in lowered
for needle in (
"401",
"invalid access token",
"token expired",
"unauthorized",
"authentication",
)
)

View File

@ -43,11 +43,14 @@ class QwenCliSongDetector:
)
if result.returncode != 0:
stderr = result.stderr[-2000:]
stdout = result.stdout[-2000:]
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
raise ModuleError(
code="SONG_DETECT_FAILED",
message="qwen -p 执行失败",
retryable=True,
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
retryable=retryable,
details={"stdout": stdout, "stderr": stderr},
)
ensure_song_outputs(
@ -76,3 +79,17 @@ class QwenCliSongDetector:
created_at=utc_now_iso(),
),
)
@staticmethod
def _is_auth_error(text: str) -> bool:
lowered = text.lower()
return any(
needle in lowered
for needle in (
"401",
"invalid access token",
"token expired",
"unauthorized",
"authentication",
)
)

View File

@ -2,9 +2,12 @@ from __future__ import annotations
import json
import math
import os
import shutil
import subprocess
import time
from contextlib import suppress
from contextlib import contextmanager
from pathlib import Path
from typing import Any
@ -16,6 +19,7 @@ from biliup_next.core.providers import ProviderManifest
LANGUAGE = "zh"
BITRATE_KBPS = 64
MODEL_NAME = "whisper-large-v3-turbo"
SEGMENT_SIZE_SAFETY_RATIO = 0.75
class GroqTranscribeProvider:
@ -30,11 +34,11 @@ class GroqTranscribeProvider:
)
def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
groq_api_key = str(settings.get("groq_api_key", "")).strip()
if not groq_api_key:
groq_api_keys = self._groq_api_keys(settings)
if not groq_api_keys:
raise ModuleError(
code="GROQ_API_KEY_MISSING",
message="未配置 transcribe.groq_api_key",
message="未配置 transcribe.groq_api_key 或 transcribe.groq_api_keys",
retryable=False,
)
try:
@ -55,18 +59,23 @@ class GroqTranscribeProvider:
)
ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
max_file_size_mb = int(settings.get("max_file_size_mb", 23))
max_file_size_mb = int(settings.get("max_file_size_mb", 12))
work_dir = source_path.parent
temp_audio_dir = work_dir / "temp_audio"
checkpoint_dir = work_dir / "transcribe_segments"
temp_audio_dir.mkdir(parents=True, exist_ok=True)
segment_duration = max(1, math.floor((max_file_size_mb * 8 * 1024) / BITRATE_KBPS))
checkpoint_dir.mkdir(parents=True, exist_ok=True)
max_segment_bytes = max(1, max_file_size_mb) * 1024 * 1024
segment_duration = self._initial_segment_duration(max_file_size_mb)
output_pattern = temp_audio_dir / "part_%03d.mp3"
self._extract_audio_segments(
segment_duration = self._extract_audio_segments_with_size_guard(
ffmpeg_bin=ffmpeg_bin,
source_path=source_path,
output_pattern=output_pattern,
segment_duration=segment_duration,
temp_audio_dir=temp_audio_dir,
initial_segment_duration=segment_duration,
max_segment_bytes=max_segment_bytes,
)
segments = sorted(temp_audio_dir.glob("part_*.mp3"))
@ -77,22 +86,47 @@ class GroqTranscribeProvider:
retryable=False,
)
client = Groq(api_key=groq_api_key)
request_timeout_seconds = max(1, int(settings.get("request_timeout_seconds", 180)))
request_max_retries = max(0, int(settings.get("request_max_retries", 1)))
request_retry_backoff_seconds = max(0, int(settings.get("request_retry_backoff_seconds", 30)))
lock_enabled = bool(settings.get("serialize_groq_requests", True))
lock_path = self._groq_lock_path(settings, work_dir)
clients = [Groq(api_key=key, timeout=request_timeout_seconds, max_retries=0) for key in groq_api_keys]
srt_path = work_dir / f"{task.title}.srt"
temp_srt_path = work_dir / f".{task.title}.srt.tmp"
global_idx = 1
try:
with srt_path.open("w", encoding="utf-8") as srt_file:
with temp_srt_path.open("w", encoding="utf-8") as srt_file:
for index, segment in enumerate(segments):
offset_seconds = index * segment_duration
segment_data = self._transcribe_with_retry(client, segment)
segment_checkpoint = checkpoint_dir / f"{segment.stem}.json"
segment_data = self._load_segment_checkpoint(segment_checkpoint, segment_duration=segment_duration)
if segment_data is None:
with self._optional_groq_lock(lock_path, enabled=lock_enabled):
segment_data = self._transcribe_with_retry(
clients,
segment,
request_timeout_seconds=request_timeout_seconds,
request_max_retries=request_max_retries,
request_retry_backoff_seconds=request_retry_backoff_seconds,
)
self._write_segment_checkpoint(
segment_checkpoint,
segment_data,
segment_duration=segment_duration,
audio_file=segment,
)
for chunk in segment_data:
start = self._format_srt_time(float(chunk["start"]) + offset_seconds)
end = self._format_srt_time(float(chunk["end"]) + offset_seconds)
text = str(chunk["text"]).strip()
srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
global_idx += 1
temp_srt_path.replace(srt_path)
finally:
with suppress(FileNotFoundError):
temp_srt_path.unlink()
shutil.rmtree(temp_audio_dir, ignore_errors=True)
return Artifact(
@ -104,12 +138,126 @@ class GroqTranscribeProvider:
{
"provider": "groq",
"model": MODEL_NAME,
"api_key_count": len(groq_api_keys),
"segment_duration_seconds": segment_duration,
"checkpoint_dir": str(checkpoint_dir.resolve()),
}
),
created_at=utc_now_iso(),
)
@staticmethod
def _groq_api_keys(settings: dict[str, Any]) -> list[str]:
keys: list[str] = []
raw_keys = settings.get("groq_api_keys")
if isinstance(raw_keys, list):
keys.extend(str(key).strip() for key in raw_keys if str(key).strip())
legacy_key = str(settings.get("groq_api_key", "")).strip()
if legacy_key:
keys.append(legacy_key)
deduped: list[str] = []
seen: set[str] = set()
for key in keys:
if key in seen:
continue
seen.add(key)
deduped.append(key)
return deduped
@staticmethod
def _initial_segment_duration(max_file_size_mb: int) -> int:
safe_target_mb = max_file_size_mb * SEGMENT_SIZE_SAFETY_RATIO
return max(1, math.floor((safe_target_mb * 8 * 1024) / BITRATE_KBPS))
    def _extract_audio_segments_with_size_guard(
        self,
        *,
        ffmpeg_bin: str,
        source_path: Path,
        output_pattern: Path,
        temp_audio_dir: Path,
        initial_segment_duration: int,
        max_segment_bytes: int,
    ) -> int:
        """Split audio, shrinking the segment duration until every part fits.

        Runs up to four extraction passes.  After each pass the largest
        resulting ``part_*.mp3`` is checked against *max_segment_bytes*; if it
        is too big, the duration is reduced to 75% and the pass repeats.

        Returns:
            The segment duration (seconds) actually used for the final pass.

        Raises:
            ModuleError: non-retryable TRANSCRIBE_AUDIO_SEGMENT_TOO_LARGE when
                even the smallest attempted duration still yields an oversized
                segment.
        """
        segment_duration = initial_segment_duration
        for _attempt in range(4):
            # Each pass starts clean so leftovers from a previous duration
            # cannot be mistaken for the current pass's output.
            self._clear_audio_segments(temp_audio_dir)
            self._extract_audio_segments(
                ffmpeg_bin=ffmpeg_bin,
                source_path=source_path,
                output_pattern=output_pattern,
                segment_duration=segment_duration,
            )
            largest_segment = self._largest_audio_segment(temp_audio_dir)
            if largest_segment is None or largest_segment.stat().st_size <= max_segment_bytes:
                return segment_duration
            next_duration = max(1, math.floor(segment_duration * 0.75))
            if next_duration == segment_duration:
                # Duration bottomed out at 1 s; further shrinking is impossible.
                break
            segment_duration = next_duration
        # All passes failed: report the offending segment for diagnostics.
        largest_segment = self._largest_audio_segment(temp_audio_dir)
        largest_size = largest_segment.stat().st_size if largest_segment else 0
        raise ModuleError(
            code="TRANSCRIBE_AUDIO_SEGMENT_TOO_LARGE",
            message="音频分片超过 Groq 上传安全阈值",
            retryable=False,
            details={
                "largest_segment": str(largest_segment) if largest_segment else None,
                "largest_segment_bytes": largest_size,
                "max_segment_bytes": max_segment_bytes,
            },
        )
@staticmethod
def _clear_audio_segments(temp_audio_dir: Path) -> None:
for path in temp_audio_dir.glob("part_*.mp3"):
path.unlink(missing_ok=True)
@staticmethod
def _largest_audio_segment(temp_audio_dir: Path) -> Path | None:
segments = list(temp_audio_dir.glob("part_*.mp3"))
if not segments:
return None
return max(segments, key=lambda path: path.stat().st_size)
@staticmethod
def _load_segment_checkpoint(checkpoint_path: Path, *, segment_duration: int) -> list[dict[str, Any]] | None:
if not checkpoint_path.exists():
return None
try:
data = json.loads(checkpoint_path.read_text(encoding="utf-8"))
if data.get("model") != MODEL_NAME or data.get("language") != LANGUAGE:
return None
if data.get("segment_duration_seconds") != segment_duration:
return None
segments = data.get("segments")
if not isinstance(segments, list):
return None
return [dict(segment) for segment in segments]
except Exception:
return None
@staticmethod
def _write_segment_checkpoint(
checkpoint_path: Path,
segments: list[dict[str, Any]],
*,
segment_duration: int,
audio_file: Path,
) -> None:
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
temp_path = checkpoint_path.with_suffix(f"{checkpoint_path.suffix}.tmp")
payload = {
"provider": "groq",
"model": MODEL_NAME,
"language": LANGUAGE,
"audio_file": audio_file.name,
"segment_duration_seconds": segment_duration,
"segments": segments,
}
temp_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
temp_path.replace(checkpoint_path)
def _extract_audio_segments(
self,
*,
@ -156,31 +304,141 @@ class GroqTranscribeProvider:
details={"stderr": exc.stderr[-2000:], "stdout": exc.stdout[-2000:]},
) from exc
def _transcribe_with_retry(self, client: Any, audio_file: Path) -> list[dict[str, Any]]:
retry_count = 0
while True:
@staticmethod
def _groq_lock_path(settings: dict[str, Any], fallback_work_dir: Path) -> Path:
session_dir = settings.get("session_dir")
if isinstance(session_dir, str) and session_dir:
return Path(session_dir).resolve().parent / "groq_transcribe.lock"
return fallback_work_dir / "groq_transcribe.lock"
@staticmethod
@contextmanager
def _optional_groq_lock(lock_path: Path, *, enabled: bool):
if not enabled:
yield
return
lock_path.parent.mkdir(parents=True, exist_ok=True)
with lock_path.open("w", encoding="utf-8") as lock_file:
try:
with audio_file.open("rb") as file_handle:
response = client.audio.transcriptions.create(
file=(audio_file.name, file_handle.read()),
model=MODEL_NAME,
response_format="verbose_json",
language=LANGUAGE,
temperature=0.0,
)
return [dict(segment) for segment in response.segments]
except Exception as exc: # noqa: BLE001
retry_count += 1
err_str = str(exc)
if "429" in err_str or "rate_limit" in err_str.lower():
time.sleep(25)
continue
raise ModuleError(
code="GROQ_TRANSCRIBE_FAILED",
message=f"Groq 转录失败: {audio_file.name}",
retryable=True,
details={"error": err_str, "retry_count": retry_count},
) from exc
import fcntl
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
lock_file.write(f"{os.getpid()}\n")
lock_file.flush()
yield
finally:
with suppress(Exception):
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
    def _transcribe_with_retry(
        self,
        clients: list[Any],
        audio_file: Path,
        *,
        request_timeout_seconds: int,
        request_max_retries: int,
        request_retry_backoff_seconds: int,
    ) -> list[dict[str, Any]]:
        """Transcribe one audio segment, rotating API keys and retrying.

        Each round tries every client (one per API key) in order: a
        rate-limit error rotates to the next key; another transient error
        ends the round early; a non-retryable error raises immediately.
        After a failed round, up to *request_max_retries* additional rounds
        run, sleeping *request_retry_backoff_seconds* between them.

        Returns:
            The segment's transcription chunks as plain dicts.

        Raises:
            ModuleError: GROQ_TRANSCRIBE_FAILED when all keys and rounds fail.
        """
        attempt = 0  # completed rounds (one round = every key tried once)
        key_attempts = 0  # individual API calls across all rounds
        last_error = ""
        while True:
            attempt += 1
            for key_index, client in enumerate(clients):
                key_attempts += 1
                try:
                    with audio_file.open("rb") as file_handle:
                        response = client.audio.transcriptions.create(
                            file=(audio_file.name, file_handle.read()),
                            model=MODEL_NAME,
                            response_format="verbose_json",
                            language=LANGUAGE,
                            temperature=0.0,
                            timeout=request_timeout_seconds,
                        )
                    return [dict(segment) for segment in response.segments]
                except Exception as exc:  # noqa: BLE001
                    err_str = str(exc)
                    last_error = err_str
                    # Rate limited: rotate to the next key if one remains.
                    if self._is_rate_limit_error(err_str) and key_index < len(clients) - 1:
                        continue
                    # Permanent failure: raise without burning more rounds.
                    if not self._should_retry_request(err_str):
                        raise self._transcribe_failed(
                            audio_file,
                            err_str,
                            request_attempts=attempt,
                            key_attempts=key_attempts,
                            api_key_count=len(clients),
                            request_timeout_seconds=request_timeout_seconds,
                        ) from exc
                    # Transient: abandon this round; maybe back off and retry.
                    break
            if attempt <= request_max_retries:
                if request_retry_backoff_seconds > 0:
                    time.sleep(request_retry_backoff_seconds)
                continue
            raise self._transcribe_failed(
                audio_file,
                last_error,
                request_attempts=attempt,
                key_attempts=key_attempts,
                api_key_count=len(clients),
                request_timeout_seconds=request_timeout_seconds,
            )
@staticmethod
def _transcribe_failed(
audio_file: Path,
error_text: str,
*,
request_attempts: int,
key_attempts: int,
api_key_count: int,
request_timeout_seconds: int,
) -> ModuleError:
return ModuleError(
code="GROQ_TRANSCRIBE_FAILED",
message=f"Groq 转录失败: {audio_file.name}",
retryable=True,
details={
"error": error_text,
"request_attempts": request_attempts,
"key_attempts": key_attempts,
"api_key_count": api_key_count,
"request_timeout_seconds": request_timeout_seconds,
},
)
@staticmethod
def _is_rate_limit_error(error_text: str) -> bool:
lowered = error_text.lower()
return any(
needle in lowered
for needle in (
"429",
"rate_limit",
"rate limit",
"too many requests",
)
)
@staticmethod
def _should_retry_request(error_text: str) -> bool:
lowered = error_text.lower()
return any(
needle in lowered
for needle in (
"429",
"rate_limit",
"timed out",
"timeout",
"connection error",
"connect error",
"server disconnected",
"502",
"503",
"504",
)
)
@staticmethod
def _format_srt_time(seconds: float) -> str:

View File

@ -88,7 +88,7 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
self.assertEqual(result["split"]["reason"], "comment_disabled")
self.assertEqual(len(api.reply_calls), 1)
self.assertIn("P1:\n1. Song A — Artist A", api.reply_calls[0]["content"])
self.assertIn("P2:\n1. Song B — Artist B", api.reply_calls[0]["content"])
self.assertIn("P2:\n2. Song B — Artist B", api.reply_calls[0]["content"])
def test_split_comment_skips_on_non_anchor_task(self) -> None:
api = _FakeBilibiliApi()
@ -212,6 +212,63 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
self.assertEqual(result["split"]["reason"], "comment_disabled")
self.assertTrue((work_dir / "comment_done.flag").exists())
    def test_comment_format_can_be_configured_from_upload_config(self) -> None:
        """comment_template overrides in upload_config.json must drive the comment text."""
        api = _FakeBilibiliApi()
        provider = BilibiliTopCommentProvider(bilibili_api=api)
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            work_dir = root / "task-1"
            work_dir.mkdir(parents=True, exist_ok=True)
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "source.mp4"),
                title="task-1",
                status="published",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            # Minimal published-task fixture: timeline, song list, own bvid,
            # and the full-video bvid referenced by the custom split header.
            (work_dir / "songs.txt").write_text("00:00:00 Song From Text — Artist T\n", encoding="utf-8")
            (work_dir / "songs.json").write_text(
                json.dumps({"songs": [{"title": "Song A", "artist": "Artist A"}]}),
                encoding="utf-8",
            )
            (work_dir / "bvid.txt").write_text("BV1COMMENT123", encoding="utf-8")
            (work_dir / "full_video_bvid.txt").write_text("BV1FULL12345", encoding="utf-8")
            cookies_file = root / "cookies.json"
            cookies_file.write_text("{}", encoding="utf-8")
            upload_config = root / "upload_config.json"
            # Custom header with two link placeholders plus a custom song line.
            upload_config.write_text(
                json.dumps(
                    {
                        "comment_template": {
                            "split_header": "这是纯享:{current_full_video_link}\n上一场:{previous_full_video_link}",
                            "split_song_line": "#{song_index} {title} / {artist}",
                        }
                    }
                ),
                encoding="utf-8",
            )
            result = provider.comment(
                task,
                {
                    "session_dir": str(root),
                    "cookies_file": str(cookies_file),
                    "upload_config_file": str(upload_config),
                    "post_split_comment": True,
                    "post_full_video_timeline_comment": False,
                },
            )
            self.assertEqual(result["status"], "ok")
            self.assertEqual(result["split"]["reason"], "comment_disabled")
            self.assertEqual(len(api.reply_calls), 1)
            content = str(api.reply_calls[0]["content"])
            self.assertIn("这是纯享https://www.bilibili.com/video/BV1FULL12345", content)
            # The previous-stream line has an empty link, so it must be dropped.
            self.assertNotIn("上一场:", content)
            self.assertIn("#1 Song A / Artist A", content)
def test_full_comment_aggregates_session_parts_on_anchor_task(self) -> None:
api = _FakeBilibiliApi()
provider = BilibiliTopCommentProvider(bilibili_api=api)
@ -263,8 +320,8 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
self.assertEqual(result["full"]["status"], "skipped")
self.assertEqual(result["full"]["reason"], "comment_disabled")
self.assertEqual(len(api.reply_calls), 1)
self.assertIn("P1:\n00:00:01 Song A\n00:02:00 Song B", api.reply_calls[0]["content"])
self.assertIn("P2:\n00:00:03 Song C", api.reply_calls[0]["content"])
self.assertIn("P1:\n1. 00:00:01 Song A\n2. 00:02:00 Song B", api.reply_calls[0]["content"])
self.assertIn("P2:\n3. 00:00:03 Song C", api.reply_calls[0]["content"])
def test_full_comment_skips_on_non_anchor_task(self) -> None:
api = _FakeBilibiliApi()

View File

@ -269,6 +269,117 @@ class BiliupCliPublishProviderTests(unittest.TestCase):
self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
self.assertTrue((work_dir / "upload_done.flag").exists())
    def test_publish_recovers_bvid_from_progress_when_bvid_file_was_removed(self) -> None:
        """Append-resume must recover the bvid from publish_progress.json when bvid.txt is gone."""
        adapter = _FakeBiliupAdapter()
        provider = BiliupCliPublishProvider(adapter=adapter)
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            work_dir = root / "task-1"
            work_dir.mkdir(parents=True, exist_ok=True)
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "source.mp4"),
                title="task-1",
                status="split_done",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            (work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
            (work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
            # The progress file still knows the bvid and that append batch 2
            # already finished — even though bvid.txt itself is missing.
            (work_dir / "publish_progress.json").write_text(
                json.dumps({"bvid": "BV1RESUME1234", "completed_append_batches": [2]}),
                encoding="utf-8",
            )
            upload_config = root / "upload_config.json"
            upload_config.write_text("{}", encoding="utf-8")
            # 15 clips -> first batch of 5 plus two append batches of 5.
            clips = []
            for index in range(1, 16):
                clip_path = work_dir / f"clip-{index}.mp4"
                clip_path.write_text("fake", encoding="utf-8")
                clips.append(
                    Artifact(
                        id=None,
                        task_id=task.id,
                        artifact_type="clip_video",
                        path=str(clip_path),
                        metadata_json="{}",
                        created_at=utc_now_iso(),
                    )
                )
            with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
                record = provider.publish(
                    task,
                    clips,
                    {
                        "session_dir": str(root),
                        "upload_config_file": str(upload_config),
                        "biliup_path": "runtime/biliup",
                        "cookie_file": "runtime/cookies.json",
                        "retry_count": 2,
                        "command_timeout_seconds": 123,
                    },
                )
            self.assertEqual(record.bvid, "BV1RESUME1234")
            # bvid.txt must be re-materialized from the recovered progress bvid.
            self.assertEqual((work_dir / "bvid.txt").read_text(encoding="utf-8"), "BV1RESUME1234")
            # Exactly one command should run: the single remaining append batch.
            self.assertEqual(len(adapter.run_calls), 1)
            self.assertIn("append", adapter.run_calls[0]["cmd"])
            self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
    def test_publish_renumbers_clip_filenames_across_aggregated_sessions(self) -> None:
        """Clips from aggregated sessions must be renumbered into one global sequence."""
        adapter = _FakeBiliupAdapter()
        provider = BiliupCliPublishProvider(adapter=adapter)
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            work_dir = root / "task-1"
            second_dir = root / "task-2"
            work_dir.mkdir(parents=True, exist_ok=True)
            second_dir.mkdir(parents=True, exist_ok=True)
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "source.mp4"),
                title="task-1",
                status="split_done",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            (work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
            (work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
            upload_config = root / "upload_config.json"
            upload_config.write_text("{}", encoding="utf-8")
            # Two sessions' clips, each numbered locally from 01.
            clips = []
            for index in range(1, 11):
                clip_path = work_dir / f"{index:02d}_first-{index}.mp4"
                clip_path.write_text("fake", encoding="utf-8")
                clips.append(Artifact(None, task.id, "clip_video", str(clip_path), "{}", utc_now_iso()))
            for index in range(1, 8):
                clip_path = second_dir / f"{index:02d}_second-{index}.mp4"
                clip_path.write_text("fake", encoding="utf-8")
                clips.append(Artifact(None, "task-2", "clip_video", str(clip_path), "{}", utc_now_iso()))
            with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
                provider.publish(
                    task,
                    clips,
                    {
                        "session_dir": str(root),
                        "upload_config_file": str(upload_config),
                        "biliup_path": "runtime/biliup",
                        "cookie_file": "runtime/cookies.json",
                        "retry_count": 1,
                        "command_timeout_seconds": 123,
                    },
                )
            # Every uploaded path, in command order, across all batches.
            all_uploaded = [part for call in adapter.run_calls for part in call["cmd"] if str(part).endswith(".mp4")]
            # Second session's 01..07 must continue as 11..17 after first's 01..10.
            self.assertTrue(all_uploaded[0].endswith("01_first-1.mp4"))
            self.assertTrue(all_uploaded[9].endswith("10_first-10.mp4"))
            self.assertTrue(all_uploaded[10].endswith("11_second-1.mp4"))
            self.assertTrue(all_uploaded[16].endswith("17_second-7.mp4"))
def test_publish_creates_progress_from_existing_bvid_for_append_resume(self) -> None:
adapter = _FakeBiliupAdapter()
provider = BiliupCliPublishProvider(adapter=adapter)

View File

@ -0,0 +1,85 @@
from __future__ import annotations
import unittest
from types import SimpleNamespace
from biliup_next.core.models import Task, TaskStep, utc_now_iso
from biliup_next.modules.collection.service import CollectionService
class _FakeRegistry:
def __init__(self, provider) -> None: # type: ignore[no-untyped-def]
self.provider = provider
def get(self, provider_type: str, provider_id: str): # type: ignore[no-untyped-def]
return self.provider
class _FakeProvider:
def sync(self, task, target: str, settings: dict[str, object]) -> dict[str, object]: # type: ignore[no-untyped-def]
return {"status": "skipped", "target": target}
class _FakeRepo:
    """In-memory repo stub exposing just what CollectionService touches."""

    def __init__(self) -> None:
        now = utc_now_iso()
        # One running task with both collection steps still pending.
        self.task = Task("task-1", "local_file", "/tmp/source.mp4", "task-1", "running", now, now)
        self.steps = {
            "collection_a": TaskStep(None, "task-1", "collection_a", "pending", None, None, 0, None, None),
            "collection_b": TaskStep(None, "task-1", "collection_b", "pending", None, None, 0, None, None),
        }
        # Records (task_id, status) pairs for assertions on status transitions.
        self.task_status_updates: list[tuple[str, str]] = []

    def get_task(self, task_id: str):  # type: ignore[no-untyped-def]
        # Only the single seeded task is known.
        return self.task if task_id == self.task.id else None

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # type: ignore[no-untyped-def]
        # Replace the stored step with a copy carrying the updated fields;
        # unspecified kwargs fall back to the previous values.
        step = self.steps[step_name]
        self.steps[step_name] = TaskStep(
            step.id,
            step.task_id,
            step.step_name,
            status,
            kwargs.get("error_code", step.error_code),
            kwargs.get("error_message", step.error_message),
            kwargs.get("retry_count", step.retry_count),
            kwargs.get("started_at", step.started_at),
            kwargs.get("finished_at", step.finished_at),
        )

    def list_steps(self, task_id: str) -> list[TaskStep]:
        return list(self.steps.values())

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        # Only record the transition; the stored task object is not mutated.
        self.task_status_updates.append((task_id, status))
class CollectionServiceTests(unittest.TestCase):
    """Status-transition behavior of CollectionService.run for targets a/b."""

    def test_collection_a_restores_commented_status_so_collection_b_can_run(self) -> None:
        """After target "a" succeeds, the task goes back to "commented" while
        collection_b stays pending, so the engine can still schedule "b"."""
        repo = _FakeRepo()
        service = CollectionService(_FakeRegistry(_FakeProvider()), repo)  # type: ignore[arg-type]
        service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {})  # type: ignore[assignment]
        result = service.run("task-1", "a", {"provider": "fake"})
        self.assertEqual(result["status"], "skipped")
        self.assertEqual(repo.steps["collection_a"].status, "succeeded")
        self.assertEqual(repo.steps["collection_b"].status, "pending")
        self.assertEqual(repo.task_status_updates[-1], ("task-1", "commented"))

    def test_collection_b_marks_collection_synced_when_both_steps_succeeded(self) -> None:
        """With collection_a already succeeded, running "b" finalizes the task
        as "collection_synced" and surfaces the cleanup report in the result."""
        repo = _FakeRepo()
        repo.steps["collection_a"] = TaskStep(None, "task-1", "collection_a", "succeeded", None, None, 0, None, utc_now_iso())
        service = CollectionService(_FakeRegistry(_FakeProvider()), repo)  # type: ignore[arg-type]
        service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {"deleted": []})  # type: ignore[assignment]
        result = service.run("task-1", "b", {"provider": "fake"})
        self.assertEqual(result["status"], "skipped")
        self.assertEqual(repo.steps["collection_b"].status, "succeeded")
        self.assertEqual(repo.task_status_updates[-1], ("task-1", "collection_synced"))
        self.assertEqual(result["cleanup"], {"deleted": []})
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -0,0 +1,277 @@
from __future__ import annotations
import json
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task
from biliup_next.modules.transcribe.providers.groq import GroqTranscribeProvider
class _FakeResponse:
    """Transcription-response double: exposes only the ``segments`` payload."""

    def __init__(self, segments):
        self.segments = segments
class _FakeTranscriptions:
def __init__(self, outcomes: list[object]) -> None:
self.outcomes = list(outcomes)
self.calls: list[dict[str, object]] = []
def create(self, **kwargs): # noqa: ANN003
self.calls.append(kwargs)
outcome = self.outcomes.pop(0)
if isinstance(outcome, Exception):
raise outcome
return outcome
class _FakeGroqClient:
    """Groq client double mirroring the ``client.audio.transcriptions`` path."""

    def __init__(self, outcomes: list[object]) -> None:
        self.audio = SimpleNamespace(transcriptions=_FakeTranscriptions(outcomes))
class GroqTranscribeProviderTests(unittest.TestCase):
    """Behavior of GroqTranscribeProvider: retries, checkpoints, key rotation,
    backoff after rate limits, and the segment-size guard."""

    def test_transcribe_retries_timeout_and_writes_srt_atomically(self) -> None:
        """A timed-out request is retried; the final SRT is written without
        leaving the temporary ``.demo.srt.tmp`` file behind."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            # First call fails with a timeout, second call succeeds.
            client = _FakeGroqClient(
                [
                    RuntimeError("Request timed out."),
                    _FakeResponse([{"start": 0, "end": 1.2, "text": "hello"}]),
                ]
            )
            settings = {
                "groq_api_key": "gsk_test",
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 33,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", return_value=client) as groq_ctor:
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)
            self.assertEqual(Path(artifact.path).read_text(encoding="utf-8"), "1\n00:00:00,000 --> 00:00:01,199\nhello\n\n")
            self.assertFalse((work_dir / ".demo.srt.tmp").exists())
            self.assertEqual(len(client.audio.transcriptions.calls), 2)
            self.assertEqual(client.audio.transcriptions.calls[0]["timeout"], 33)
            self.assertTrue((work_dir / "transcribe_segments" / "part_000.json").exists())
            # Client-side retries are disabled; the provider manages retries itself.
            groq_ctor.assert_called_once_with(api_key="gsk_test", timeout=33, max_retries=0)

    def test_transcribe_reuses_completed_segment_checkpoints(self) -> None:
        """A pre-existing per-segment checkpoint JSON is reused, so only the
        remaining segment is sent to the API; timestamps stay offset-correct."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segments = [work_dir / "temp_audio" / "part_000.mp3", work_dir / "temp_audio" / "part_001.mp3"]
            checkpoint_dir = work_dir / "transcribe_segments"
            checkpoint_dir.mkdir()
            # Checkpoint for part_000 already exists from a previous run.
            (checkpoint_dir / "part_000.json").write_text(
                json.dumps(
                    {
                        "provider": "groq",
                        "model": "whisper-large-v3-turbo",
                        "language": "zh",
                        "audio_file": "part_000.mp3",
                        "segment_duration_seconds": 75,
                        "segments": [{"start": 0, "end": 1, "text": "first"}],
                    }
                ),
                encoding="utf-8",
            )
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                for segment in segments:
                    segment.parent.mkdir(parents=True, exist_ok=True)
                    segment.write_bytes(b"audio")
            client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.5, "text": "second"}])])
            settings = {
                "groq_api_key": "gsk_test",
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 33,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", return_value=client):
                with patch.object(provider, "_initial_segment_duration", return_value=75):
                    with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                        artifact = provider.transcribe(task, source_video, settings)
            srt = Path(artifact.path).read_text(encoding="utf-8")
            self.assertIn("00:00:00,000 --> 00:00:01,000\nfirst", srt)
            # part_001 starts at 75s, so "second" is shifted by one segment.
            self.assertIn("00:01:15,000 --> 00:01:16,500\nsecond", srt)
            self.assertEqual(len(client.audio.transcriptions.calls), 1)
            self.assertEqual(client.audio.transcriptions.calls[0]["file"][0], "part_001.mp3")
            self.assertTrue((checkpoint_dir / "part_001.json").exists())

    def test_transcribe_switches_to_next_api_key_on_rate_limit(self) -> None:
        """A 429 from the first key pool entry causes a new client with the
        next key; the pool in ``groq_api_keys`` takes precedence."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            limited_client = _FakeGroqClient([RuntimeError("Error code: 429 rate_limit")])
            fallback_client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.2, "text": "fallback"}])])
            settings = {
                "groq_api_key": "",
                "groq_api_keys": ["gsk_first", "gsk_second"],
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 20,
                "request_max_retries": 0,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", side_effect=[limited_client, fallback_client]) as groq_ctor:
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)
            self.assertIn("fallback", Path(artifact.path).read_text(encoding="utf-8"))
            self.assertEqual(len(limited_client.audio.transcriptions.calls), 1)
            self.assertEqual(len(fallback_client.audio.transcriptions.calls), 1)
            self.assertEqual([call.kwargs["api_key"] for call in groq_ctor.call_args_list], ["gsk_first", "gsk_second"])

    def test_transcribe_waits_after_all_api_keys_are_rate_limited(self) -> None:
        """When every key in the pool is rate-limited, the provider sleeps for
        the configured backoff and then retries the first key."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            first_client = _FakeGroqClient([RuntimeError("429 rate_limit"), _FakeResponse([{"start": 0, "end": 1, "text": "retry ok"}])])
            second_client = _FakeGroqClient([RuntimeError("429 rate_limit")])
            settings = {
                "groq_api_key": "",
                "groq_api_keys": ["gsk_first", "gsk_second"],
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 20,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 7,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", side_effect=[first_client, second_client]):
                with patch("time.sleep") as sleep_mock:
                    with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                        artifact = provider.transcribe(task, source_video, settings)
            self.assertIn("retry ok", Path(artifact.path).read_text(encoding="utf-8"))
            sleep_mock.assert_called_once_with(7)
            self.assertEqual(len(first_client.audio.transcriptions.calls), 2)
            self.assertEqual(len(second_client.audio.transcriptions.calls), 1)

    def test_transcribe_raises_after_retry_budget_is_exhausted(self) -> None:
        """Non-rate-limit errors past the retry budget surface as ModuleError
        naming the failed audio segment."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            client = _FakeGroqClient([RuntimeError("Connection error."), RuntimeError("Connection error.")])
            settings = {
                "groq_api_key": "gsk_test",
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 20,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", return_value=client):
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    with self.assertRaises(ModuleError) as exc_info:
                        provider.transcribe(task, source_video, settings)
            self.assertEqual(exc_info.exception.message, "Groq 转录失败: part_000.mp3")

    def test_initial_segment_duration_keeps_safety_margin(self) -> None:
        """The computed initial segment duration stays below the naive
        size-derived bound, leaving headroom for encoding variance."""
        self.assertLess(GroqTranscribeProvider._initial_segment_duration(12), 1536)

    def test_extract_audio_segments_retries_when_segment_exceeds_size_limit(self) -> None:
        """If an extracted segment exceeds the byte cap, extraction reruns with
        a shorter duration (100 -> 75 here) and returns the final duration."""
        provider = GroqTranscribeProvider()
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            temp_audio_dir = work_dir / "temp_audio"
            temp_audio_dir.mkdir()
            output_pattern = temp_audio_dir / "part_%03d.mp3"
            durations: list[int] = []
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                durations.append(int(kwargs["segment_duration"]))
                # First pass exceeds the 10-byte limit; second pass fits.
                size = 20 if len(durations) == 1 else 5
                (temp_audio_dir / "part_000.mp3").write_bytes(b"x" * size)
            with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                result = provider._extract_audio_segments_with_size_guard(
                    ffmpeg_bin="ffmpeg",
                    source_path=work_dir / "input.mp4",
                    output_pattern=output_pattern,
                    temp_audio_dir=temp_audio_dir,
                    initial_segment_duration=100,
                    max_segment_bytes=10,
                )
            self.assertEqual(durations, [100, 75])
            self.assertEqual(result, 75)
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -0,0 +1,85 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from biliup_next.core.providers import ProviderManifest
from biliup_next.core.registry import Registry
from biliup_next.infra.db import Database
from biliup_next.infra.task_repository import TaskRepository
from biliup_next.modules.ingest.service import IngestService
class _FakeLocalFileProvider:
    """Ingest provider double that only checks the source file exists."""

    # Manifest mirrors the real local_file provider so Registry.register accepts it.
    manifest = ProviderManifest(
        id="local_file",
        name="Fake Local File Ingest",
        version="0.1.0",
        provider_type="ingest_provider",
        entrypoint="tests.test_ingest_scan_stage:_FakeLocalFileProvider",
        capabilities=["ingest"],
        enabled_by_default=True,
    )

    def validate_source(self, source_path: Path, settings: dict[str, object]) -> None:
        # Fail loudly if the service hands us a path that is not a real file.
        if not source_path.exists() or not source_path.is_file():
            raise AssertionError(f"unexpected source path: {source_path}")
class IngestScanStageTests(unittest.TestCase):
    """End-to-end scan_stage behavior against a real SQLite-backed repository."""

    def setUp(self) -> None:
        # Fresh stage/backup/session directories plus an initialized DB per test.
        self.tempdir = tempfile.TemporaryDirectory()
        root = Path(self.tempdir.name)
        self.stage_dir = root / "stage"
        self.backup_dir = root / "backup"
        self.session_dir = root / "session"
        self.stage_dir.mkdir()
        self.backup_dir.mkdir()
        self.session_dir.mkdir()
        db = Database(root / "test.db")
        db.initialize()
        repo = TaskRepository(db)
        registry = Registry()
        provider = _FakeLocalFileProvider()
        registry.register("ingest_provider", "local_file", provider, provider.manifest)
        self.service = IngestService(registry=registry, repo=repo)

    def tearDown(self) -> None:
        self.tempdir.cleanup()

    def test_scan_stage_uses_moved_file_for_reference_timestamp(self) -> None:
        """Accepting a staged file moves it out of stage_dir, creates a task
        keyed by the moved file's stem, and records segment_started_at parsed
        from the timestamp embedded in the filename."""
        source_path = self.stage_dir / "王海颖唱歌录播 04月14日 17时49分.mp4"
        source_path.write_bytes(b"fake-video")
        settings = {
            "provider": "local_file",
            "stage_dir": str(self.stage_dir),
            "backup_dir": str(self.backup_dir),
            "session_dir": str(self.session_dir),
            "allowed_extensions": [".mp4"],
            "ffprobe_bin": "ffprobe",
            "min_duration_seconds": 0,
            "stability_wait_seconds": 0,
            "meta_sidecar_enabled": True,
        }
        # Avoid invoking real ffprobe; pretend the file is 2 minutes long.
        self.service._probe_duration_seconds = lambda *_args, **_kwargs: 120.0  # type: ignore[method-assign]
        result = self.service.scan_stage(settings)
        self.assertEqual(len(result["accepted"]), 1)
        accepted = result["accepted"][0]
        moved_path = Path(str(accepted["source_path"]))
        self.assertTrue(moved_path.exists())
        self.assertFalse(source_path.exists())
        task = self.service.repo.get_task(moved_path.stem)
        self.assertIsNotNone(task)
        context = self.service.repo.get_task_context(moved_path.stem)
        self.assertIsNotNone(context)
        self.assertIsNotNone(context.segment_started_at)
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -2,6 +2,7 @@ from __future__ import annotations
import tempfile
import unittest
from unittest.mock import patch
from pathlib import Path
from biliup_next.core.config import SettingsService
@ -78,6 +79,146 @@ class SettingsServiceTests(unittest.TestCase):
self.assertTrue((config_dir / "settings.staged.json").exists())
self.assertEqual(bundle.settings["paths"]["cookies_file"], str((root / "runtime" / "cookies.json").resolve()))
    def test_load_applies_environment_overrides_before_path_normalization(self) -> None:
        """Environment overrides (both well-known names like GROQ_API_KEY and
        the BILIUP_NEXT__GROUP__KEY convention) are applied, with integer
        coercion per the schema, and relative path overrides are resolved
        against the project root afterwards."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            config_dir = root / "config"
            config_dir.mkdir(parents=True, exist_ok=True)
            (config_dir / "settings.schema.json").write_text(
                """
                {
                  "groups": {
                    "runtime": {
                      "database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}
                    },
                    "paths": {
                      "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                      "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                      "session_dir": {"type": "string", "default": "data/workspace/session"},
                      "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                      "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {
                      "ffprobe_bin": {"type": "string", "default": "ffprobe"},
                      "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"},
                      "yt_dlp_format": {"type": "string", "default": ""}
                    },
                    "transcribe": {
                      "groq_api_key": {"type": "string", "default": "", "sensitive": true},
                      "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "split": {
                      "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "song_detect": {
                      "codex_cmd": {"type": "string", "default": "codex"},
                      "qwen_cmd": {"type": "string", "default": "qwen"}
                    },
                    "publish": {
                      "biliup_path": {"type": "string", "default": "runtime/biliup"},
                      "cookie_file": {"type": "string", "default": "runtime/cookies.json"}
                    },
                    "collection": {
                      "season_id_a": {"type": "integer", "default": 0},
                      "season_id_b": {"type": "integer", "default": 0}
                    }
                  }
                }
                """,
                encoding="utf-8",
            )
            (config_dir / "settings.standalone.example.json").write_text(
                """
                {
                  "runtime": {"database_path": "data/workspace/biliup_next.db"},
                  "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                  },
                  "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp", "yt_dlp_format": ""},
                  "transcribe": {"groq_api_key": "", "ffmpeg_bin": "ffmpeg"},
                  "split": {"ffmpeg_bin": "ffmpeg"},
                  "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                  "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"},
                  "collection": {"season_id_a": 0, "season_id_b": 0}
                }
                """,
                encoding="utf-8",
            )
            # clear=True isolates the test from the real process environment.
            with patch.dict(
                "os.environ",
                {
                    "GROQ_API_KEY": "gsk_test",
                    "COLLECTION_SEASON_ID_A": "7196643",
                    "BILIUP_NEXT__COLLECTION__SEASON_ID_B": "7196624",
                    "BILIUP_NEXT__PATHS__STAGE_DIR": "data/custom-stage",
                },
                clear=True,
            ):
                bundle = SettingsService(root).load()
            self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "gsk_test")
            self.assertEqual(bundle.settings["collection"]["season_id_a"], 7196643)
            self.assertEqual(bundle.settings["collection"]["season_id_b"], 7196624)
            self.assertEqual(bundle.settings["paths"]["stage_dir"], str((root / "data" / "custom-stage").resolve()))
    def test_empty_environment_values_do_not_override_settings(self) -> None:
        """An environment variable that is set but empty must not clobber the
        value coming from the settings file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            config_dir = root / "config"
            config_dir.mkdir(parents=True, exist_ok=True)
            (config_dir / "settings.schema.json").write_text(
                """
                {
                  "groups": {
                    "runtime": {"database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}},
                    "paths": {
                      "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                      "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                      "session_dir": {"type": "string", "default": "data/workspace/session"},
                      "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                      "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {"ffprobe_bin": {"type": "string", "default": "ffprobe"}, "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"}},
                    "transcribe": {"groq_api_key": {"type": "string", "default": ""}, "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "split": {"ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "song_detect": {"codex_cmd": {"type": "string", "default": "codex"}, "qwen_cmd": {"type": "string", "default": "qwen"}},
                    "publish": {"biliup_path": {"type": "string", "default": "runtime/biliup"}, "cookie_file": {"type": "string", "default": "runtime/cookies.json"}}
                  }
                }
                """,
                encoding="utf-8",
            )
            (config_dir / "settings.standalone.example.json").write_text(
                """
                {
                  "runtime": {"database_path": "data/workspace/biliup_next.db"},
                  "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                  },
                  "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp"},
                  "transcribe": {"groq_api_key": "from-file", "ffmpeg_bin": "ffmpeg"},
                  "split": {"ffmpeg_bin": "ffmpeg"},
                  "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                  "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"}
                }
                """,
                encoding="utf-8",
            )
            # GROQ_API_KEY is present but blank; the file value must win.
            with patch.dict("os.environ", {"GROQ_API_KEY": ""}, clear=True):
                bundle = SettingsService(root).load()
            self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "from-file")
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -1,11 +1,15 @@
from __future__ import annotations
import json
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.infra.adapters.codex_cli import CodexCliAdapter
from biliup_next.modules.song_detect.providers.codex import CodexSongDetector
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
@ -38,6 +42,33 @@ class FakeQwenCliAdapter:
return type("Result", (), {"returncode": self.returncode, "stdout": "ok", "stderr": ""})()
class FakeCodexCliAdapter:
    """Codex CLI adapter double that fabricates a single detected song.

    Instead of shelling out, it drops a fixed ``songs.json`` into the given
    work directory and returns a canned process-result object.
    """

    def __init__(self, returncode: int = 0) -> None:
        self.returncode = returncode

    def run_song_detect(self, *, codex_cmd: str, work_dir: Path, prompt: str):  # noqa: ANN001
        payload = {
            "songs": [
                {
                    "start": "00:01:23,000",
                    "end": "00:03:45,000",
                    "title": "测试歌曲",
                    "artist": "测试歌手",
                    "confidence": 0.93,
                    "evidence": "歌词命中",
                }
            ]
        }
        (work_dir / "songs.json").write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
        result_cls = type("Result", (), {"returncode": self.returncode, "stdout": "codex stdout", "stderr": "codex stderr"})
        return result_cls()
class SongDetectProviderTests(unittest.TestCase):
def test_qwen_cli_provider_generates_json_and_txt_artifacts(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
@ -72,6 +103,67 @@ class SongDetectProviderTests(unittest.TestCase):
self.assertTrue(Path(songs_txt.path).exists())
self.assertIn("测试歌曲", Path(songs_txt.path).read_text(encoding="utf-8"))
    def test_codex_provider_writes_execution_output_to_session_log(self) -> None:
        """The Codex detector keeps stdout/stderr/returncode out of artifact
        metadata and appends them to codex.log next to the source video."""
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            subtitle_path = work_dir / "subtitle.srt"
            subtitle_path.write_text("1\n00:00:00,000 --> 00:00:03,000\n测试字幕\n", encoding="utf-8")
            provider = CodexSongDetector(adapter=FakeCodexCliAdapter())
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "video.mp4"),
                title="task-1",
                status="transcribed",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            subtitle = Artifact(
                id=None,
                task_id=task.id,
                artifact_type="subtitle_srt",
                path=str(subtitle_path),
                metadata_json=None,
                created_at=utc_now_iso(),
            )
            songs_json, songs_txt = provider.detect(task, subtitle, {"codex_cmd": "codex"})
            json_metadata = json.loads(songs_json.metadata_json)
            txt_metadata = json.loads(songs_txt.metadata_json)
            self.assertEqual(json_metadata["provider"], "codex")
            self.assertEqual(txt_metadata["provider"], "codex")
            # Execution details must not leak into artifact metadata.
            self.assertNotIn("execution", json_metadata)
            codex_log = work_dir / "codex.log"
            self.assertTrue(codex_log.exists())
            log_text = codex_log.read_text(encoding="utf-8")
            self.assertIn("returncode: 0", log_text)
            self.assertIn("codex stdout", log_text)
            self.assertIn("codex stderr", log_text)
    def test_codex_cli_adapter_disables_inner_sandbox_and_normalizes_proxy_env(self) -> None:
        """The real adapter must bypass Codex's inner sandbox flags and prefix
        a scheme-less HTTPS_PROXY value with ``http://`` before spawning."""
        with tempfile.TemporaryDirectory() as tmpdir:
            calls = []
            def fake_run(cmd, **kwargs):  # noqa: ANN001
                calls.append((cmd, kwargs))
                return type("Result", (), {"returncode": 0, "stdout": "", "stderr": ""})()
            # Proxy value deliberately lacks a scheme to exercise normalization.
            with patch.dict(os.environ, {"HTTPS_PROXY": "192.168.1.100:7897"}, clear=True):
                with patch("subprocess.run", side_effect=fake_run):
                    CodexCliAdapter().run_song_detect(
                        codex_cmd="codex",
                        work_dir=Path(tmpdir),
                        prompt="detect songs",
                    )
            cmd, kwargs = calls[0]
            self.assertIn("--dangerously-bypass-approvals-and-sandbox", cmd)
            self.assertNotIn("--full-auto", cmd)
            self.assertNotIn("workspace-write", cmd)
            self.assertEqual(kwargs["env"]["HTTPS_PROXY"], "http://192.168.1.100:7897")
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -0,0 +1,103 @@
from __future__ import annotations
import unittest
from types import SimpleNamespace
from biliup_next.app.retry_meta import retry_meta_for_step
from biliup_next.app.task_engine import next_runnable_step
from biliup_next.app.task_policies import resolve_failure
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import TaskStep
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
class _Repo:
    """Repository double seeded with one running song_detect step.

    Records every step/task status update so retry-policy assertions can
    inspect what the engine wrote.
    """

    def __init__(self) -> None:
        self.steps = [TaskStep(None, "task-1", "song_detect", "running", None, None, 0, None, None)]
        self.step_updates: list[tuple] = []
        self.task_updates: list[tuple] = []

    def list_steps(self, task_id: str):  # noqa: ANN001
        return list(self.steps)

    def get_task(self, task_id: str):  # noqa: ANN001
        return SimpleNamespace(id=task_id, status="running")

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # noqa: ANN001
        self.step_updates.append((task_id, step_name, status, kwargs))
        # Replace the stored step with a fresh TaskStep reflecting the update.
        self.steps = [
            TaskStep(
                None,
                task_id,
                step_name,
                status,
                kwargs.get("error_code"),
                kwargs.get("error_message"),
                kwargs.get("retry_count", 0),
                kwargs.get("started_at"),
                kwargs.get("finished_at"),
            )
        ]

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        self.task_updates.append((task_id, status, updated_at))
class SongDetectRetryPolicyTests(unittest.TestCase):
    """Retry scheduling and failure resolution for the song_detect step."""

    def test_retry_meta_reports_wait_window_for_song_detect(self) -> None:
        """A retryable song_detect step finished in the future reports a full
        600-second wait (10-minute schedule) and is not yet due."""
        step = TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")
        payload = retry_meta_for_step(step, {"song_detect": {"retry_schedule_minutes": [10]}})
        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_song_detect(self) -> None:
        """The engine schedules nothing while song_detect is inside its retry
        wait window, surfacing a waiting payload instead."""
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "song_detect": TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }
        step_name, waiting_payload = next_runnable_step(task, steps, state)
        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "song_detect")

    def test_resolve_failure_adds_song_detect_retry_delay(self) -> None:
        """A retryable SONG_DETECT_FAILED error picks the first schedule entry
        (5 minutes -> 300 seconds) as the next retry delay."""
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }
        result = resolve_failure(task, repo, state, ModuleError(code="SONG_DETECT_FAILED", message="boom", retryable=True))
        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)

    def test_qwen_auth_errors_are_not_retryable(self) -> None:
        """Auth failures from the Qwen CLI are classified as such; generic
        network noise is not."""
        self.assertTrue(QwenCliSongDetector._is_auth_error("[API Error: 401 invalid access token or token expired]"))
        self.assertFalse(QwenCliSongDetector._is_auth_error("temporary network failure"))
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -51,6 +51,7 @@ class TaskEngineTests(unittest.TestCase):
}
state = {
"settings": {
"transcribe": {},
"comment": {"enabled": True},
"collection": {"enabled": True},
"paths": {},

View File

@ -0,0 +1,84 @@
from __future__ import annotations
import unittest
from types import SimpleNamespace
from biliup_next.app.retry_meta import retry_meta_for_step
from biliup_next.app.task_engine import next_runnable_step
from biliup_next.app.task_policies import resolve_failure
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import TaskStep
class _Repo:
def __init__(self) -> None:
self.steps = [TaskStep(None, "task-1", "transcribe", "running", None, None, 0, None, None)]
self.step_updates: list[tuple] = []
self.task_updates: list[tuple] = []
def list_steps(self, task_id: str): # noqa: ANN001
return list(self.steps)
def get_task(self, task_id: str): # noqa: ANN001
return SimpleNamespace(id=task_id, status="running")
def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None: # noqa: ANN001
self.step_updates.append((task_id, step_name, status, kwargs))
self.steps = [TaskStep(None, task_id, step_name, status, kwargs.get("error_code"), kwargs.get("error_message"), kwargs.get("retry_count", 0), kwargs.get("started_at"), kwargs.get("finished_at"))]
def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
self.task_updates.append((task_id, status, updated_at))
class TranscribeRetryPolicyTests(unittest.TestCase):
    """Retry scheduling and failure resolution for the transcribe step."""

    def test_retry_meta_reports_wait_window_for_transcribe(self) -> None:
        """A retryable transcribe step finished in the future reports a full
        600-second wait (10-minute schedule) and is not yet due."""
        step = TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")
        payload = retry_meta_for_step(step, {"transcribe": {"retry_schedule_minutes": [10]}})
        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_transcribe(self) -> None:
        """The engine schedules nothing while transcribe is inside its retry
        wait window, surfacing a waiting payload instead."""
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "transcribe": TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }
        step_name, waiting_payload = next_runnable_step(task, steps, state)
        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "transcribe")

    def test_resolve_failure_adds_transcribe_retry_delay(self) -> None:
        """A retryable GROQ_TRANSCRIBE_FAILED error picks the first schedule
        entry (5 minutes -> 300 seconds) as the next retry delay."""
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }
        result = resolve_failure(task, repo, state, ModuleError(code="GROQ_TRANSCRIBE_FAILED", message="boom", retryable=True))
        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

170
tests/test_video_links.py Normal file
View File

@ -0,0 +1,170 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
import subprocess
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list
from biliup_next.infra.video_links import link_context_for_task
class VideoLinksTests(unittest.TestCase):
    def test_fetch_biliup_list_keeps_pubing_videos(self) -> None:
        """Parsing the biliup CLI listing keeps in-review and public videos
        while dropping log lines and private entries."""
        output = (
            "2026-04-22 15:56:43 INFO biliup_cli::uploader: user: test\n"
            "BVREVIEW\t王海颖唱歌录播 04月22日 15时56分\t审核中\n"
            "BVPUB\t王海颖唱歌录播 04月20日 22时08分\t开放浏览\n"
            "BVPRIVATE\t私密视频\t仅自己可见\n"
        )
        with patch(
            "biliup_next.infra.adapters.full_video_locator.subprocess.run",
            return_value=subprocess.CompletedProcess(["biliup"], 0, stdout=output, stderr=""),
        ):
            videos = fetch_biliup_list({"biliup_path": "biliup", "cookie_file": "cookies.json"}, max_pages=1)
        self.assertEqual(
            videos,
            [
                {"bvid": "BVREVIEW", "title": "王海颖唱歌录播 04月22日 15时56分"},
                {"bvid": "BVPUB", "title": "王海颖唱歌录播 04月20日 22时08分"},
            ],
        )
def test_previous_live_falls_back_to_biliup_list(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
source_path = Path(tmpdir) / "source.mp4"
source_path.write_bytes(b"")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 04月19日 22时10分",
source_path=str(source_path),
)
repo = SimpleNamespace(get_task_context=lambda task_id: None)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.video_links.fetch_biliup_list",
return_value=[
{"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月18日 22时06分 共10首歌"},
{"bvid": "BVNEWER", "title": "王海颖唱歌录播 04月20日 22时00分"},
{"bvid": "BVPREV", "title": "王海颖唱歌录播 04月18日 22时06分"},
{"bvid": "BVOLDER", "title": "王海颖唱歌录播 04月17日 22时00分"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["previous_full_video_bvid"], "BVPREV")
self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVPREV")
self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")
def test_previous_live_merges_repo_and_biliup_list_links(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
root = Path(tmpdir)
current_path = root / "current" / "source.mp4"
previous_path = root / "previous" / "source.mp4"
current_path.parent.mkdir()
previous_path.parent.mkdir()
current_path.write_bytes(b"")
previous_path.write_bytes(b"")
(previous_path.parent / "full_video_bvid.txt").write_text("BVLOCALFULL", encoding="utf-8")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 04月19日 22时10分",
source_path=str(current_path),
)
previous_task = SimpleNamespace(
id="task-previous",
title="王海颖唱歌录播 04月18日 22时06分",
source_path=str(previous_path),
)
current_context = SimpleNamespace(
task_id=task.id,
streamer="王海颖",
session_key="王海颖-0419",
segment_started_at="2026-04-19T22:10:00",
)
previous_context = SimpleNamespace(
task_id=previous_task.id,
streamer="王海颖",
session_key="王海颖-0418",
segment_started_at="2026-04-18T22:06:00",
full_video_bvid="BVLOCALFULL",
)
tasks = {task.id: task, previous_task.id: previous_task}
contexts = {task.id: current_context, previous_task.id: previous_context}
repo = SimpleNamespace(
get_task_context=lambda task_id: contexts.get(task_id),
get_task=lambda task_id: tasks.get(task_id),
find_recent_task_contexts=lambda streamer, limit=50: [current_context, previous_context],
)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.video_links.fetch_biliup_list",
return_value=[
{"bvid": "BVPURE", "title": "【王海颖(歌曲纯享版)】04月18日 22时06分 共18首歌"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["previous_full_video_bvid"], "BVLOCALFULL")
self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVLOCALFULL")
self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")
def test_previous_live_biliup_list_handles_year_boundary(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
source_path = Path(tmpdir) / "source.mp4"
source_path.write_bytes(b"")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 01月01日 22时10分",
source_path=str(source_path),
)
repo = SimpleNamespace(get_task_context=lambda task_id: None)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.video_links.fetch_biliup_list",
return_value=[
{"bvid": "BVPREV", "title": "王海颖唱歌录播 12月31日 22时06分"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["previous_full_video_bvid"], "BVPREV")
def test_current_full_video_falls_back_to_biliup_list(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
source_path = Path(tmpdir) / "source.mp4"
source_path.write_bytes(b"")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 04月22日 15时56分",
source_path=str(source_path),
)
repo = SimpleNamespace(get_task_context=lambda task_id: None)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.adapters.full_video_locator.fetch_biliup_list",
return_value=[
{"bvid": "BVFULL", "title": "王海颖唱歌录播 04月22日 15时56分"},
{"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月22日 15时56分 共20首歌"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["current_full_video_bvid"], "BVFULL")
self.assertEqual(context["current_full_video_link"], "https://www.bilibili.com/video/BVFULL")
self.assertEqual((source_path.parent / "full_video_bvid.txt").read_text(encoding="utf-8"), "BVFULL")
# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,116 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from biliup_next.core.models import Task, utc_now_iso
from biliup_next.infra.workspace_cleanup import WorkspaceCleanupService
class _FakeRepo:
def __init__(self, tasks: list[Task], session_key: str | None = None) -> None:
self.tasks = {task.id: task for task in tasks}
self.session_key = session_key
self.deleted_artifacts: list[tuple[str, str]] = []
self.deleted_artifact_paths: list[tuple[str, str]] = []
def get_task(self, task_id: str) -> Task | None:
return self.tasks.get(task_id)
def get_task_context(self, task_id: str): # noqa: ANN201
if self.session_key is None or task_id not in self.tasks:
return None
return SimpleNamespace(task_id=task_id, session_key=self.session_key)
def list_task_contexts_by_session_key(self, session_key: str): # noqa: ANN201
if session_key != self.session_key:
return []
return [SimpleNamespace(task_id=task_id, session_key=session_key) for task_id in self.tasks]
def delete_artifacts(self, task_id: str, artifact_type: str) -> None:
self.deleted_artifacts.append((task_id, artifact_type))
def delete_artifact_by_path(self, task_id: str, path: str) -> None:
self.deleted_artifact_paths.append((task_id, path))
def _make_task(task_id: str, root: Path) -> Task:
    """Build a collection_synced Task with an on-disk workspace under *root*.

    The workspace contains a non-empty source.mp4 plus split_video/ and
    publish_video/ directories, each holding a single clip file.
    """
    timestamp = utc_now_iso()
    workspace = root / task_id
    workspace.mkdir(parents=True)
    source_file = workspace / "source.mp4"
    source_file.write_bytes(b"source")
    for subdir in ("split_video", "publish_video"):
        clip_dir = workspace / subdir
        clip_dir.mkdir()
        clip_path = clip_dir / "01_song.mp4"
        clip_path.write_bytes(b"clip")
    return Task(task_id, "local_file", str(source_file), task_id, "collection_synced", timestamp, timestamp)
class WorkspaceCleanupServiceTests(unittest.TestCase):
    """Behavioural coverage for WorkspaceCleanupService.cleanup_task_outputs."""

    def test_cleanup_removes_source_split_and_publish_video_for_single_task(self) -> None:
        """With both flags on, the source file and both clip directories are removed."""
        with tempfile.TemporaryDirectory() as tmp:
            workspace_root = Path(tmp)
            task = _make_task("task-1", workspace_root)
            repo = _FakeRepo([task])
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": True,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(task.id, options)
            task_dir = workspace_root / "task-1"
            for leftover in ("source.mp4", "split_video", "publish_video"):
                self.assertFalse((task_dir / leftover).exists())
            self.assertEqual(result["task_ids"], ["task-1"])
            # Clip artifacts are deleted by type; the source is deleted by path.
            self.assertEqual(repo.deleted_artifacts, [("task-1", "clip_video")])
            expected_source = str((task_dir / "source.mp4").resolve())
            self.assertEqual(repo.deleted_artifact_paths, [("task-1", expected_source)])

    def test_cleanup_removes_all_tasks_in_same_session(self) -> None:
        """Cleaning one task also cleans every sibling task in the same session."""
        with tempfile.TemporaryDirectory() as tmp:
            workspace_root = Path(tmp)
            first = _make_task("task-1", workspace_root)
            second = _make_task("task-2", workspace_root)
            repo = _FakeRepo([first, second], session_key="session-1")
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": True,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(first.id, options)
            for task_id in ("task-1", "task-2"):
                task_dir = workspace_root / task_id
                for leftover in ("source.mp4", "split_video", "publish_video"):
                    self.assertFalse((task_dir / leftover).exists())
            self.assertEqual(result["task_ids"], ["task-1", "task-2"])
            self.assertEqual(
                repo.deleted_artifacts,
                [("task-1", "clip_video"), ("task-2", "clip_video")],
            )

    def test_cleanup_skips_missing_source_video(self) -> None:
        """An already-missing source is reported under "skipped" and never deleted."""
        with tempfile.TemporaryDirectory() as tmp:
            workspace_root = Path(tmp)
            task = _make_task("task-1", workspace_root)
            source_file = Path(task.source_path)
            source_file.unlink()
            repo = _FakeRepo([task])
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": False,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(task.id, options)
            self.assertIn(str(source_file.resolve()), result["skipped"])
            self.assertEqual(repo.deleted_artifact_paths, [])
# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()