feat: package docker deployment and publish flow

This commit is contained in:
theshy
2026-04-22 16:20:03 +08:00
parent 055474360e
commit 2146687dc6
178 changed files with 24318 additions and 20855 deletions

19
.dockerignore Normal file
View File

@ -0,0 +1,19 @@
.git
.venv
.pytest_cache
__pycache__
*.pyc
data/
runtime/cookies.json
runtime/upload_config.json
runtime/biliup
runtime/codex/
runtime/logs/
frontend/node_modules/
frontend/dist/
.env
config/settings.json
config/settings.staged.json

49
.env.example Normal file
View File

@ -0,0 +1,49 @@
# Web/API port exposed on the host.
BILIUP_NEXT_PORT=8000
# Image used by both api and worker. Override this when using a versioned tag
# or a private registry image, for example 192.168.1.100:25490/biliup-next:20260420.
BILIUP_NEXT_IMAGE=biliup-next:local
# Worker polling interval in seconds.
WORKER_INTERVAL=5
# Container timezone.
TZ=Asia/Shanghai
# Optional container outbound proxy. In Docker Desktop/WSL, host.docker.internal
# points to the Windows host; set this to your local proxy port.
# These values are also passed as Docker build args for apt/pip/npm.
# HTTP_PROXY=http://host.docker.internal:7897
# HTTPS_PROXY=http://host.docker.internal:7897
# ALL_PROXY=http://host.docker.internal:7897
# NO_PROXY=localhost,127.0.0.1,api,worker
#
# Docker build-time proxy. Separate names avoid being overridden by host
# HTTP_PROXY/HTTPS_PROXY when Compose interpolates build args.
# DOCKER_BUILD_HTTP_PROXY=http://host.docker.internal:7897
# DOCKER_BUILD_HTTPS_PROXY=http://host.docker.internal:7897
# DOCKER_BUILD_ALL_PROXY=http://host.docker.internal:7897
# DOCKER_BUILD_NO_PROXY=localhost,127.0.0.1,api,worker
# Required for Groq transcription. Prefer this env var over writing the key
# directly into config/settings.json.
GROQ_API_KEY=
# Optional key pool. Use a JSON array; keys here are tried before GROQ_API_KEY.
# GROQ_API_KEYS=["gsk_xxx","gsk_yyy"]
# Optional for the Codex song detector when you do not mount an existing
# Codex login state into runtime/codex.
OPENAI_API_KEY=
# Bilibili collection IDs.
# A: live full-video collection
# B: live split/pure-song collection
COLLECTION_SEASON_ID_A=7196643
COLLECTION_SEASON_ID_B=7196624
# Optional explicit config overrides. The generic format is:
# BILIUP_NEXT__GROUP__FIELD=value
#
# BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
# BILIUP_NEXT__PUBLISH__RATE_LIMIT_RETRY_SCHEDULE_MINUTES=[15,30,60]

5
.gitignore vendored
View File

@ -1,4 +1,8 @@
.venv/
.codex
.codex/
.env
.tmp-tests/
__pycache__/
*.pyc
*.pyo
@ -12,6 +16,7 @@ systemd/rendered/
runtime/cookies.json
runtime/upload_config.json
runtime/biliup
runtime/codex/
runtime/logs/
frontend/node_modules/

61
Dockerfile Normal file
View File

@ -0,0 +1,61 @@
# --- Stage 1: build the frontend bundle with Node ---
FROM node:24-bookworm-slim AS frontend-builder
# Build-time proxy settings. Both upper- and lower-case forms are declared
# because different tools (npm, apt, pip) read different casings.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG ALL_PROXY
ARG NO_PROXY
ARG http_proxy
ARG https_proxy
ARG all_proxy
ARG no_proxy
WORKDIR /build/frontend
# Copy only the package manifests first so `npm ci` is cached unless
# dependencies actually change.
COPY frontend/package*.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build

# --- Stage 2: runtime image with Python, ffmpeg, and the built frontend ---
FROM python:3.12-slim AS app
# Same proxy args re-declared: ARGs do not carry across build stages.
ARG HTTP_PROXY
ARG HTTPS_PROXY
ARG ALL_PROXY
ARG NO_PROXY
ARG http_proxy
ARG https_proxy
ARG all_proxy
ARG no_proxy
# BILIUP_NEXT_CONTAINER=1 lets the app detect it is running inside Docker.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    BILIUP_NEXT_CONTAINER=1
WORKDIR /app
# ffmpeg is needed for transcribe/split steps; curl/ca-certificates for
# outbound HTTPS calls.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*
COPY pyproject.toml README.md ./
COPY src ./src
COPY config ./config
# Only example/README runtime files are baked in; real credentials come from
# the host bind mount at runtime (see docker-compose.yml).
COPY runtime/README.md runtime/cookies.example.json runtime/upload_config.example.json ./runtime/
COPY --from=frontend-builder /build/frontend/dist ./frontend/dist
# Reuse the node binary and global modules from the builder stage so the
# runtime image gets node/npm without a second install.
# NOTE(review): assumes the python:3.12-slim base is binary-compatible with
# node:24-bookworm-slim (both Debian bookworm today) — confirm if either tag
# moves to a different Debian release.
COPY --from=frontend-builder /usr/local/bin/node /usr/local/bin/node
COPY --from=frontend-builder /usr/local/lib/node_modules /usr/local/lib/node_modules
# Install the app itself, yt-dlp for URL ingest, recreate the npm/npx
# symlinks for the copied node tree, then install the codex CLI used by
# song_detect.provider=codex.
RUN pip install --editable . \
    && pip install yt-dlp \
    && ln -sf ../lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm \
    && ln -sf ../lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx \
    && npm install -g @openai/codex
# Pre-create workspace/runtime directories so the app can start even before
# the host bind mounts are populated.
RUN mkdir -p /app/data/workspace/stage /app/data/workspace/session /app/data/workspace/backup /app/runtime/logs /root/.codex
EXPOSE 8000
# Default command serves the API; the compose worker service overrides this.
CMD ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]

View File

@ -59,6 +59,10 @@ bash setup.sh
- `docs/cold-start-checklist.md`
发布流程、输出文案和评论示例见:
- `docs/publish-output-examples.md`
浏览器访问:
```text
@ -192,6 +196,29 @@ cd /home/theshy/biliup/biliup-next
- 内容按 `P1/P2/P3` 分组
- 依赖 `full_video_bvid.txt` 或通过标题匹配解析到完整版 BV
评论格式和投稿文案一样,优先从 `runtime/upload_config.json` 读取。可编辑字段:
```json
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
```
常用变量:
- 链接:`{current_full_video_link}``{current_pure_video_link}``{previous_full_video_link}``{previous_pure_video_link}`
- 分段与序号:`{part_index}``{song_index}`
- 纯享歌单:`{title}``{artist}``{artist_suffix}``{song_text}`
- 完整版时间轴:`{line_text}`
如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过。
清理默认关闭:
- `cleanup.delete_source_video_after_collection_synced = false`
@ -201,11 +228,14 @@ cd /home/theshy/biliup/biliup-next
## Full Video BV Input
完整版 `BV` 目前支持 3 种来源:
完整版 `BV` 目前支持 4 种来源:
- `stage/*.meta.json` 中的 `full_video_bvid`
- 前端 / API 手工绑定
- webhook`POST /webhooks/full-video-uploaded`
- `biliup list` 标题匹配,包含 `开放浏览``审核中` 状态
只要完整版上传后已经生成 BV即使仍在审核中也可以被用于纯享版简介、动态和评论互链。
推荐 webhook 负载:
@ -320,3 +350,14 @@ curl -X POST http://127.0.0.1:8787/tasks \
- `ingest.provider = bilibili_url`
- `ingest.yt_dlp_cmd = yt-dlp`
## Docker Compose Deployment
如果希望用容器方式一键运行 API 和 worker请参考 [README_DEPLOY.md](README_DEPLOY.md)。
快速入口:
```bash
./scripts/init-docker-config.sh
docker compose up -d --build
```

176
README_DEPLOY.md Normal file
View File

@ -0,0 +1,176 @@
# Docker Compose Deployment
This deployment runs the API and worker as two services from the same image.
Runtime state, credentials, staged videos, generated sessions, and the SQLite
database stay on the host through bind mounts.
## 1. Initialize Local Files
```bash
chmod +x scripts/init-docker-config.sh
./scripts/init-docker-config.sh
```
This creates these files if they do not already exist:
```text
.env
config/settings.json
runtime/cookies.json
runtime/upload_config.json
data/workspace/
```
## 2. Edit Required Secrets And IDs
Edit `.env`:
```env
GROQ_API_KEY=your_groq_key
OPENAI_API_KEY=your_openai_key_if_using_codex
COLLECTION_SEASON_ID_A=7196643
COLLECTION_SEASON_ID_B=7196624
```
Edit `runtime/cookies.json` and `runtime/upload_config.json` with real Bilibili
credentials and upload metadata.
`runtime/upload_config.json` also controls pure-video title, description,
dynamic text, and top-comment formatting. Existing deployments mount
`./runtime` from the host, so updating the image does not overwrite this file.
When you want to change output text, edit the host file directly.
Common output templates:
```json
{
"template": {
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}\n\n本视频为歌曲纯享切片适合只听歌曲。",
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}"
},
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次直播{previous_full_video_link} (上一场完整录播)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次直播{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
}
```
Supported comment variables:
- `{current_full_video_link}` / `{current_pure_video_link}`
- `{previous_full_video_link}` / `{previous_pure_video_link}`
- `{part_index}` / `{song_index}`
- `{title}` / `{artist}` / `{artist_suffix}` / `{song_text}` / `{line_text}`
If a comment header line contains an empty link variable, that whole line is
omitted. This prevents comments from showing blank `上次直播:` lines when the
previous live video cannot be found.
Provide the `biliup` binary at:
```text
runtime/biliup
```
It must be executable inside the container:
```bash
chmod +x runtime/biliup
```
The image installs the `codex` CLI for `song_detect.provider=codex`. Provide
Codex auth in one of these ways:
```text
OPENAI_API_KEY in .env
runtime/codex mounted to /root/.codex
```
## 3. Start
```bash
docker compose up -d --build
```
Open:
```text
http://127.0.0.1:8000
```
Drop videos into:
```text
data/workspace/stage/
```
## Common Commands
```bash
docker compose logs -f api
docker compose logs -f worker
docker compose restart worker
docker compose down
```
Run one scheduler cycle manually:
```bash
docker compose run --rm worker biliup-next run-once
```
Run doctor:
```bash
docker compose run --rm api biliup-next doctor
```
## Environment Overrides
`config/settings.json` is still the base configuration. Environment variables
override selected values at runtime.
The Compose file already forces container-safe paths such as
`/app/data/workspace` and `/app/runtime/cookies.json`, so an existing local
`config/settings.json` with host paths can still be mounted safely.
Generic format:
```text
BILIUP_NEXT__GROUP__FIELD=value
```
Examples:
```env
BILIUP_NEXT__PATHS__STAGE_DIR=/app/data/workspace/stage
BILIUP_NEXT__PUBLISH__BILIUP_PATH=/app/runtime/biliup
BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
```
Convenience aliases:
```env
GROQ_API_KEY=...
COLLECTION_SEASON_ID_A=7196643
COLLECTION_SEASON_ID_B=7196624
```
## Data Persistence
These host paths are mounted into the containers:
```text
./config -> /app/config
./runtime -> /app/runtime
./data/workspace -> /app/data/workspace
```
Do not store `cookies.json`, Groq keys, or generated workspace data in the image.
They should stay in the mounted host directories.

View File

@ -0,0 +1,127 @@
{
"runtime": {
"database_path": "/app/data/workspace/biliup_next.db",
"control_token": "",
"log_level": "INFO"
},
"paths": {
"stage_dir": "/app/data/workspace/stage",
"backup_dir": "/app/data/workspace/backup",
"session_dir": "/app/data/workspace/session",
"cookies_file": "/app/runtime/cookies.json",
"upload_config_file": "/app/runtime/upload_config.json"
},
"scheduler": {
"candidate_scan_limit": 500,
"max_tasks_per_cycle": 50,
"prioritize_retry_due": true,
"oldest_first": true,
"status_priority": [
"failed_retryable",
"created",
"transcribed",
"songs_detected",
"split_done",
"published",
"commented",
"collection_synced"
]
},
"ingest": {
"provider": "local_file",
"min_duration_seconds": 900,
"ffprobe_bin": "ffprobe",
"yt_dlp_cmd": "yt-dlp",
"yt_dlp_format": "",
"allowed_extensions": [
".mp4",
".flv",
".mkv",
".mov"
],
"stage_min_free_space_mb": 1024,
"stability_wait_seconds": 30,
"session_gap_minutes": 60,
"meta_sidecar_enabled": true,
"meta_sidecar_suffix": ".meta.json"
},
"transcribe": {
"provider": "groq",
"groq_api_key": "",
"groq_api_keys": [],
"ffmpeg_bin": "ffmpeg",
"max_file_size_mb": 12,
"request_timeout_seconds": 180,
"request_max_retries": 1,
"request_retry_backoff_seconds": 30,
"serialize_groq_requests": true,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"song_detect": {
"provider": "codex",
"codex_cmd": "codex",
"qwen_cmd": "qwen",
"poll_interval_seconds": 2,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"split": {
"provider": "ffmpeg_copy",
"ffmpeg_bin": "ffmpeg",
"poll_interval_seconds": 2,
"min_free_space_mb": 2048
},
"publish": {
"provider": "biliup_cli",
"biliup_path": "/app/runtime/biliup",
"cookie_file": "/app/runtime/cookies.json",
"retry_count": 5,
"retry_schedule_minutes": [
15,
5,
5,
5,
5
],
"retry_backoff_seconds": 300,
"command_timeout_seconds": 1800,
"rate_limit_retry_schedule_minutes": [
15,
30,
60
]
},
"comment": {
"provider": "bilibili_top_comment",
"enabled": true,
"max_retries": 5,
"base_delay_seconds": 180,
"poll_interval_seconds": 10,
"post_split_comment": true,
"post_full_video_timeline_comment": true
},
"collection": {
"provider": "bilibili_collection",
"enabled": true,
"season_id_a": 7196643,
"season_id_b": 7196624,
"allow_fuzzy_full_video_match": false,
"append_collection_a_new_to_end": true,
"append_collection_b_new_to_end": true
},
"cleanup": {
"delete_source_video_after_collection_synced": false,
"delete_split_videos_after_collection_synced": false
}
}

View File

@ -1,15 +1,15 @@
{
"runtime": {
"database_path": "data/workspace/biliup_next.db",
"database_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/biliup_next.db",
"control_token": "",
"log_level": "INFO"
},
"paths": {
"stage_dir": "data/workspace/stage",
"backup_dir": "data/workspace/backup",
"session_dir": "data/workspace/session",
"cookies_file": "runtime/cookies.json",
"upload_config_file": "runtime/upload_config.json"
"stage_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/stage",
"backup_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/backup",
"session_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/session",
"cookies_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
"upload_config_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/upload_config.json"
},
"scheduler": {
"candidate_scan_limit": 500,
@ -31,7 +31,7 @@
"provider": "local_file",
"min_duration_seconds": 900,
"ffprobe_bin": "ffprobe",
"yt_dlp_cmd": "yt-dlp",
"yt_dlp_cmd": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/.venv/bin/yt-dlp",
"yt_dlp_format": "",
"allowed_extensions": [
".mp4",
@ -47,15 +47,34 @@
},
"transcribe": {
"provider": "groq",
"groq_api_key": "",
"groq_api_key": "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",
"groq_api_keys": [],
"ffmpeg_bin": "ffmpeg",
"max_file_size_mb": 23
"max_file_size_mb": 12,
"request_timeout_seconds": 180,
"request_max_retries": 1,
"request_retry_backoff_seconds": 30,
"serialize_groq_requests": true,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"song_detect": {
"provider": "qwen_cli",
"provider": "codex",
"codex_cmd": "codex",
"qwen_cmd": "qwen",
"poll_interval_seconds": 2
"poll_interval_seconds": 2,
"retry_count": 3,
"retry_backoff_seconds": 300,
"retry_schedule_minutes": [
5,
10,
15
]
},
"split": {
"provider": "ffmpeg_copy",
@ -65,8 +84,8 @@
},
"publish": {
"provider": "biliup_cli",
"biliup_path": "runtime/biliup",
"cookie_file": "runtime/cookies.json",
"biliup_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/biliup",
"cookie_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
"retry_count": 5,
"retry_schedule_minutes": [
15,
@ -78,9 +97,9 @@
"retry_backoff_seconds": 300,
"command_timeout_seconds": 1800,
"rate_limit_retry_schedule_minutes": [
15,
30,
60,
120
60
]
},
"comment": {
@ -95,8 +114,8 @@
"collection": {
"provider": "bilibili_collection",
"enabled": true,
"season_id_a": 0,
"season_id_b": 0,
"season_id_a": 7196643,
"season_id_b": 7196624,
"allow_fuzzy_full_video_match": false,
"append_collection_a_new_to_end": true,
"append_collection_b_new_to_end": true

View File

@ -229,6 +229,16 @@
"description": "用于调用 Groq 转录 API。",
"sensitive": true
},
"groq_api_keys": {
"type": "array",
"default": [],
"title": "Groq API Keys",
"ui_order": 12,
"ui_widget": "secret_list",
"items": { "type": "string" },
"description": "可选 Groq API Key 池。遇到单个 key 限流时会自动切换下一个 key为空时使用 groq_api_key。",
"sensitive": true
},
"ffmpeg_bin": {
"type": "string",
"default": "ffmpeg",
@ -238,10 +248,66 @@
},
"max_file_size_mb": {
"type": "integer",
"default": 23,
"default": 12,
"title": "Max File Size MB",
"ui_order": 40,
"minimum": 1
"minimum": 1,
"description": "Groq 音频分片目标上限。实际切分会额外保留安全余量,避免贴近上传限制。"
},
"request_timeout_seconds": {
"type": "integer",
"default": 180,
"title": "Request Timeout Seconds",
"ui_order": 50,
"minimum": 1,
"description": "单个 Groq 转录请求的超时时间。"
},
"request_max_retries": {
"type": "integer",
"default": 1,
"title": "Request Max Retries",
"ui_order": 60,
"minimum": 0,
"description": "单个音频分片在超时、限流或连接错误时的请求级重试次数。"
},
"request_retry_backoff_seconds": {
"type": "integer",
"default": 30,
"title": "Request Retry Backoff Seconds",
"ui_order": 70,
"minimum": 0,
"description": "Groq 请求级重试之间的等待时间。"
},
"serialize_groq_requests": {
"type": "boolean",
"default": true,
"title": "Serialize Groq Requests",
"ui_order": 75,
"description": "是否串行化 Groq 分片上传请求,避免多个 worker 或多个任务同时上传导致超时。"
},
"retry_count": {
"type": "integer",
"default": 3,
"title": "Task Retry Count",
"ui_order": 80,
"minimum": 0,
"description": "transcribe 步骤允许的任务级失败重试次数。"
},
"retry_backoff_seconds": {
"type": "integer",
"default": 300,
"title": "Task Retry Backoff Seconds",
"ui_order": 90,
"minimum": 0,
"description": "未配置 retry_schedule_minutes 时transcribe 任务级重试的等待时间。"
},
"retry_schedule_minutes": {
"type": "array",
"default": [5, 10, 15],
"title": "Task Retry Schedule Minutes",
"ui_order": 100,
"items": { "type": "integer", "minimum": 0 },
"description": "transcribe 任务级失败后的自动重试等待时间。"
}
},
"song_detect": {
@ -275,6 +341,30 @@
"title": "Poll Interval Seconds",
"ui_order": 30,
"minimum": 1
},
"retry_count": {
"type": "integer",
"default": 3,
"title": "Task Retry Count",
"ui_order": 40,
"minimum": 0,
"description": "song_detect 步骤允许的任务级失败重试次数。认证失败会直接进入人工失败,不会重试。"
},
"retry_backoff_seconds": {
"type": "integer",
"default": 300,
"title": "Task Retry Backoff Seconds",
"ui_order": 50,
"minimum": 0,
"description": "未配置 retry_schedule_minutes 时song_detect 任务级重试的等待时间。"
},
"retry_schedule_minutes": {
"type": "array",
"default": [5, 10, 15],
"title": "Task Retry Schedule Minutes",
"ui_order": 60,
"items": { "type": "integer", "minimum": 0 },
"description": "song_detect 任务级失败后的自动重试等待时间。"
}
},
"split": {
@ -375,9 +465,9 @@
"rate_limit_retry_schedule_minutes": {
"type": "array",
"default": [
15,
30,
60,
120
60
],
"title": "Rate Limit Retry Schedule Minutes",
"ui_order": 70,

View File

@ -27,6 +27,7 @@
"transcribe": {
"provider": "groq",
"groq_api_key": "",
"groq_api_keys": [],
"ffmpeg_bin": "ffmpeg",
"max_file_size_mb": 23
},

74
docker-compose.yml Normal file
View File

@ -0,0 +1,74 @@
# Two services share one image: `api` serves the web/API, `worker` runs the
# pipeline loop. State lives on the host through the bind mounts below.
services:
  api:
    build:
      context: .
      # DOCKER_BUILD_* names are used on the host side so Compose does not
      # accidentally interpolate the host's own HTTP_PROXY/HTTPS_PROXY into
      # the build (see .env.example).
      args:
        HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
        HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
        ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
        NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
        http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
        https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
        all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
        no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
    image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
    command: ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]
    # .env is optional so `docker compose config` works before init.
    env_file:
      - path: .env
        required: false
    # Force container-safe paths so a mounted config/settings.json written
    # for a host layout still works (BILIUP_NEXT__GROUP__FIELD overrides).
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
      BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
      BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
      BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
      BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
      BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
      BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
      BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
      BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
    ports:
      - "${BILIUP_NEXT_PORT:-8000}:8000"
    # Host bind mounts keep credentials, config, and workspace data out of
    # the image; runtime/codex holds Codex CLI login state.
    volumes:
      - ./config:/app/config
      - ./runtime:/app/runtime
      - ./data/workspace:/app/data/workspace
      - ./runtime/codex:/root/.codex
    restart: unless-stopped
  worker:
    image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
    # Same build definition as api so either service can build the shared
    # image when it is missing.
    build:
      context: .
      args:
        HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
        HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
        ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
        NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
        http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
        https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
        all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
        no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
    # Shell form so ${WORKER_INTERVAL} is expanded inside the container.
    command: ["sh", "-c", "biliup-next worker --interval ${WORKER_INTERVAL:-5}"]
    env_file:
      - path: .env
        required: false
    environment:
      TZ: ${TZ:-Asia/Shanghai}
      BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
      BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
      BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
      BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
      BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
      BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
      BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
      BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
      BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
    volumes:
      - ./config:/app/config
      - ./runtime:/app/runtime
      - ./data/workspace:/app/data/workspace
      - ./runtime/codex:/root/.codex
    restart: unless-stopped
    depends_on:
      - api

View File

@ -155,6 +155,60 @@ User edits config
- `base_delay_seconds`
- `poll_interval_seconds`
## Upload And Comment Templates
`paths.upload_config_file` 指向 `runtime/upload_config.json`。这个文件不只控制 `biliup upload` 的标题、简介、动态和标签,也控制 B 站置顶评论格式。
投稿字段在 `template` 中:
```json
{
"template": {
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}",
"tag": "可爱,王海颖,唱歌,音乐",
"dynamic": "{streamer} {date} 歌曲纯享版已发布。\n直播完整版{current_full_video_link}"
}
}
```
评论字段在 `comment_template` 中:
```json
{
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
}
```
可用变量:
- `streamer`:主播名。
- `date`:从文件名解析出来的日期和时间。
- `song_count`:识别到的歌曲数量。
- `songs_list``songs.txt` 原始歌单内容。
- `daily_quote` / `quote_author`:随机引用文本。
- `current_full_video_bvid` / `current_full_video_link`:本场直播完整版 BV 和链接。
- `current_pure_video_bvid` / `current_pure_video_link`:本场歌曲纯享版 BV 和链接。
- `previous_full_video_bvid` / `previous_full_video_link`:上一场直播完整版 BV 和链接。
- `previous_pure_video_bvid` / `previous_pure_video_link`:上一场歌曲纯享版 BV 和链接。
- `part_index`:评论中的 `P1/P2/P3` 分段序号。
- `song_index`:全局歌曲序号。
- `title` / `artist` / `artist_suffix`:从 `songs.json` 生成纯享歌单时使用。
- `song_text`:从 `songs.txt` 兜底生成纯享歌单时使用,通常不含时间戳。
- `line_text`:完整版时间轴的原始行,通常包含时间戳。
评论头部模板有一条额外规则:如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过,避免发出空链接提示。
Docker 部署时 `./runtime` 是宿主机挂载目录。镜像更新不会覆盖已有 `runtime/upload_config.json`,因此调整文案或评论格式时应修改宿主机上的这个文件,然后重启容器。
### collection
- `enabled`

View File

@ -75,7 +75,7 @@
"platform": "bilibili",
"aid": 123456,
"bvid": "BV1xxxx",
"title": "【王海颖 (歌曲纯享版)】_03月29日 22时02分 共18首歌",
"title": "【王海颖 (歌曲纯享版)】 03月29日 22时02分 共18首歌",
"published_at": "2026-03-30T07:56:13+08:00"
}
```

View File

@ -0,0 +1,321 @@
# 发布输出示例与流程说明
本文档面向使用者说明 `biliup-next` 的主流程、输入输出、当前已实现功能,以及一次多段同场直播发布后的示例文案。
## 项目功能
`biliup-next` 将一场直播录播拆成两个最终发布目标:
- 直播完整版:由外部流程或人工上传到 B 站,本项目负责记录/绑定它的 BV 号,并给它补充置顶时间轴评论、加入完整版合集。
- 歌曲纯享版:由本项目从直播录播中识别歌曲、切出歌曲片段、合并发布为一个分 P 视频,并给它补充置顶歌单评论、加入纯享版合集。
当前主链路:
```text
stage 输入视频
-> ingest 导入并归并 session
-> transcribe 语音转字幕
-> song_detect 识别歌曲
-> split 切出歌曲片段
-> publish 发布歌曲纯享版
-> comment 发布/置顶评论
-> collection 加入合集
```
## 输入
最常见输入是把录播视频放入 `data/workspace/stage/`
支持的形式:
- 单个视频文件:一场直播只有一个录播文件。
- 多个视频文件:同一场直播被分成多段录播文件。
- 浏览器上传:通过控制台上传到 stage。
- 本机复制:通过控制台把服务器上的文件复制到 stage。
输入文件名会用于推测主播和直播开始时间,例如:
```text
王海颖唱歌录播 04月19日 22时10分.mp4
王海颖唱歌录播 04月19日 23时05分.mp4
王海颖唱歌录播 04月20日 00时01分.mp4
```
## Session 归并
同一主播、时间接近的多个录播片段会归入同一个 session。
同一 session 的行为:
- 只发布一个歌曲纯享版 BV。
- 多段录播的歌曲会按时间顺序聚合。
- 评论按 `P1``P2``P3` 分段展示。
- 歌曲序号全局递增,不在每个 P 内重新从 1 开始。
示例:
```text
P1:
1. 程艾影 — 赵雷
2. 钟无艳 — 谢安琪
P2:
3. 慢慢喜欢你 — 莫文蔚
P3:
4. 空白格 — 蔡健雅
```
## BV 获取
### 歌曲纯享版 BV
歌曲纯享版由本项目调用 `biliup upload` 发布。
发布成功后,项目会从 `biliup` 输出中提取 BV 号,并写入当前 session 目录:
```text
bvid.txt
```
这个 BV 会用于:
- 纯享版评论发布。
- 完整版评论顶部反向链接。
- 纯享版合集同步。
### 直播完整版 BV
完整版 BV 可以来自四种方式:
- `stage/*.meta.json` 中的 `full_video_bvid`。
- 控制台手动绑定。
- API/webhook 传入。
- `biliup list` 标题匹配。
`biliup list` 会同时接受 `开放浏览``审核中` 状态。完整版视频只要上传后生成了 BV即使仍在审核中也可以被写入纯享版简介、动态和评论互链。
成功解析后会写入:
```text
full_video_bvid.txt
```
默认标题匹配是保守的精确匹配:会先去掉空格、标点、括号、冒号等,只保留中文、英文、数字,再比较标题是否相等。
如果 `allow_fuzzy_full_video_match=false`,不会做包含式模糊匹配。为了避免误匹配,推荐在完整版上传完成后手动绑定 BV。
## 示例场景
假设本次直播由三段录播组成:
```text
王海颖唱歌录播 04月19日 22时10分
王海颖唱歌录播 04月19日 23时05分
王海颖唱歌录播 04月20日 00时01分
```
假设 BV 绑定结果如下:
```text
本次直播完整版BVFULLCURR
本次歌曲纯享版BVPURECURR
上次直播完整版BVFULLPREV
```
假设识别出的歌曲如下:
```text
P1:
00:06:32 程艾影 — 赵雷
00:14:45 钟无艳 — 谢安琪
P2:
00:20:57 慢慢喜欢你 — 莫文蔚
P3:
00:27:16 空白格 — 蔡健雅
```
## 歌曲纯享版标题
当前模板:
```text
【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌
```
示例:
```text
【王海颖 (歌曲纯享版)】 04月19日 22时10分 共4首歌
```
## 歌曲纯享版简介
当前模板会保持简介较短,完整歌单放到置顶评论中,避免 B 站简介截断。
示例:
```text
王海颖 04月19日 22时10分 歌曲纯享版。
完整歌单与时间轴见置顶评论。
直播完整版https://www.bilibili.com/video/BVFULLCURR
上次直播https://www.bilibili.com/video/BVFULLPREV
本视频为歌曲纯享切片,适合只听歌曲。
```
如果某个链接暂时没有 BV项目会自动移除对应的空链接行。
## 歌曲纯享版动态
示例:
```text
王海颖 04月19日 22时10分 歌曲纯享版已发布。完整歌单见置顶评论。
直播完整版https://www.bilibili.com/video/BVFULLCURR
上次直播https://www.bilibili.com/video/BVFULLPREV
```
## 歌曲纯享版置顶评论
纯享版评论主要给听歌用户看,不带歌曲时间轴,只展示歌名、歌手和互链。
默认由 `runtime/upload_config.json``comment_template.split_header``comment_template.split_part_header``comment_template.split_song_line` 生成。
示例:
```text
当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。
直播完整版https://www.bilibili.com/video/BVFULLCURR (完整录播,含聊天/互动/完整流程)
上次纯享https://www.bilibili.com/video/BVPUREPREV (上一场歌曲纯享版)
P1:
1. 程艾影 — 赵雷
2. 钟无艳 — 谢安琪
P2:
3. 慢慢喜欢你 — 莫文蔚
P3:
4. 空白格 — 蔡健雅
```
## 直播完整版置顶评论
完整版评论主要给看完整录播的用户跳转歌曲纯享版,并提供完整时间轴。
默认由 `runtime/upload_config.json``comment_template.full_header``comment_template.full_part_header``comment_template.full_timeline_line` 生成。
示例:
```text
当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。
歌曲纯享版https://www.bilibili.com/video/BVPURECURR (只听歌曲看这里)
上次完整版https://www.bilibili.com/video/BVFULLPREV (上一场完整录播)
P1:
1. 00:06:32 程艾影 — 赵雷
2. 00:14:45 钟无艳 — 谢安琪
P2:
3. 00:20:57 慢慢喜欢你 — 莫文蔚
P3:
4. 00:27:16 空白格 — 蔡健雅
```
## 评论格式配置
评论格式可以像标题、简介、动态一样通过 `runtime/upload_config.json` 修改:
```json
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
}
```
字段含义:
- `split_header`:纯享版评论顶部说明。
- `full_header`:完整版评论顶部说明。
- `split_part_header` / `full_part_header`:多片段 session 的分段标题,例如 `P1:`
- `split_song_line`:从 `songs.json` 生成纯享歌单时的单行格式。
- `split_text_song_line``songs.json` 不可用时,从 `songs.txt` 兜底生成纯享歌单的单行格式。
- `full_timeline_line`:完整版时间轴评论的单行格式。
常用变量:
- `{current_full_video_link}`:本场直播完整版链接。
- `{current_pure_video_link}`:本场歌曲纯享版链接。
- `{previous_full_video_link}`:上一场直播完整版链接。
- `{previous_pure_video_link}`:上一场歌曲纯享版链接。
- `{part_index}`P 分段序号。
- `{song_index}`:歌曲全局序号。
- `{title}` / `{artist}` / `{artist_suffix}`:歌曲标题、歌手、带分隔符的歌手后缀。
- `{song_text}`:不带时间戳的歌曲文本。
- `{line_text}`:原始时间轴行,通常包含时间戳。
如果评论头部某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动省略。
## 合集同步
项目维护两个合集目标:
- 合集 A直播完整版。
- 合集 B歌曲纯享版。
当前配置中的示例 ID
```text
直播完整版合集7196643
歌曲纯享版合集7196624
```
合集同步完成后,如果启用了清理策略,项目可以删除本地原视频或切片视频以节省空间。当前默认不删除。
## 幂等与重试
项目会在 session 目录写入标记文件,避免重复上传和重复评论。
常见标记:
```text
bvid.txt
full_video_bvid.txt
upload_done.flag
comment_split_done.flag
comment_full_done.flag
collection_a_done.flag
collection_b_done.flag
```
发布阶段的关键行为:
- 首批最多上传 5 个分 P。
- 超过 5 个分 P 时,后续通过 append 追加。
- 已经写入 `bvid.txt` 后,重试会优先 append 到已有视频,而不是重新发布。
- `publish_progress.json` 记录 append 进度,避免重试时重复追加已完成批次。
评论阶段的关键行为:
- 同一 session 只由最早片段负责聚合评论。
- 非 anchor 片段进入评论步骤时会跳过实际发评。
- 这样可以避免同一场直播的多个片段重复发布相同评论。
## 使用建议
发布前建议确认:
- stage 中的视频文件名能解析出主播和时间。
- `runtime/upload_config.json` 中标题、简介、动态符合预期。
- 完整版上传完成后,尽量手动绑定 `full_video_bvid`
- worker 重启前确认已有 `bvid.txt``publish_progress.json` 是否符合当前发布进度。
- 如需自动匹配完整版 BV确认 `biliup list` 中完整视频标题与任务标题标准化后相等。

View File

@ -25,3 +25,11 @@ cd /home/theshy/biliup/biliup-next
- `upload_config.json` <- `upload_config.example.json`
它们只用于占位能保证项目进入可配置 doctor的状态但不代表上传链路已经可用
`upload_config.json` 同时控制
- 纯享版投稿标题简介动态标签`template`
- 纯享版和完整版置顶评论格式`comment_template`
- 文件名解析规则`filename_patterns`
Docker 部署时这个目录通常会作为 `./runtime:/app/runtime` 挂载到容器内镜像更新不会覆盖已有 `upload_config.json`所以修改评论动态简介格式时应直接改宿主机上的 `runtime/upload_config.json`

View File

@ -1,5 +1,95 @@
{
"line": "AUTO",
"limit": 3,
"threads": 3
"comment": "B站投稿配置文件 - 根据您的需要修改模板内容",
"upload_settings": {
"tid": 31,
"copyright": 1,
"source": "王海颖好听的歌声分享",
"cover": ""
},
"template": {
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}\n\n本视频为歌曲纯享切片适合只听歌曲。",
"tag": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐",
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版{current_full_video_link}\n上次直播{previous_full_video_link}"
},
"comment_template": {
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享{previous_pure_video_link} (上一场歌曲纯享版)",
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版{current_pure_video_link} (只听歌曲看这里)\n上次完整版{previous_full_video_link} (上一场完整录播)",
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}"
},
"streamers": {
"王海颖": {
"display_name": "王海颖",
"tags": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐"
},
"示例主播": {
"display_name": "示例主播",
"tags": "示例,标签1,标签2,唱歌,音乐"
}
},
"quotes": [
{
"text": "此心安处是吾乡。",
"author": "苏轼《定风波·南海归赠王定国侍人寓娘》"
},
{
"text": "山重水复疑无路,柳暗花明又一村。",
"author": "陆游《游山西村》"
},
{
"text": "长风破浪会有时,直挂云帆济沧海。",
"author": "李白《行路难·其一》"
}
],
"filename_patterns": {
"comment": "从文件名提取信息的正则表达式模式 - 按优先级从高到低排列",
"patterns": [
{
"name": "主播名唱歌录播 日期 时间",
"regex": "^(?P<streamer>.+?)唱歌录播 (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分",
"date_format": "{month}月{day}日 {hour}时{minute}分",
"example": "王海颖唱歌录播 01月28日 22时06分"
},
{
"name": "日期 时间 主播名 唱歌录播",
"regex": "^(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 (?P<streamer>.+?)唱歌录播",
"date_format": "{month}月{day}日 {hour}时{minute}分",
"example": "01月25日 09时20分 王海颖唱歌录播"
},
{
"name": "主播名唱歌录播: 年月日 时分 [BV号]",
"regex": "^(?P<streamer>.+?)唱歌录播[:] (?P<year>\\d{4})年(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
"date_format": "{month}月{day}日 {hour}时{minute}分",
"example": "王海颖唱歌录播: 2026年01月22日 22时09分 [BV1wEzcBqEhW]"
},
{
"name": "主播名 日期 时分 [BV号]",
"regex": "^(?P<streamer>.+?) (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})点(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
"date_format": "{month}月{day}日 {hour}点{minute}分",
"example": "王海颖 01月25日 02点24分 [BV1KCzQBpEXC]"
},
{
"name": "主播名_日期",
"regex": "^(?P<streamer>.+?)_(?P<date>\\d{1,2}月\\d{1,2}日)",
"date_format": "{date}",
"example": "王海颖_1月20日"
},
{
"name": "主播名_完整日期",
"regex": "^(?P<streamer>.+?)_(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})",
"date_format": "{month}月{day}日",
"example": "王海颖_2026-01-20"
},
{
"name": "主播名_描述",
"regex": "^(?P<streamer>.+?)_(?P<desc>.+)",
"date_format": "{desc}",
"example": "测试搬运_前15分钟"
}
]
}
}

View File

@ -0,0 +1,28 @@
#!/usr/bin/env sh
# Bootstrap local config/runtime files for the docker deployment.
# Idempotent: existing files are never overwritten.
set -eu

mkdir -p config runtime/codex data/workspace/stage data/workspace/session data/workspace/backup

# seed TARGET SOURCE MESSAGE
# Copy SOURCE to TARGET unless TARGET already exists; announce via MESSAGE.
seed() {
    if [ ! -f "$1" ]; then
        cp "$2" "$1"
        echo "$3"
    fi
}

seed .env .env.example "created .env from .env.example"
seed config/settings.json config/settings.docker.example.json "created config/settings.json from config/settings.docker.example.json"
seed runtime/cookies.json runtime/cookies.example.json "created runtime/cookies.json placeholder"
seed runtime/upload_config.json runtime/upload_config.example.json "created runtime/upload_config.json placeholder"

# The biliup uploader binary must be provided by the user; warn early.
if [ ! -x runtime/biliup ]; then
    echo "warning: runtime/biliup is missing or not executable; publish will fail until you provide it" >&2
fi

View File

@ -4,3 +4,4 @@ Version: 0.1.0
Summary: Next-generation control-plane-first biliup pipeline
Requires-Python: >=3.11
Requires-Dist: requests>=2.32.0
Requires-Dist: groq>=0.18.0

View File

@ -10,7 +10,19 @@ src/biliup_next.egg-info/top_level.txt
src/biliup_next/app/api_server.py
src/biliup_next/app/bootstrap.py
src/biliup_next/app/cli.py
src/biliup_next/app/control_plane_get_dispatcher.py
src/biliup_next/app/control_plane_post_dispatcher.py
src/biliup_next/app/dashboard.py
src/biliup_next/app/retry_meta.py
src/biliup_next/app/scheduler.py
src/biliup_next/app/serializers.py
src/biliup_next/app/session_delivery_service.py
src/biliup_next/app/task_actions.py
src/biliup_next/app/task_audit.py
src/biliup_next/app/task_control_service.py
src/biliup_next/app/task_engine.py
src/biliup_next/app/task_policies.py
src/biliup_next/app/task_runner.py
src/biliup_next/app/worker.py
src/biliup_next/core/config.py
src/biliup_next/core/errors.py
@ -18,25 +30,56 @@ src/biliup_next/core/models.py
src/biliup_next/core/providers.py
src/biliup_next/core/registry.py
src/biliup_next/infra/db.py
src/biliup_next/infra/legacy_asset_sync.py
src/biliup_next/infra/log_reader.py
src/biliup_next/infra/plugin_loader.py
src/biliup_next/infra/runtime_doctor.py
src/biliup_next/infra/stage_importer.py
src/biliup_next/infra/storage_guard.py
src/biliup_next/infra/systemd_runtime.py
src/biliup_next/infra/task_repository.py
src/biliup_next/infra/task_reset.py
src/biliup_next/infra/workspace_cleanup.py
src/biliup_next/infra/workspace_paths.py
src/biliup_next/infra/adapters/bilibili_api.py
src/biliup_next/infra/adapters/biliup_cli.py
src/biliup_next/infra/adapters/codex_cli.py
src/biliup_next/infra/adapters/full_video_locator.py
src/biliup_next/infra/adapters/qwen_cli.py
src/biliup_next/infra/adapters/yt_dlp.py
src/biliup_next/modules/collection/service.py
src/biliup_next/modules/collection/providers/bilibili_collection.py
src/biliup_next/modules/comment/service.py
src/biliup_next/modules/comment/providers/bilibili_top_comment.py
src/biliup_next/modules/ingest/service.py
src/biliup_next/modules/ingest/providers/bilibili_url.py
src/biliup_next/modules/ingest/providers/local_file.py
src/biliup_next/modules/publish/service.py
src/biliup_next/modules/publish/providers/biliup_cli.py
src/biliup_next/modules/song_detect/service.py
src/biliup_next/modules/song_detect/providers/codex.py
src/biliup_next/modules/song_detect/providers/common.py
src/biliup_next/modules/song_detect/providers/qwen_cli.py
src/biliup_next/modules/split/service.py
src/biliup_next/modules/split/providers/ffmpeg_copy.py
src/biliup_next/modules/transcribe/service.py
src/biliup_next/modules/transcribe/providers/groq.py
tests/test_api_server.py
tests/test_bilibili_top_comment_provider.py
tests/test_biliup_cli_publish_provider.py
tests/test_control_plane_get_dispatcher.py
tests/test_control_plane_post_dispatcher.py
tests/test_ingest_bilibili_url.py
tests/test_ingest_session_grouping.py
tests/test_publish_service.py
tests/test_retry_meta.py
tests/test_serializers.py
tests/test_session_delivery_service.py
tests/test_settings_service.py
tests/test_song_detect_providers.py
tests/test_task_actions.py
tests/test_task_control_service.py
tests/test_task_engine.py
tests/test_task_policies.py
tests/test_task_repository_sqlite.py
tests/test_task_runner.py

View File

@ -1 +1,2 @@
requests>=2.32.0
groq>=0.18.0

View File

@ -3,6 +3,8 @@ from __future__ import annotations
from datetime import datetime, timedelta, timezone
STEP_SETTINGS_GROUP = {
"transcribe": "transcribe",
"song_detect": "song_detect",
"publish": "publish",
"comment": "comment",
}
@ -54,6 +56,26 @@ def publish_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
)
def transcribe_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
    """Backoff schedule (seconds between attempts) for the transcribe step."""
    # Defaults mirror the other steps: 3 attempts, 300s apart, overridable
    # through the step's settings group.
    return retry_schedule_seconds(
        settings,
        count_key="retry_count",
        backoff_key="retry_backoff_seconds",
        default_backoff=300,
        default_count=3,
    )
def song_detect_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
    """Backoff schedule (seconds between attempts) for the song_detect step."""
    # Defaults mirror the other steps: 3 attempts, 300s apart, overridable
    # through the step's settings group.
    return retry_schedule_seconds(
        settings,
        count_key="retry_count",
        backoff_key="retry_backoff_seconds",
        default_backoff=300,
        default_count=3,
    )
def comment_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
return retry_schedule_seconds(
settings,
@ -77,7 +99,11 @@ def retry_meta_for_step(step, settings_by_group: dict[str, object]) -> dict[str,
if not isinstance(group_settings, dict):
group_settings = {}
if step_name == "publish":
if step_name == "transcribe":
schedule = transcribe_retry_schedule_seconds(group_settings)
elif step_name == "song_detect":
schedule = song_detect_retry_schedule_seconds(group_settings)
elif step_name == "publish":
schedule = publish_retry_schedule_seconds(group_settings)
elif step_name == "comment":
schedule = comment_retry_schedule_seconds(group_settings)

View File

@ -52,7 +52,16 @@ def infer_error_step_name(task, steps: dict[str, object]) -> str: # type: ignor
def retry_wait_payload(task_id: str, step, state: dict[str, object]) -> dict[str, object] | None: # type: ignore[no-untyped-def]
if step.status != "failed_retryable":
return None
meta = retry_meta_for_step(step, {"publish": settings_for(state, "publish")})
step_settings_group = {
"transcribe": "transcribe",
"song_detect": "song_detect",
"publish": "publish",
"comment": "comment",
}.get(step.step_name)
settings_by_group = {}
if step_settings_group is not None and step_settings_group in state["settings"]:
settings_by_group[step_settings_group] = settings_for(state, step_settings_group)
meta = retry_meta_for_step(step, settings_by_group)
if meta is None or meta["retry_due"]:
return None
return {

View File

@ -2,6 +2,8 @@ from __future__ import annotations
from biliup_next.app.retry_meta import comment_retry_schedule_seconds
from biliup_next.app.retry_meta import publish_retry_schedule_seconds
from biliup_next.app.retry_meta import song_detect_retry_schedule_seconds
from biliup_next.app.retry_meta import transcribe_retry_schedule_seconds
from biliup_next.app.task_engine import infer_error_step_name, settings_for as task_engine_settings_for
from biliup_next.core.models import utc_now_iso
@ -35,6 +37,18 @@ def resolve_failure(task, repo, state: dict[str, object], exc) -> dict[str, obje
next_retry_count = current_retry + 1
next_status = "failed_retryable" if exc.retryable else "failed_manual"
next_retry_delay_seconds: int | None = None
if exc.retryable and step_name == "transcribe":
schedule = transcribe_retry_schedule_seconds(settings_for(state, "transcribe"))
if next_retry_count > len(schedule):
next_status = "failed_manual"
else:
next_retry_delay_seconds = schedule[next_retry_count - 1]
if exc.retryable and step_name == "song_detect":
schedule = song_detect_retry_schedule_seconds(settings_for(state, "song_detect"))
if next_retry_count > len(schedule):
next_status = "failed_manual"
else:
next_retry_delay_seconds = schedule[next_retry_count - 1]
if exc.retryable and step_name == "publish":
publish_settings = settings_for(state, "publish")
if exc.code == "PUBLISH_RATE_LIMITED":

View File

@ -1,6 +1,7 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any
@ -32,6 +33,7 @@ class SettingsService:
schema = self._read_json(self.schema_path)
settings = self._read_json(self.settings_path)
settings = self._apply_schema_defaults(settings, schema)
settings = self._apply_env_overrides(settings, schema)
settings = self._normalize_paths(settings)
self.validate(settings, schema)
return SettingsBundle(schema=schema, settings=settings)
@ -125,6 +127,57 @@ class SettingsService:
group_value[field_name] = self._clone_default(field_schema["default"])
return merged
def _apply_env_overrides(self, settings: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
merged = json.loads(json.dumps(settings))
aliases = {
("transcribe", "groq_api_key"): ["GROQ_API_KEY"],
("transcribe", "groq_api_keys"): ["GROQ_API_KEYS"],
("collection", "season_id_a"): ["COLLECTION_SEASON_ID_A"],
("collection", "season_id_b"): ["COLLECTION_SEASON_ID_B"],
}
for group_name, fields in schema.get("groups", {}).items():
group_value = merged.setdefault(group_name, {})
if not isinstance(group_value, dict):
continue
for field_name, field_schema in fields.items():
env_names = [
f"BILIUP_NEXT__{group_name}__{field_name}".upper(),
f"BILIUP_NEXT_{group_name}_{field_name}".upper(),
*aliases.get((group_name, field_name), []),
]
raw_value = self._first_env_value(env_names)
if raw_value is None:
continue
group_value[field_name] = self._parse_env_value(raw_value, field_schema)
return merged
@staticmethod
def _first_env_value(names: list[str]) -> str | None:
for name in names:
value = os.environ.get(name)
if value:
return value
return None
@staticmethod
def _parse_env_value(value: str, field_schema: dict[str, Any]) -> Any:
expected = field_schema.get("type")
if expected == "integer":
return int(value)
if expected == "boolean":
normalized = value.strip().lower()
if normalized in {"1", "true", "yes", "on"}:
return True
if normalized in {"0", "false", "no", "off"}:
return False
raise ConfigError(f"无法解析布尔环境变量值: {value}")
if expected == "array":
stripped = value.strip()
if stripped.startswith("["):
return json.loads(stripped)
return [item.strip() for item in value.split(",") if item.strip()]
return value
@staticmethod
def _clone_default(value: Any) -> Any:
return json.loads(json.dumps(value))

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import os
import subprocess
from pathlib import Path
@ -18,9 +19,7 @@ class CodexCliAdapter:
codex_cmd,
"exec",
prompt.replace("\n", " "),
"--full-auto",
"--sandbox",
"workspace-write",
"--dangerously-bypass-approvals-and-sandbox",
"--output-schema",
"./song_schema.json",
"-o",
@ -35,6 +34,7 @@ class CodexCliAdapter:
capture_output=True,
text=True,
check=False,
env=self._subprocess_env(),
)
except FileNotFoundError as exc:
raise ModuleError(
@ -42,3 +42,12 @@ class CodexCliAdapter:
message=f"找不到 codex 命令: {codex_cmd}",
retryable=False,
) from exc
@staticmethod
def _subprocess_env() -> dict[str, str]:
env = os.environ.copy()
for key in ("HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"):
value = env.get(key)
if value and "://" not in value:
env[key] = f"http://{value}"
return env

View File

@ -8,6 +8,9 @@ from typing import Any
from biliup_next.core.errors import ModuleError
VISIBLE_BILIUP_LIST_STATES = {"开放浏览", "审核中"}
def normalize_title(text: str) -> str:
    """Lowercase *text*, keeping only CJK ideographs and ASCII alphanumerics."""
    # Keep matching characters rather than deleting non-matching ones;
    # the result is identical to a re.sub with the negated class.
    kept = re.findall(r"[\u4e00-\u9fa5a-zA-Z0-9]", text)
    return "".join(kept).lower()
@ -38,7 +41,7 @@ def fetch_biliup_list(settings: dict[str, Any], *, max_pages: int = 5) -> list[d
if not line.startswith("BV"):
continue
parts = line.split("\t")
if len(parts) >= 3 and "开放浏览" not in parts[2]:
if len(parts) >= 3 and not any(state in parts[2] for state in VISIBLE_BILIUP_LIST_STATES):
continue
if len(parts) >= 2:
videos.append({"bvid": parts[0].strip(), "title": parts[1].strip()})

View File

@ -115,7 +115,6 @@ class TaskResetService:
work_dir / "comment_full_done.flag",
work_dir / "collection_a_done.flag",
work_dir / "collection_b_done.flag",
work_dir / "bvid.txt",
],
"comment": [
work_dir / "comment_done.flag",

View File

@ -0,0 +1,199 @@
from __future__ import annotations
from datetime import datetime
from pathlib import Path
import re
from typing import Any
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list, resolve_full_video_bvid
from biliup_next.infra.workspace_paths import resolve_task_work_dir
def bilibili_video_url(bvid: str | None) -> str:
bvid = (bvid or "").strip()
return f"https://www.bilibili.com/video/{bvid}" if bvid.startswith("BV") else ""
def read_task_split_bvid(task: Any) -> str:
    """Read the split (pure-song) video BV id recorded in the task work dir."""
    work_dir = resolve_task_work_dir(task)
    return _read_bvid(work_dir / "bvid.txt")
def read_task_full_bvid(task: Any, context: Any | None = None) -> str:
    """Full-live BV id: prefer the task context, else full_video_bvid.txt."""
    context_bvid = getattr(context, "full_video_bvid", None) if context is not None else None
    if context_bvid:
        return str(context_bvid).strip()
    return _read_bvid(resolve_task_work_dir(task) / "full_video_bvid.txt")
def link_context_for_task(task: Any, repo: Any | None, settings: dict[str, Any] | None = None) -> dict[str, str]:
    """Assemble current/previous full & pure video BV ids and links for *task*.

    Every key is always present; unknown values are empty strings.
    """
    context = _get_context(repo, task.id)
    # Context/file first, then a best-effort biliup-list lookup.
    full_bvid = read_task_full_bvid(task, context) or resolve_current_full_video_bvid(task, settings)
    split_bvid = read_task_split_bvid(task)
    previous = previous_live_links(task, repo, context, settings)
    links = {
        "current_full_video_bvid": full_bvid,
        "current_full_video_link": bilibili_video_url(full_bvid),
        "current_pure_video_bvid": split_bvid,
        "current_pure_video_link": bilibili_video_url(split_bvid),
    }
    for key in (
        "previous_full_video_bvid",
        "previous_full_video_link",
        "previous_pure_video_bvid",
        "previous_pure_video_link",
    ):
        links[key] = previous.get(key, "")
    return links
def resolve_current_full_video_bvid(task: Any, settings: dict[str, Any] | None = None) -> str:
    """Best-effort lookup of the full-live BV id via the biliup CLI list.

    Returns "" when settings are incomplete or the lookup fails; this is a
    deliberately best-effort enrichment path, so lookup errors are swallowed.
    """
    if not settings:
        return ""
    if not (settings.get("biliup_path") and settings.get("cookie_file")):
        return ""
    try:
        resolved = resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings)
    except Exception:  # best-effort only; never propagate lookup failures
        return ""
    return resolved or ""
def previous_live_links(
    task: Any,
    repo: Any | None,
    context: Any | None = None,
    settings: dict[str, Any] | None = None,
) -> dict[str, str]:
    """Locate the previous live session's full/pure BV ids and links.

    Repository task contexts are consulted first; any link still missing is
    filled in from the biliup CLI upload list as a fallback.
    """
    context = context or _get_context(repo, task.id)
    streamer = _context_streamer(context) or _parse_streamer_from_title(task.title)
    if not streamer:
        return {}

    # Reference time for "previous": context start time, else title time.
    started_at = None
    if context is not None:
        started_at = _parse_datetime(getattr(context, "segment_started_at", None))
    if started_at is None:
        started_at = _parse_title_datetime(task.title)
    session_key = getattr(context, "session_key", None) if context is not None else None

    previous: dict[str, str] = {}
    repo_supported = (
        repo is not None
        and hasattr(repo, "find_recent_task_contexts")
        and hasattr(repo, "get_task")
    )
    if repo_supported:
        for ctx in repo.find_recent_task_contexts(streamer, limit=50):
            if ctx.task_id == task.id:
                continue
            # Segments of the same live session are not "previous" lives.
            if session_key and getattr(ctx, "session_key", None) == session_key:
                continue
            ctx_started = _parse_datetime(getattr(ctx, "segment_started_at", None))
            if started_at is not None and ctx_started is not None and ctx_started >= started_at:
                continue
            ctx_task = repo.get_task(ctx.task_id)
            if ctx_task is None:
                continue
            full_bvid = read_task_full_bvid(ctx_task, ctx)
            split_bvid = read_task_split_bvid(ctx_task)
            if full_bvid or split_bvid:
                previous = {
                    "previous_full_video_bvid": full_bvid,
                    "previous_full_video_link": bilibili_video_url(full_bvid),
                    "previous_pure_video_bvid": split_bvid,
                    "previous_pure_video_link": bilibili_video_url(split_bvid),
                }
                break

    # Fill gaps (but never overwrite) from the uploaded-video list.
    if not previous.get("previous_full_video_bvid") or not previous.get("previous_pure_video_bvid"):
        for key, value in _previous_live_from_biliup_list(streamer, started_at, settings).items():
            if value and not previous.get(key):
                previous[key] = value
    return previous
def _get_context(repo: Any | None, task_id: str) -> Any | None:
    """Fetch the task context from *repo* when the repo supports contexts."""
    if repo is not None and hasattr(repo, "get_task_context"):
        return repo.get_task_context(task_id)
    return None
def _context_streamer(context: Any | None) -> str:
    """Streamer name recorded on *context*, normalized to a stripped string."""
    if context is None:
        return ""
    raw = getattr(context, "streamer", "") or ""
    return str(raw).strip()
def _read_bvid(path: Path) -> str:
    """Read a BV id from *path*; "" when the file is absent or not a BV id."""
    if not path.exists():
        return ""
    content = path.read_text(encoding="utf-8").strip()
    return content if content.startswith("BV") else ""
def _parse_datetime(value: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp; None for empty or unparseable input."""
    if not value:
        return None
    try:
        parsed = datetime.fromisoformat(value)
    except ValueError:
        parsed = None
    return parsed
def _parse_title_datetime(title: str) -> datetime | None:
    """Extract a start datetime from a live-recording title.

    Titles use "YYYY年MM月DD日 HH时MM分" or the year-less "MM月DD日 HH时MM分"
    (时/点 both accepted); the year-less form assumes the current year.
    Returns None when no pattern matches or the date is invalid.
    """
    patterns = (
        r"(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
        r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
    )
    for pattern in patterns:
        match = re.search(pattern, title)
        if match is None:
            continue
        parts = match.groupdict()
        year = int(parts.get("year") or datetime.now().year)
        try:
            return datetime(
                year,
                int(parts["month"]),
                int(parts["day"]),
                int(parts["hour"]),
                int(parts["minute"]),
            )
        except ValueError:
            # e.g. 02月30日 — an impossible calendar date.
            return None
    return None
def _parse_streamer_from_title(title: str) -> str:
    """Streamer name is whatever precedes the "唱歌录播" marker in the title."""
    marker = "唱歌录播"
    if marker not in title:
        return ""
    prefix, _, _ = title.partition(marker)
    return prefix.strip()
def _previous_live_from_biliup_list(
    streamer: str,
    current_started: datetime | None,
    settings: dict[str, Any] | None,
) -> dict[str, str]:
    """Fallback: derive the previous live's BV ids from the biliup upload list.

    Scans the uploaded videos for *streamer*, classifies each title as a
    pure-song cut ("纯享") or a full recording ("唱歌录播"), and returns the
    most recent of each strictly before *current_started*. Best-effort: any
    CLI failure yields {}.
    """
    if current_started is None or not settings or not settings.get("biliup_path") or not settings.get("cookie_file"):
        return {}
    try:
        videos = fetch_biliup_list(settings)
    except Exception:  # best-effort fallback; a CLI failure yields no links
        return {}
    current_compare = current_started.replace(tzinfo=None)
    full_candidates: list[tuple[datetime, str]] = []
    pure_candidates: list[tuple[datetime, str]] = []
    for video in videos:
        title = video.get("title", "")
        bvid = video.get("bvid", "")
        if not bvid.startswith("BV"):
            continue
        if streamer not in title:
            continue
        started = _parse_title_datetime(title)
        # Year-less titles parse into the current year, so a "future"
        # timestamp means the video is actually from last year.
        # Fix: the original tested `"" not in title`, which is always False
        # (the empty string is a substring of every string), so this rollback
        # never ran; the intended guard is "no explicit 年 year marker".
        if started is not None and started > current_compare and "年" not in title:
            started = started.replace(year=started.year - 1)
        if started is None or started >= current_compare:
            continue
        if "纯享" in title:
            pure_candidates.append((started, bvid))
        elif "唱歌录播" in title:
            full_candidates.append((started, bvid))
    if not full_candidates and not pure_candidates:
        return {}
    full_bvid = max(full_candidates, key=lambda item: item[0])[1] if full_candidates else ""
    pure_bvid = max(pure_candidates, key=lambda item: item[0])[1] if pure_candidates else ""
    return {
        "previous_full_video_bvid": full_bvid,
        "previous_full_video_link": bilibili_video_url(full_bvid),
        "previous_pure_video_bvid": pure_bvid,
        "previous_pure_video_link": bilibili_video_url(pure_bvid),
    }

View File

@ -1,6 +1,8 @@
from __future__ import annotations
import shutil
from pathlib import Path
from typing import Any
from biliup_next.infra.task_repository import TaskRepository
from biliup_next.infra.workspace_paths import resolve_task_work_dir
@ -11,35 +13,59 @@ class WorkspaceCleanupService:
self.repo = repo
def cleanup_task_outputs(self, task_id: str, settings: dict[str, object]) -> dict[str, object]:
task = self.repo.get_task(task_id)
if task is None:
cleanup_tasks = self._cleanup_tasks(task_id)
if not cleanup_tasks:
raise RuntimeError(f"task not found: {task_id}")
session_dir = resolve_task_work_dir(task)
removed: list[str] = []
skipped: list[str] = []
cleaned_task_ids: list[str] = []
if settings.get("delete_source_video_after_collection_synced", False):
source_path = Path(task.source_path).resolve()
try:
source_path.relative_to(session_dir)
source_managed = True
except ValueError:
source_managed = False
if source_path.exists() and source_managed:
source_path.unlink()
self.repo.delete_artifact_by_path(task_id, str(source_path.resolve()))
removed.append(str(source_path))
else:
skipped.append(str(source_path))
for task in cleanup_tasks:
session_dir = resolve_task_work_dir(task)
cleaned_task_ids.append(task.id)
if settings.get("delete_split_videos_after_collection_synced", False):
split_dir = session_dir / "split_video"
if split_dir.exists():
shutil.rmtree(split_dir, ignore_errors=True)
self.repo.delete_artifacts(task_id, "clip_video")
removed.append(str(split_dir))
else:
skipped.append(str(split_dir))
if settings.get("delete_source_video_after_collection_synced", False):
source_path = Path(task.source_path).resolve()
try:
source_path.relative_to(session_dir)
source_managed = True
except ValueError:
source_managed = False
if source_path.exists() and source_managed:
source_path.unlink()
self.repo.delete_artifact_by_path(task.id, str(source_path.resolve()))
removed.append(str(source_path))
else:
skipped.append(str(source_path))
return {"removed": removed, "skipped": skipped}
if settings.get("delete_split_videos_after_collection_synced", False):
for video_dir_name in ("split_video", "publish_video"):
video_dir = session_dir / video_dir_name
if video_dir.exists():
shutil.rmtree(video_dir, ignore_errors=True)
removed.append(str(video_dir))
else:
skipped.append(str(video_dir))
self.repo.delete_artifacts(task.id, "clip_video")
return {"removed": removed, "skipped": skipped, "task_ids": cleaned_task_ids}
def _cleanup_tasks(self, task_id: str) -> list[Any]:
task = self.repo.get_task(task_id)
if task is None:
return []
if not hasattr(self.repo, "get_task_context") or not hasattr(self.repo, "list_task_contexts_by_session_key"):
return [task]
context = self.repo.get_task_context(task_id)
if context is None or not context.session_key or context.session_key.startswith("task:"):
return [task]
tasks = []
for session_context in self.repo.list_task_contexts_by_session_key(context.session_key):
session_task = self.repo.get_task(session_context.task_id)
if session_task is not None:
tasks.append(session_task)
return tasks or [task]

View File

@ -31,4 +31,5 @@ class CollectionService:
self.repo.update_task_status(task_id, "collection_synced", finished_at)
cleanup_result = self.cleanup.cleanup_task_outputs(task_id, settings)
return {**result, "cleanup": cleanup_result}
self.repo.update_task_status(task_id, "commented", finished_at)
return result

View File

@ -11,9 +11,34 @@ from biliup_next.core.models import Task
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.bilibili_api import BilibiliApiAdapter
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
from biliup_next.infra.video_links import bilibili_video_url, link_context_for_task
from biliup_next.infra.workspace_paths import resolve_task_work_dir
DEFAULT_COMMENT_TEMPLATE = {
"split_header": (
"当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n"
"直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n"
"上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)"
),
"full_header": (
"当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n"
"歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n"
"上次完整版:{previous_full_video_link} (上一场完整录播)"
),
"split_part_header": "P{part_index}:",
"full_part_header": "P{part_index}:",
"split_song_line": "{song_index}. {title}{artist_suffix}",
"split_text_song_line": "{song_index}. {song_text}",
"full_timeline_line": "{song_index}. {line_text}",
}
class _SafeFormatDict(dict):
    """dict for str.format_map that yields "" for unknown placeholders.

    Lets comment templates reference optional variables without raising
    KeyError when a value is unavailable.
    """

    def __missing__(self, key: str) -> str:
        return ""
class BilibiliTopCommentProvider:
def __init__(self, bilibili_api: BilibiliApiAdapter | None = None) -> None:
self.bilibili_api = bilibili_api or BilibiliApiAdapter()
@ -41,7 +66,8 @@ class BilibiliTopCommentProvider:
)
timeline_content = songs_path.read_text(encoding="utf-8").strip()
split_content, split_reason = self._build_split_comment(task, settings)
comment_template = self._load_comment_template(settings)
split_content, split_reason = self._build_split_comment(task, settings, comment_template)
if not timeline_content and not split_content:
self._touch_comment_flags(session_dir, split_done=True, full_done=True)
return {"status": "skipped", "reason": "comment_content_empty"}
@ -78,7 +104,7 @@ class BilibiliTopCommentProvider:
if settings.get("post_full_video_timeline_comment", True) and not full_done:
full_bvid = resolve_full_video_bvid(task.title, session_dir, settings)
full_content, full_reason = self._build_full_comment_content(task, settings)
full_content, full_reason = self._build_full_comment_content(task, settings, comment_template)
if full_reason is not None:
full_result = {"status": "skipped", "reason": full_reason}
elif full_bvid and full_content:
@ -135,44 +161,116 @@ class BilibiliTopCommentProvider:
return {"status": "ok", "bvid": bvid, "aid": aid, "rpid": rpid}
@staticmethod
def _build_split_comment_content(songs_json_path: Path, songs_txt_path: Path) -> str:
def _build_split_comment_content(
songs_json_path: Path,
songs_txt_path: Path,
*,
start_index: int = 1,
comment_template: dict[str, str] | None = None,
) -> tuple[str, int]:
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
next_index = start_index
if songs_json_path.exists():
try:
data = json.loads(songs_json_path.read_text(encoding="utf-8"))
lines = []
for index, song in enumerate(data.get("songs", []), 1):
for song in data.get("songs", []):
title = str(song.get("title", "")).strip()
artist = str(song.get("artist", "")).strip()
if not title:
continue
suffix = f"{artist}" if artist else ""
lines.append(f"{index}. {title}{suffix}")
lines.append(
BilibiliTopCommentProvider._format_template(
comment_template.get("split_song_line", DEFAULT_COMMENT_TEMPLATE["split_song_line"]),
{
"song_index": str(next_index),
"title": title,
"artist": artist,
"artist_suffix": suffix,
},
)
)
next_index += 1
if lines:
return "\n".join(lines)
return "\n".join(lines), next_index
except json.JSONDecodeError:
pass
if songs_txt_path.exists():
lines = []
for index, raw in enumerate(songs_txt_path.read_text(encoding="utf-8").splitlines(), 1):
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
text = raw.strip()
if not text:
continue
parts = text.split(" ", 1)
song_text = parts[1] if len(parts) == 2 and ":" in parts[0] else text
lines.append(f"{index}. {song_text}")
return "\n".join(lines)
return ""
lines.append(
BilibiliTopCommentProvider._format_template(
comment_template.get("split_text_song_line", DEFAULT_COMMENT_TEMPLATE["split_text_song_line"]),
{
"song_index": str(next_index),
"song_text": song_text,
"line_text": text,
},
)
)
next_index += 1
return "\n".join(lines), next_index
return "", next_index
def _build_split_comment(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
@staticmethod
def _build_full_timeline_content(
songs_txt_path: Path,
*,
start_index: int = 1,
comment_template: dict[str, str] | None = None,
) -> tuple[str, int]:
if not songs_txt_path.exists():
return "", start_index
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
next_index = start_index
lines = []
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
text = raw.strip()
if not text:
continue
lines.append(
BilibiliTopCommentProvider._format_template(
comment_template.get("full_timeline_line", DEFAULT_COMMENT_TEMPLATE["full_timeline_line"]),
{
"song_index": str(next_index),
"line_text": text,
},
)
)
next_index += 1
return "\n".join(lines), next_index
def _build_split_comment(
self,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> tuple[str, str | None]:
repo = settings.get("__repo")
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
session_dir = resolve_task_work_dir(task)
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
content, _ = self._build_split_comment_content(
session_dir / "songs.json",
session_dir / "songs.txt",
comment_template=comment_template,
)
return self._with_split_footer(content, task, settings, comment_template), None
context = repo.get_task_context(task.id)
if context is None or not context.session_key or context.session_key.startswith("task:"):
session_dir = resolve_task_work_dir(task)
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
content, _ = self._build_split_comment_content(
session_dir / "songs.json",
session_dir / "songs.txt",
comment_template=comment_template,
)
return self._with_split_footer(content, task, settings, comment_template), None
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
if not ordered_contexts:
@ -182,31 +280,42 @@ class BilibiliTopCommentProvider:
return "", "session_split_comment_owned_by_anchor"
blocks: list[str] = []
next_song_index = 1
for index, session_context in enumerate(ordered_contexts, start=1):
session_task = repo.get_task(session_context.task_id)
if session_task is None:
continue
task_dir = resolve_task_work_dir(session_task)
content = self._build_split_comment_content(task_dir / "songs.json", task_dir / "songs.txt")
content, next_song_index = self._build_split_comment_content(
task_dir / "songs.json",
task_dir / "songs.txt",
start_index=next_song_index,
comment_template=comment_template,
)
if not content:
continue
blocks.append(f"P{index}:\n{content}")
blocks.append(f"{self._part_header(comment_template, 'split_part_header', index)}\n{content}")
if not blocks:
return "", "split_comment_empty"
return "\n\n".join(blocks), None
return self._with_split_footer("\n\n".join(blocks), task, settings, comment_template), None
def _build_full_comment_content(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
def _build_full_comment_content(
self,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> tuple[str, str | None]:
repo = settings.get("__repo")
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
session_dir = resolve_task_work_dir(task)
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
return content, None if content else "timeline_comment_empty"
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
context = repo.get_task_context(task.id)
if context is None or not context.session_key or context.session_key.startswith("task:"):
session_dir = resolve_task_work_dir(task)
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
return content, None if content else "timeline_comment_empty"
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
if not ordered_contexts:
@ -216,21 +325,109 @@ class BilibiliTopCommentProvider:
return "", "session_full_comment_owned_by_anchor"
blocks: list[str] = []
next_song_index = 1
for index, session_context in enumerate(ordered_contexts, start=1):
session_task = repo.get_task(session_context.task_id)
if session_task is None:
continue
task_dir = resolve_task_work_dir(session_task)
songs_path = task_dir / "songs.txt"
if not songs_path.exists():
continue
content = songs_path.read_text(encoding="utf-8").strip()
content, next_song_index = self._build_full_timeline_content(
songs_path,
start_index=next_song_index,
comment_template=comment_template,
)
if not content:
continue
blocks.append(f"P{index}:\n{content}")
blocks.append(f"{self._part_header(comment_template, 'full_part_header', index)}\n{content}")
if not blocks:
return "", "timeline_comment_empty"
return "\n\n".join(blocks), None
return self._with_full_footer("\n\n".join(blocks), task, settings, comment_template), None
def _with_split_footer(
self,
content: str,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> str:
links = link_context_for_task(task, settings.get("__repo"), settings)
current_full_link = links.get("current_full_video_link", "")
if not current_full_link and settings.get("biliup_path") and settings.get("cookie_file"):
full_bvid = resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings)
current_full_link = bilibili_video_url(full_bvid)
header_vars = dict(links)
header_vars["current_full_video_link"] = current_full_link
header = self._format_header_template(
comment_template.get("split_header", DEFAULT_COMMENT_TEMPLATE["split_header"]),
header_vars,
)
return self._prepend_header(content, header)
def _with_full_footer(
self,
content: str,
task: Task,
settings: dict[str, Any],
comment_template: dict[str, str],
) -> str:
links = link_context_for_task(task, settings.get("__repo"), settings)
header = self._format_header_template(
comment_template.get("full_header", DEFAULT_COMMENT_TEMPLATE["full_header"]),
links,
)
return self._prepend_header(content, header)
@staticmethod
def _prepend_header(content: str, header: str) -> str:
content = content.strip()
lines = [line.rstrip() for line in header.splitlines() if line.strip()]
if not content:
return "\n".join(lines)
if not lines:
return content
return "\n".join(lines) + f"\n\n{content}"
@staticmethod
def _part_header(comment_template: dict[str, str], key: str, part_index: int) -> str:
return BilibiliTopCommentProvider._format_template(
comment_template.get(key, DEFAULT_COMMENT_TEMPLATE[key]),
{"part_index": str(part_index)},
)
@staticmethod
def _format_template(template: str, values: dict[str, str]) -> str:
return template.format_map(_SafeFormatDict(values)).strip()
@staticmethod
def _format_header_template(template: str, values: dict[str, str]) -> str:
lines = []
for raw_line in template.splitlines():
if any(f"{{{key}}}" in raw_line and not value for key, value in values.items()):
continue
lines.append(BilibiliTopCommentProvider._format_template(raw_line, values))
return "\n".join(line for line in lines if line.strip()).strip()
@staticmethod
def _load_comment_template(settings: dict[str, Any]) -> dict[str, str]:
merged = dict(DEFAULT_COMMENT_TEMPLATE)
path_value = settings.get("upload_config_file")
if not path_value:
return merged
path = Path(str(path_value))
if not path.exists():
return merged
try:
config = json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return merged
template = config.get("comment_template", {})
if not isinstance(template, dict):
return merged
for key, value in template.items():
if key in merged and isinstance(value, str):
merged[key] = value
return merged
def _ordered_session_contexts(self, repo, session_key: str) -> list[object]: # type: ignore[no-untyped-def]
contexts = list(repo.list_task_contexts_by_session_key(session_key))

View File

@ -217,7 +217,7 @@ class IngestService:
"room_id": sidecar_meta["payload"].get("room_id"),
"session_key": sidecar_meta["payload"].get("session_key"),
"full_video_bvid": sidecar_meta["payload"].get("full_video_bvid"),
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or source_path.stat().st_mtime,
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or target_source.stat().st_mtime,
}
task = self.create_task_from_file(target_source, settings, context_payload=context_payload)
accepted.append(

View File

@ -3,6 +3,7 @@ from __future__ import annotations
import json
import random
import re
import shutil
import time
from pathlib import Path
from typing import Any
@ -11,9 +12,13 @@ from biliup_next.core.errors import ModuleError
from biliup_next.core.models import PublishRecord, Task, utc_now_iso
from biliup_next.core.providers import ProviderManifest
from biliup_next.infra.adapters.biliup_cli import BiliupCliAdapter
from biliup_next.infra.video_links import link_context_for_task
from biliup_next.infra.workspace_paths import resolve_task_work_dir
DESC_MAX_CHARS = 1900
class BiliupCliPublishProvider:
def __init__(self, adapter: BiliupCliAdapter | None = None) -> None:
self.adapter = adapter or BiliupCliAdapter()
@ -36,7 +41,7 @@ class BiliupCliPublishProvider:
publish_progress = work_dir / "publish_progress.json"
config = self._load_upload_config(Path(str(settings["upload_config_file"])))
video_files = [artifact.path for artifact in clip_videos]
video_files = self._prepare_publish_video_files(work_dir, [artifact.path for artifact in clip_videos])
if not video_files:
raise ModuleError(
code="PUBLISH_NO_CLIPS",
@ -64,10 +69,13 @@ class BiliupCliPublishProvider:
"daily_quote": quote.get("text", ""),
"quote_author": quote.get("author", ""),
}
template_vars.update(link_context_for_task(task, settings.get("__repo"), settings))
template = config.get("template", {})
title = template.get("title", "{streamer}_{date}").format(**template_vars)
description = template.get("description", "{songs_list}").format(**template_vars)
dynamic = template.get("dynamic", "").format(**template_vars)
description = self._fit_bilibili_desc(
self._drop_empty_link_lines(template.get("description", "{songs_list}").format(**template_vars))
)
dynamic = self._drop_empty_link_lines(template.get("dynamic", "").format(**template_vars))
tags = template.get("tag", "翻唱,唱歌,音乐").format(**template_vars)
streamer_cfg = config.get("streamers", {})
if streamer in streamer_cfg:
@ -90,8 +98,12 @@ class BiliupCliPublishProvider:
first_batch = video_files[:5]
remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
progress = self._load_publish_progress(publish_progress)
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
progress_bvid = str(progress.get("bvid", "")).strip()
if not existing_bvid.startswith("BV") and progress_bvid.startswith("BV"):
existing_bvid = progress_bvid
bvid_file.write_text(existing_bvid, encoding="utf-8")
if upload_done.exists() and existing_bvid.startswith("BV"):
return PublishRecord(
id=None,
@ -201,6 +213,7 @@ class BiliupCliPublishProvider:
upload_cmd.extend(["--cover", cover])
for attempt in range(1, retry_count + 1):
self._append_description_summary(publish_log, description)
result = self.adapter.run(
upload_cmd,
label=f"首批上传[{attempt}/{retry_count}]",
@ -253,6 +266,29 @@ class BiliupCliPublishProvider:
def _wait_seconds(retry_index: int) -> int:
return min(300 * (2**retry_index), 3600)
@staticmethod
def _prepare_publish_video_files(work_dir: Path, video_files: list[str]) -> list[str]:
publish_dir = work_dir / "publish_video"
if publish_dir.exists():
shutil.rmtree(publish_dir)
publish_dir.mkdir(parents=True, exist_ok=True)
prepared: list[str] = []
for index, video_file in enumerate(video_files, start=1):
source = Path(video_file)
name = BiliupCliPublishProvider._strip_clip_number_prefix(source.name)
target = publish_dir / f"{index:02d}_{name}"
try:
target.hardlink_to(source)
except OSError:
shutil.copy2(source, target)
prepared.append(str(target))
return prepared
@staticmethod
def _strip_clip_number_prefix(filename: str) -> str:
return re.sub(r"^\d+[_-]+", "", filename, count=1)
@staticmethod
def _load_upload_config(path: Path) -> dict[str, Any]:
if not path.exists():
@ -262,6 +298,9 @@ class BiliupCliPublishProvider:
@staticmethod
def _parse_filename(filename: str, config: dict[str, Any] | None = None) -> dict[str, str]:
config = config or {}
builtin = BiliupCliPublishProvider._parse_builtin_filename(filename)
if builtin:
return builtin
patterns = config.get("filename_patterns", {}).get("patterns", [])
for pattern_config in patterns:
regex = pattern_config.get("regex")
@ -278,6 +317,48 @@ class BiliupCliPublishProvider:
return data
return {"streamer": filename, "date": ""}
@staticmethod
def _parse_builtin_filename(filename: str) -> dict[str, str]:
patterns = (
r"^(?P<streamer>.+?)唱歌录播\s+(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
r"^(?P<streamer>.+?)唱歌录播[:]\s*(?P<year>\d{4})年(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
)
for pattern in patterns:
match = re.match(pattern, filename)
if not match:
continue
data = match.groupdict()
data["date"] = f"{data['month']}{data['day']}{data['hour']}{data['minute']}"
return data
return {}
@staticmethod
def _drop_empty_link_lines(text: str) -> str:
lines = []
for line in text.splitlines():
stripped = line.strip()
if stripped in {"直播完整版:", "歌曲纯享版:", "上次直播:", "上次纯享:", "上次完整版:"}:
continue
lines.append(line.rstrip())
return "\n".join(lines).strip()
@staticmethod
def _fit_bilibili_desc(text: str, max_chars: int = DESC_MAX_CHARS) -> str:
text = text.strip()
if len(text) <= max_chars:
return text
suffix = "\n\n完整歌单见置顶评论。"
return text[: max(0, max_chars - len(suffix))].rstrip() + suffix
@staticmethod
def _append_description_summary(log_path: Path, description: str) -> None:
log_path.parent.mkdir(parents=True, exist_ok=True)
line = f"description_chars: {len(description)}\n"
if log_path.exists():
log_path.write_text(log_path.read_text(encoding="utf-8") + line, encoding="utf-8")
else:
log_path.write_text(line, encoding="utf-8")
@staticmethod
def _get_random_quote(config: dict[str, Any]) -> dict[str, str]:
quotes = config.get("quotes", [])

View File

@ -26,7 +26,9 @@ class PublishService:
session_contexts = self._session_contexts(task_id)
if len(session_contexts) <= 1:
clip_videos = self._clip_videos_for_task(task_id)
record = provider.publish(task, clip_videos, settings)
provider_settings = dict(settings)
provider_settings["__repo"] = self.repo
record = provider.publish(task, clip_videos, provider_settings)
self._persist_publish_success(task, record)
return record
@ -50,6 +52,7 @@ class PublishService:
if anchor_task is None:
raise RuntimeError(f"anchor task not found: {anchor_context.task_id}")
session_settings = dict(settings)
session_settings["__repo"] = self.repo
session_settings.update(self._session_publish_metadata(anchor_task, session_contexts, settings))
record = provider.publish(anchor_task, clip_videos, session_settings)
for context in session_contexts:

View File

@ -37,13 +37,17 @@ class CodexSongDetector:
work_dir=work_dir,
prompt=TASK_PROMPT,
)
self._write_codex_log(work_dir, result)
if result.returncode != 0:
stderr = result.stderr[-2000:]
stdout = result.stdout[-2000:]
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
raise ModuleError(
code="SONG_DETECT_FAILED",
message="codex exec 执行失败",
retryable=True,
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
retryable=retryable,
details={"stdout": stdout, "stderr": stderr},
)
ensure_song_outputs(
@ -72,3 +76,37 @@ class CodexSongDetector:
created_at=utc_now_iso(),
),
)
@staticmethod
def _write_codex_log(work_dir: Path, result) -> None: # noqa: ANN001
log_path = work_dir / "codex.log"
log_path.write_text(
"\n".join(
[
"codex song_detect",
f"returncode: {result.returncode}",
"",
"stdout:",
result.stdout,
"",
"stderr:",
result.stderr,
"",
]
),
encoding="utf-8",
)
@staticmethod
def _is_auth_error(text: str) -> bool:
lowered = text.lower()
return any(
needle in lowered
for needle in (
"401",
"invalid access token",
"token expired",
"unauthorized",
"authentication",
)
)

View File

@ -43,11 +43,14 @@ class QwenCliSongDetector:
)
if result.returncode != 0:
stderr = result.stderr[-2000:]
stdout = result.stdout[-2000:]
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
raise ModuleError(
code="SONG_DETECT_FAILED",
message="qwen -p 执行失败",
retryable=True,
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
retryable=retryable,
details={"stdout": stdout, "stderr": stderr},
)
ensure_song_outputs(
@ -76,3 +79,17 @@ class QwenCliSongDetector:
created_at=utc_now_iso(),
),
)
@staticmethod
def _is_auth_error(text: str) -> bool:
lowered = text.lower()
return any(
needle in lowered
for needle in (
"401",
"invalid access token",
"token expired",
"unauthorized",
"authentication",
)
)

View File

@ -2,9 +2,12 @@ from __future__ import annotations
import json
import math
import os
import shutil
import subprocess
import time
from contextlib import suppress
from contextlib import contextmanager
from pathlib import Path
from typing import Any
@ -16,6 +19,7 @@ from biliup_next.core.providers import ProviderManifest
LANGUAGE = "zh"
BITRATE_KBPS = 64
MODEL_NAME = "whisper-large-v3-turbo"
SEGMENT_SIZE_SAFETY_RATIO = 0.75
class GroqTranscribeProvider:
@ -30,11 +34,11 @@ class GroqTranscribeProvider:
)
def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
groq_api_key = str(settings.get("groq_api_key", "")).strip()
if not groq_api_key:
groq_api_keys = self._groq_api_keys(settings)
if not groq_api_keys:
raise ModuleError(
code="GROQ_API_KEY_MISSING",
message="未配置 transcribe.groq_api_key",
message="未配置 transcribe.groq_api_key 或 transcribe.groq_api_keys",
retryable=False,
)
try:
@ -55,18 +59,23 @@ class GroqTranscribeProvider:
)
ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
max_file_size_mb = int(settings.get("max_file_size_mb", 23))
max_file_size_mb = int(settings.get("max_file_size_mb", 12))
work_dir = source_path.parent
temp_audio_dir = work_dir / "temp_audio"
checkpoint_dir = work_dir / "transcribe_segments"
temp_audio_dir.mkdir(parents=True, exist_ok=True)
segment_duration = max(1, math.floor((max_file_size_mb * 8 * 1024) / BITRATE_KBPS))
checkpoint_dir.mkdir(parents=True, exist_ok=True)
max_segment_bytes = max(1, max_file_size_mb) * 1024 * 1024
segment_duration = self._initial_segment_duration(max_file_size_mb)
output_pattern = temp_audio_dir / "part_%03d.mp3"
self._extract_audio_segments(
segment_duration = self._extract_audio_segments_with_size_guard(
ffmpeg_bin=ffmpeg_bin,
source_path=source_path,
output_pattern=output_pattern,
segment_duration=segment_duration,
temp_audio_dir=temp_audio_dir,
initial_segment_duration=segment_duration,
max_segment_bytes=max_segment_bytes,
)
segments = sorted(temp_audio_dir.glob("part_*.mp3"))
@ -77,22 +86,47 @@ class GroqTranscribeProvider:
retryable=False,
)
client = Groq(api_key=groq_api_key)
request_timeout_seconds = max(1, int(settings.get("request_timeout_seconds", 180)))
request_max_retries = max(0, int(settings.get("request_max_retries", 1)))
request_retry_backoff_seconds = max(0, int(settings.get("request_retry_backoff_seconds", 30)))
lock_enabled = bool(settings.get("serialize_groq_requests", True))
lock_path = self._groq_lock_path(settings, work_dir)
clients = [Groq(api_key=key, timeout=request_timeout_seconds, max_retries=0) for key in groq_api_keys]
srt_path = work_dir / f"{task.title}.srt"
temp_srt_path = work_dir / f".{task.title}.srt.tmp"
global_idx = 1
try:
with srt_path.open("w", encoding="utf-8") as srt_file:
with temp_srt_path.open("w", encoding="utf-8") as srt_file:
for index, segment in enumerate(segments):
offset_seconds = index * segment_duration
segment_data = self._transcribe_with_retry(client, segment)
segment_checkpoint = checkpoint_dir / f"{segment.stem}.json"
segment_data = self._load_segment_checkpoint(segment_checkpoint, segment_duration=segment_duration)
if segment_data is None:
with self._optional_groq_lock(lock_path, enabled=lock_enabled):
segment_data = self._transcribe_with_retry(
clients,
segment,
request_timeout_seconds=request_timeout_seconds,
request_max_retries=request_max_retries,
request_retry_backoff_seconds=request_retry_backoff_seconds,
)
self._write_segment_checkpoint(
segment_checkpoint,
segment_data,
segment_duration=segment_duration,
audio_file=segment,
)
for chunk in segment_data:
start = self._format_srt_time(float(chunk["start"]) + offset_seconds)
end = self._format_srt_time(float(chunk["end"]) + offset_seconds)
text = str(chunk["text"]).strip()
srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
global_idx += 1
temp_srt_path.replace(srt_path)
finally:
with suppress(FileNotFoundError):
temp_srt_path.unlink()
shutil.rmtree(temp_audio_dir, ignore_errors=True)
return Artifact(
@ -104,12 +138,126 @@ class GroqTranscribeProvider:
{
"provider": "groq",
"model": MODEL_NAME,
"api_key_count": len(groq_api_keys),
"segment_duration_seconds": segment_duration,
"checkpoint_dir": str(checkpoint_dir.resolve()),
}
),
created_at=utc_now_iso(),
)
@staticmethod
def _groq_api_keys(settings: dict[str, Any]) -> list[str]:
keys: list[str] = []
raw_keys = settings.get("groq_api_keys")
if isinstance(raw_keys, list):
keys.extend(str(key).strip() for key in raw_keys if str(key).strip())
legacy_key = str(settings.get("groq_api_key", "")).strip()
if legacy_key:
keys.append(legacy_key)
deduped: list[str] = []
seen: set[str] = set()
for key in keys:
if key in seen:
continue
seen.add(key)
deduped.append(key)
return deduped
@staticmethod
def _initial_segment_duration(max_file_size_mb: int) -> int:
safe_target_mb = max_file_size_mb * SEGMENT_SIZE_SAFETY_RATIO
return max(1, math.floor((safe_target_mb * 8 * 1024) / BITRATE_KBPS))
    def _extract_audio_segments_with_size_guard(
        self,
        *,
        ffmpeg_bin: str,
        source_path: Path,
        output_pattern: Path,
        temp_audio_dir: Path,
        initial_segment_duration: int,
        max_segment_bytes: int,
    ) -> int:
        """Split audio, shrinking the segment duration until every part fits.

        Runs up to four extraction passes.  After each pass the largest
        resulting ``part_*.mp3`` is checked against *max_segment_bytes*; if it
        is too big, the duration is reduced to 75% and the pass repeats.

        Returns:
            The segment duration (seconds) actually used for the final pass.

        Raises:
            ModuleError: non-retryable TRANSCRIBE_AUDIO_SEGMENT_TOO_LARGE when
                even the smallest attempted duration still yields an oversized
                segment.
        """
        segment_duration = initial_segment_duration
        for _attempt in range(4):
            # Each pass starts clean so leftovers from a previous duration
            # cannot be mistaken for the current pass's output.
            self._clear_audio_segments(temp_audio_dir)
            self._extract_audio_segments(
                ffmpeg_bin=ffmpeg_bin,
                source_path=source_path,
                output_pattern=output_pattern,
                segment_duration=segment_duration,
            )
            largest_segment = self._largest_audio_segment(temp_audio_dir)
            if largest_segment is None or largest_segment.stat().st_size <= max_segment_bytes:
                return segment_duration
            next_duration = max(1, math.floor(segment_duration * 0.75))
            if next_duration == segment_duration:
                # Duration bottomed out at 1 s; further shrinking is impossible.
                break
            segment_duration = next_duration
        # All passes failed: report the offending segment for diagnostics.
        largest_segment = self._largest_audio_segment(temp_audio_dir)
        largest_size = largest_segment.stat().st_size if largest_segment else 0
        raise ModuleError(
            code="TRANSCRIBE_AUDIO_SEGMENT_TOO_LARGE",
            message="音频分片超过 Groq 上传安全阈值",
            retryable=False,
            details={
                "largest_segment": str(largest_segment) if largest_segment else None,
                "largest_segment_bytes": largest_size,
                "max_segment_bytes": max_segment_bytes,
            },
        )
@staticmethod
def _clear_audio_segments(temp_audio_dir: Path) -> None:
for path in temp_audio_dir.glob("part_*.mp3"):
path.unlink(missing_ok=True)
@staticmethod
def _largest_audio_segment(temp_audio_dir: Path) -> Path | None:
segments = list(temp_audio_dir.glob("part_*.mp3"))
if not segments:
return None
return max(segments, key=lambda path: path.stat().st_size)
@staticmethod
def _load_segment_checkpoint(checkpoint_path: Path, *, segment_duration: int) -> list[dict[str, Any]] | None:
if not checkpoint_path.exists():
return None
try:
data = json.loads(checkpoint_path.read_text(encoding="utf-8"))
if data.get("model") != MODEL_NAME or data.get("language") != LANGUAGE:
return None
if data.get("segment_duration_seconds") != segment_duration:
return None
segments = data.get("segments")
if not isinstance(segments, list):
return None
return [dict(segment) for segment in segments]
except Exception:
return None
@staticmethod
def _write_segment_checkpoint(
checkpoint_path: Path,
segments: list[dict[str, Any]],
*,
segment_duration: int,
audio_file: Path,
) -> None:
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
temp_path = checkpoint_path.with_suffix(f"{checkpoint_path.suffix}.tmp")
payload = {
"provider": "groq",
"model": MODEL_NAME,
"language": LANGUAGE,
"audio_file": audio_file.name,
"segment_duration_seconds": segment_duration,
"segments": segments,
}
temp_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
temp_path.replace(checkpoint_path)
def _extract_audio_segments(
self,
*,
@ -156,31 +304,141 @@ class GroqTranscribeProvider:
details={"stderr": exc.stderr[-2000:], "stdout": exc.stdout[-2000:]},
) from exc
def _transcribe_with_retry(self, client: Any, audio_file: Path) -> list[dict[str, Any]]:
retry_count = 0
while True:
@staticmethod
def _groq_lock_path(settings: dict[str, Any], fallback_work_dir: Path) -> Path:
session_dir = settings.get("session_dir")
if isinstance(session_dir, str) and session_dir:
return Path(session_dir).resolve().parent / "groq_transcribe.lock"
return fallback_work_dir / "groq_transcribe.lock"
@staticmethod
@contextmanager
def _optional_groq_lock(lock_path: Path, *, enabled: bool):
if not enabled:
yield
return
lock_path.parent.mkdir(parents=True, exist_ok=True)
with lock_path.open("w", encoding="utf-8") as lock_file:
try:
with audio_file.open("rb") as file_handle:
response = client.audio.transcriptions.create(
file=(audio_file.name, file_handle.read()),
model=MODEL_NAME,
response_format="verbose_json",
language=LANGUAGE,
temperature=0.0,
)
return [dict(segment) for segment in response.segments]
except Exception as exc: # noqa: BLE001
retry_count += 1
err_str = str(exc)
if "429" in err_str or "rate_limit" in err_str.lower():
time.sleep(25)
continue
raise ModuleError(
code="GROQ_TRANSCRIBE_FAILED",
message=f"Groq 转录失败: {audio_file.name}",
retryable=True,
details={"error": err_str, "retry_count": retry_count},
) from exc
import fcntl
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
lock_file.write(f"{os.getpid()}\n")
lock_file.flush()
yield
finally:
with suppress(Exception):
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
    def _transcribe_with_retry(
        self,
        clients: list[Any],
        audio_file: Path,
        *,
        request_timeout_seconds: int,
        request_max_retries: int,
        request_retry_backoff_seconds: int,
    ) -> list[dict[str, Any]]:
        """Transcribe one audio segment, rotating API keys and retrying.

        Each round tries every client (one per API key) in order: a
        rate-limit error rotates to the next key; another transient error
        ends the round early; a non-retryable error raises immediately.
        After a failed round, up to *request_max_retries* additional rounds
        run, sleeping *request_retry_backoff_seconds* between them.

        Returns:
            The segment's transcription chunks as plain dicts.

        Raises:
            ModuleError: GROQ_TRANSCRIBE_FAILED when all keys and rounds fail.
        """
        attempt = 0  # completed rounds (one round = every key tried once)
        key_attempts = 0  # individual API calls across all rounds
        last_error = ""
        while True:
            attempt += 1
            for key_index, client in enumerate(clients):
                key_attempts += 1
                try:
                    with audio_file.open("rb") as file_handle:
                        response = client.audio.transcriptions.create(
                            file=(audio_file.name, file_handle.read()),
                            model=MODEL_NAME,
                            response_format="verbose_json",
                            language=LANGUAGE,
                            temperature=0.0,
                            timeout=request_timeout_seconds,
                        )
                    return [dict(segment) for segment in response.segments]
                except Exception as exc:  # noqa: BLE001
                    err_str = str(exc)
                    last_error = err_str
                    # Rate limited: rotate to the next key if one remains.
                    if self._is_rate_limit_error(err_str) and key_index < len(clients) - 1:
                        continue
                    # Permanent failure: raise without burning more rounds.
                    if not self._should_retry_request(err_str):
                        raise self._transcribe_failed(
                            audio_file,
                            err_str,
                            request_attempts=attempt,
                            key_attempts=key_attempts,
                            api_key_count=len(clients),
                            request_timeout_seconds=request_timeout_seconds,
                        ) from exc
                    # Transient: abandon this round; maybe back off and retry.
                    break
            if attempt <= request_max_retries:
                if request_retry_backoff_seconds > 0:
                    time.sleep(request_retry_backoff_seconds)
                continue
            raise self._transcribe_failed(
                audio_file,
                last_error,
                request_attempts=attempt,
                key_attempts=key_attempts,
                api_key_count=len(clients),
                request_timeout_seconds=request_timeout_seconds,
            )
@staticmethod
def _transcribe_failed(
audio_file: Path,
error_text: str,
*,
request_attempts: int,
key_attempts: int,
api_key_count: int,
request_timeout_seconds: int,
) -> ModuleError:
return ModuleError(
code="GROQ_TRANSCRIBE_FAILED",
message=f"Groq 转录失败: {audio_file.name}",
retryable=True,
details={
"error": error_text,
"request_attempts": request_attempts,
"key_attempts": key_attempts,
"api_key_count": api_key_count,
"request_timeout_seconds": request_timeout_seconds,
},
)
@staticmethod
def _is_rate_limit_error(error_text: str) -> bool:
lowered = error_text.lower()
return any(
needle in lowered
for needle in (
"429",
"rate_limit",
"rate limit",
"too many requests",
)
)
@staticmethod
def _should_retry_request(error_text: str) -> bool:
lowered = error_text.lower()
return any(
needle in lowered
for needle in (
"429",
"rate_limit",
"timed out",
"timeout",
"connection error",
"connect error",
"server disconnected",
"502",
"503",
"504",
)
)
@staticmethod
def _format_srt_time(seconds: float) -> str:

View File

@ -88,7 +88,7 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
self.assertEqual(result["split"]["reason"], "comment_disabled")
self.assertEqual(len(api.reply_calls), 1)
self.assertIn("P1:\n1. Song A — Artist A", api.reply_calls[0]["content"])
self.assertIn("P2:\n1. Song B — Artist B", api.reply_calls[0]["content"])
self.assertIn("P2:\n2. Song B — Artist B", api.reply_calls[0]["content"])
def test_split_comment_skips_on_non_anchor_task(self) -> None:
api = _FakeBilibiliApi()
@ -212,6 +212,63 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
self.assertEqual(result["split"]["reason"], "comment_disabled")
self.assertTrue((work_dir / "comment_done.flag").exists())
    def test_comment_format_can_be_configured_from_upload_config(self) -> None:
        """comment_template overrides in upload_config.json must drive the comment text."""
        api = _FakeBilibiliApi()
        provider = BilibiliTopCommentProvider(bilibili_api=api)
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            work_dir = root / "task-1"
            work_dir.mkdir(parents=True, exist_ok=True)
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "source.mp4"),
                title="task-1",
                status="published",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            # Minimal published-task fixture: timeline, song list, own bvid,
            # and the full-video bvid referenced by the custom split header.
            (work_dir / "songs.txt").write_text("00:00:00 Song From Text — Artist T\n", encoding="utf-8")
            (work_dir / "songs.json").write_text(
                json.dumps({"songs": [{"title": "Song A", "artist": "Artist A"}]}),
                encoding="utf-8",
            )
            (work_dir / "bvid.txt").write_text("BV1COMMENT123", encoding="utf-8")
            (work_dir / "full_video_bvid.txt").write_text("BV1FULL12345", encoding="utf-8")
            cookies_file = root / "cookies.json"
            cookies_file.write_text("{}", encoding="utf-8")
            upload_config = root / "upload_config.json"
            # Custom header with two link placeholders plus a custom song line.
            upload_config.write_text(
                json.dumps(
                    {
                        "comment_template": {
                            "split_header": "这是纯享:{current_full_video_link}\n上一场:{previous_full_video_link}",
                            "split_song_line": "#{song_index} {title} / {artist}",
                        }
                    }
                ),
                encoding="utf-8",
            )
            result = provider.comment(
                task,
                {
                    "session_dir": str(root),
                    "cookies_file": str(cookies_file),
                    "upload_config_file": str(upload_config),
                    "post_split_comment": True,
                    "post_full_video_timeline_comment": False,
                },
            )
            self.assertEqual(result["status"], "ok")
            self.assertEqual(result["split"]["reason"], "comment_disabled")
            self.assertEqual(len(api.reply_calls), 1)
            content = str(api.reply_calls[0]["content"])
            self.assertIn("这是纯享https://www.bilibili.com/video/BV1FULL12345", content)
            # The previous-stream line has an empty link, so it must be dropped.
            self.assertNotIn("上一场:", content)
            self.assertIn("#1 Song A / Artist A", content)
def test_full_comment_aggregates_session_parts_on_anchor_task(self) -> None:
api = _FakeBilibiliApi()
provider = BilibiliTopCommentProvider(bilibili_api=api)
@ -263,8 +320,8 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
self.assertEqual(result["full"]["status"], "skipped")
self.assertEqual(result["full"]["reason"], "comment_disabled")
self.assertEqual(len(api.reply_calls), 1)
self.assertIn("P1:\n00:00:01 Song A\n00:02:00 Song B", api.reply_calls[0]["content"])
self.assertIn("P2:\n00:00:03 Song C", api.reply_calls[0]["content"])
self.assertIn("P1:\n1. 00:00:01 Song A\n2. 00:02:00 Song B", api.reply_calls[0]["content"])
self.assertIn("P2:\n3. 00:00:03 Song C", api.reply_calls[0]["content"])
def test_full_comment_skips_on_non_anchor_task(self) -> None:
api = _FakeBilibiliApi()

View File

@ -269,6 +269,117 @@ class BiliupCliPublishProviderTests(unittest.TestCase):
self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
self.assertTrue((work_dir / "upload_done.flag").exists())
    def test_publish_recovers_bvid_from_progress_when_bvid_file_was_removed(self) -> None:
        """Append-resume must recover the bvid from publish_progress.json when bvid.txt is gone."""
        adapter = _FakeBiliupAdapter()
        provider = BiliupCliPublishProvider(adapter=adapter)
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            work_dir = root / "task-1"
            work_dir.mkdir(parents=True, exist_ok=True)
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "source.mp4"),
                title="task-1",
                status="split_done",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            (work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
            (work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
            # The progress file still knows the bvid and that append batch 2
            # already finished — even though bvid.txt itself is missing.
            (work_dir / "publish_progress.json").write_text(
                json.dumps({"bvid": "BV1RESUME1234", "completed_append_batches": [2]}),
                encoding="utf-8",
            )
            upload_config = root / "upload_config.json"
            upload_config.write_text("{}", encoding="utf-8")
            # 15 clips -> first batch of 5 plus two append batches of 5.
            clips = []
            for index in range(1, 16):
                clip_path = work_dir / f"clip-{index}.mp4"
                clip_path.write_text("fake", encoding="utf-8")
                clips.append(
                    Artifact(
                        id=None,
                        task_id=task.id,
                        artifact_type="clip_video",
                        path=str(clip_path),
                        metadata_json="{}",
                        created_at=utc_now_iso(),
                    )
                )
            with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
                record = provider.publish(
                    task,
                    clips,
                    {
                        "session_dir": str(root),
                        "upload_config_file": str(upload_config),
                        "biliup_path": "runtime/biliup",
                        "cookie_file": "runtime/cookies.json",
                        "retry_count": 2,
                        "command_timeout_seconds": 123,
                    },
                )
            self.assertEqual(record.bvid, "BV1RESUME1234")
            # bvid.txt must be re-materialized from the recovered progress bvid.
            self.assertEqual((work_dir / "bvid.txt").read_text(encoding="utf-8"), "BV1RESUME1234")
            # Exactly one command should run: the single remaining append batch.
            self.assertEqual(len(adapter.run_calls), 1)
            self.assertIn("append", adapter.run_calls[0]["cmd"])
            self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
    def test_publish_renumbers_clip_filenames_across_aggregated_sessions(self) -> None:
        """Clips from aggregated sessions must be renumbered into one global sequence."""
        adapter = _FakeBiliupAdapter()
        provider = BiliupCliPublishProvider(adapter=adapter)
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            work_dir = root / "task-1"
            second_dir = root / "task-2"
            work_dir.mkdir(parents=True, exist_ok=True)
            second_dir.mkdir(parents=True, exist_ok=True)
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "source.mp4"),
                title="task-1",
                status="split_done",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            (work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
            (work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
            upload_config = root / "upload_config.json"
            upload_config.write_text("{}", encoding="utf-8")
            # Two sessions' clips, each numbered locally from 01.
            clips = []
            for index in range(1, 11):
                clip_path = work_dir / f"{index:02d}_first-{index}.mp4"
                clip_path.write_text("fake", encoding="utf-8")
                clips.append(Artifact(None, task.id, "clip_video", str(clip_path), "{}", utc_now_iso()))
            for index in range(1, 8):
                clip_path = second_dir / f"{index:02d}_second-{index}.mp4"
                clip_path.write_text("fake", encoding="utf-8")
                clips.append(Artifact(None, "task-2", "clip_video", str(clip_path), "{}", utc_now_iso()))
            with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
                provider.publish(
                    task,
                    clips,
                    {
                        "session_dir": str(root),
                        "upload_config_file": str(upload_config),
                        "biliup_path": "runtime/biliup",
                        "cookie_file": "runtime/cookies.json",
                        "retry_count": 1,
                        "command_timeout_seconds": 123,
                    },
                )
            # Every uploaded path, in command order, across all batches.
            all_uploaded = [part for call in adapter.run_calls for part in call["cmd"] if str(part).endswith(".mp4")]
            # Second session's 01..07 must continue as 11..17 after first's 01..10.
            self.assertTrue(all_uploaded[0].endswith("01_first-1.mp4"))
            self.assertTrue(all_uploaded[9].endswith("10_first-10.mp4"))
            self.assertTrue(all_uploaded[10].endswith("11_second-1.mp4"))
            self.assertTrue(all_uploaded[16].endswith("17_second-7.mp4"))
def test_publish_creates_progress_from_existing_bvid_for_append_resume(self) -> None:
adapter = _FakeBiliupAdapter()
provider = BiliupCliPublishProvider(adapter=adapter)

View File

@ -0,0 +1,85 @@
from __future__ import annotations
import unittest
from types import SimpleNamespace
from biliup_next.core.models import Task, TaskStep, utc_now_iso
from biliup_next.modules.collection.service import CollectionService
class _FakeRegistry:
def __init__(self, provider) -> None: # type: ignore[no-untyped-def]
self.provider = provider
def get(self, provider_type: str, provider_id: str): # type: ignore[no-untyped-def]
return self.provider
class _FakeProvider:
def sync(self, task, target: str, settings: dict[str, object]) -> dict[str, object]: # type: ignore[no-untyped-def]
return {"status": "skipped", "target": target}
class _FakeRepo:
    """In-memory repo stub exposing just what CollectionService touches."""

    def __init__(self) -> None:
        now = utc_now_iso()
        # One running task with both collection steps still pending.
        self.task = Task("task-1", "local_file", "/tmp/source.mp4", "task-1", "running", now, now)
        self.steps = {
            "collection_a": TaskStep(None, "task-1", "collection_a", "pending", None, None, 0, None, None),
            "collection_b": TaskStep(None, "task-1", "collection_b", "pending", None, None, 0, None, None),
        }
        # Records (task_id, status) pairs for assertions on status transitions.
        self.task_status_updates: list[tuple[str, str]] = []

    def get_task(self, task_id: str):  # type: ignore[no-untyped-def]
        # Only the single seeded task is known.
        return self.task if task_id == self.task.id else None

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # type: ignore[no-untyped-def]
        # Replace the stored step with a copy carrying the updated fields;
        # unspecified kwargs fall back to the previous values.
        step = self.steps[step_name]
        self.steps[step_name] = TaskStep(
            step.id,
            step.task_id,
            step.step_name,
            status,
            kwargs.get("error_code", step.error_code),
            kwargs.get("error_message", step.error_message),
            kwargs.get("retry_count", step.retry_count),
            kwargs.get("started_at", step.started_at),
            kwargs.get("finished_at", step.finished_at),
        )

    def list_steps(self, task_id: str) -> list[TaskStep]:
        return list(self.steps.values())

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        # Only record the transition; the stored task object is not mutated.
        self.task_status_updates.append((task_id, status))
class CollectionServiceTests(unittest.TestCase):
    """Status-transition behavior of CollectionService.run for targets a/b."""

    def test_collection_a_restores_commented_status_so_collection_b_can_run(self) -> None:
        """After target "a" succeeds, the task goes back to "commented" while
        collection_b stays pending, so the engine can still schedule "b"."""
        repo = _FakeRepo()
        service = CollectionService(_FakeRegistry(_FakeProvider()), repo)  # type: ignore[arg-type]
        service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {})  # type: ignore[assignment]
        result = service.run("task-1", "a", {"provider": "fake"})
        self.assertEqual(result["status"], "skipped")
        self.assertEqual(repo.steps["collection_a"].status, "succeeded")
        self.assertEqual(repo.steps["collection_b"].status, "pending")
        self.assertEqual(repo.task_status_updates[-1], ("task-1", "commented"))

    def test_collection_b_marks_collection_synced_when_both_steps_succeeded(self) -> None:
        """With collection_a already succeeded, running "b" finalizes the task
        as "collection_synced" and surfaces the cleanup report in the result."""
        repo = _FakeRepo()
        repo.steps["collection_a"] = TaskStep(None, "task-1", "collection_a", "succeeded", None, None, 0, None, utc_now_iso())
        service = CollectionService(_FakeRegistry(_FakeProvider()), repo)  # type: ignore[arg-type]
        service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {"deleted": []})  # type: ignore[assignment]
        result = service.run("task-1", "b", {"provider": "fake"})
        self.assertEqual(result["status"], "skipped")
        self.assertEqual(repo.steps["collection_b"].status, "succeeded")
        self.assertEqual(repo.task_status_updates[-1], ("task-1", "collection_synced"))
        self.assertEqual(result["cleanup"], {"deleted": []})
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -0,0 +1,277 @@
from __future__ import annotations
import json
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import Artifact, Task
from biliup_next.modules.transcribe.providers.groq import GroqTranscribeProvider
class _FakeResponse:
    """Transcription-response double: exposes only the ``segments`` payload."""

    def __init__(self, segments):
        self.segments = segments
class _FakeTranscriptions:
def __init__(self, outcomes: list[object]) -> None:
self.outcomes = list(outcomes)
self.calls: list[dict[str, object]] = []
def create(self, **kwargs): # noqa: ANN003
self.calls.append(kwargs)
outcome = self.outcomes.pop(0)
if isinstance(outcome, Exception):
raise outcome
return outcome
class _FakeGroqClient:
    """Groq client double mirroring the ``client.audio.transcriptions`` path."""

    def __init__(self, outcomes: list[object]) -> None:
        self.audio = SimpleNamespace(transcriptions=_FakeTranscriptions(outcomes))
class GroqTranscribeProviderTests(unittest.TestCase):
    """Behavior of GroqTranscribeProvider: retries, checkpoints, key rotation,
    backoff after rate limits, and the segment-size guard."""

    def test_transcribe_retries_timeout_and_writes_srt_atomically(self) -> None:
        """A timed-out request is retried; the final SRT is written without
        leaving the temporary ``.demo.srt.tmp`` file behind."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            # First call fails with a timeout, second call succeeds.
            client = _FakeGroqClient(
                [
                    RuntimeError("Request timed out."),
                    _FakeResponse([{"start": 0, "end": 1.2, "text": "hello"}]),
                ]
            )
            settings = {
                "groq_api_key": "gsk_test",
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 33,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", return_value=client) as groq_ctor:
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)
            self.assertEqual(Path(artifact.path).read_text(encoding="utf-8"), "1\n00:00:00,000 --> 00:00:01,199\nhello\n\n")
            self.assertFalse((work_dir / ".demo.srt.tmp").exists())
            self.assertEqual(len(client.audio.transcriptions.calls), 2)
            self.assertEqual(client.audio.transcriptions.calls[0]["timeout"], 33)
            self.assertTrue((work_dir / "transcribe_segments" / "part_000.json").exists())
            # Client-side retries are disabled; the provider manages retries itself.
            groq_ctor.assert_called_once_with(api_key="gsk_test", timeout=33, max_retries=0)

    def test_transcribe_reuses_completed_segment_checkpoints(self) -> None:
        """A pre-existing per-segment checkpoint JSON is reused, so only the
        remaining segment is sent to the API; timestamps stay offset-correct."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segments = [work_dir / "temp_audio" / "part_000.mp3", work_dir / "temp_audio" / "part_001.mp3"]
            checkpoint_dir = work_dir / "transcribe_segments"
            checkpoint_dir.mkdir()
            # Checkpoint for part_000 already exists from a previous run.
            (checkpoint_dir / "part_000.json").write_text(
                json.dumps(
                    {
                        "provider": "groq",
                        "model": "whisper-large-v3-turbo",
                        "language": "zh",
                        "audio_file": "part_000.mp3",
                        "segment_duration_seconds": 75,
                        "segments": [{"start": 0, "end": 1, "text": "first"}],
                    }
                ),
                encoding="utf-8",
            )
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                for segment in segments:
                    segment.parent.mkdir(parents=True, exist_ok=True)
                    segment.write_bytes(b"audio")
            client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.5, "text": "second"}])])
            settings = {
                "groq_api_key": "gsk_test",
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 33,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", return_value=client):
                with patch.object(provider, "_initial_segment_duration", return_value=75):
                    with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                        artifact = provider.transcribe(task, source_video, settings)
            srt = Path(artifact.path).read_text(encoding="utf-8")
            self.assertIn("00:00:00,000 --> 00:00:01,000\nfirst", srt)
            # part_001 starts at 75s, so "second" is shifted by one segment.
            self.assertIn("00:01:15,000 --> 00:01:16,500\nsecond", srt)
            self.assertEqual(len(client.audio.transcriptions.calls), 1)
            self.assertEqual(client.audio.transcriptions.calls[0]["file"][0], "part_001.mp3")
            self.assertTrue((checkpoint_dir / "part_001.json").exists())

    def test_transcribe_switches_to_next_api_key_on_rate_limit(self) -> None:
        """A 429 from the first key pool entry causes a new client with the
        next key; the pool in ``groq_api_keys`` takes precedence."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            limited_client = _FakeGroqClient([RuntimeError("Error code: 429 rate_limit")])
            fallback_client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.2, "text": "fallback"}])])
            settings = {
                "groq_api_key": "",
                "groq_api_keys": ["gsk_first", "gsk_second"],
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 20,
                "request_max_retries": 0,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", side_effect=[limited_client, fallback_client]) as groq_ctor:
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)
            self.assertIn("fallback", Path(artifact.path).read_text(encoding="utf-8"))
            self.assertEqual(len(limited_client.audio.transcriptions.calls), 1)
            self.assertEqual(len(fallback_client.audio.transcriptions.calls), 1)
            self.assertEqual([call.kwargs["api_key"] for call in groq_ctor.call_args_list], ["gsk_first", "gsk_second"])

    def test_transcribe_waits_after_all_api_keys_are_rate_limited(self) -> None:
        """When every key in the pool is rate-limited, the provider sleeps for
        the configured backoff and then retries the first key."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            first_client = _FakeGroqClient([RuntimeError("429 rate_limit"), _FakeResponse([{"start": 0, "end": 1, "text": "retry ok"}])])
            second_client = _FakeGroqClient([RuntimeError("429 rate_limit")])
            settings = {
                "groq_api_key": "",
                "groq_api_keys": ["gsk_first", "gsk_second"],
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 20,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 7,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", side_effect=[first_client, second_client]):
                with patch("time.sleep") as sleep_mock:
                    with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                        artifact = provider.transcribe(task, source_video, settings)
            self.assertIn("retry ok", Path(artifact.path).read_text(encoding="utf-8"))
            sleep_mock.assert_called_once_with(7)
            self.assertEqual(len(first_client.audio.transcriptions.calls), 2)
            self.assertEqual(len(second_client.audio.transcriptions.calls), 1)

    def test_transcribe_raises_after_retry_budget_is_exhausted(self) -> None:
        """Non-rate-limit errors past the retry budget surface as ModuleError
        naming the failed audio segment."""
        provider = GroqTranscribeProvider()
        task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            source_path = work_dir / "input.mp4"
            source_path.write_bytes(b"video")
            source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
            segment = work_dir / "temp_audio" / "part_000.mp3"
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")
            client = _FakeGroqClient([RuntimeError("Connection error."), RuntimeError("Connection error.")])
            settings = {
                "groq_api_key": "gsk_test",
                "ffmpeg_bin": "ffmpeg",
                "max_file_size_mb": 23,
                "request_timeout_seconds": 20,
                "request_max_retries": 1,
                "request_retry_backoff_seconds": 0,
                "serialize_groq_requests": False,
            }
            with patch("groq.Groq", return_value=client):
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    with self.assertRaises(ModuleError) as exc_info:
                        provider.transcribe(task, source_video, settings)
            self.assertEqual(exc_info.exception.message, "Groq 转录失败: part_000.mp3")

    def test_initial_segment_duration_keeps_safety_margin(self) -> None:
        """The computed initial segment duration stays below the naive
        size-derived bound, leaving headroom for encoding variance."""
        self.assertLess(GroqTranscribeProvider._initial_segment_duration(12), 1536)

    def test_extract_audio_segments_retries_when_segment_exceeds_size_limit(self) -> None:
        """If an extracted segment exceeds the byte cap, extraction reruns with
        a shorter duration (100 -> 75 here) and returns the final duration."""
        provider = GroqTranscribeProvider()
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            temp_audio_dir = work_dir / "temp_audio"
            temp_audio_dir.mkdir()
            output_pattern = temp_audio_dir / "part_%03d.mp3"
            durations: list[int] = []
            def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
                durations.append(int(kwargs["segment_duration"]))
                # First pass exceeds the 10-byte limit; second pass fits.
                size = 20 if len(durations) == 1 else 5
                (temp_audio_dir / "part_000.mp3").write_bytes(b"x" * size)
            with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                result = provider._extract_audio_segments_with_size_guard(
                    ffmpeg_bin="ffmpeg",
                    source_path=work_dir / "input.mp4",
                    output_pattern=output_pattern,
                    temp_audio_dir=temp_audio_dir,
                    initial_segment_duration=100,
                    max_segment_bytes=10,
                )
            self.assertEqual(durations, [100, 75])
            self.assertEqual(result, 75)
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -0,0 +1,85 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from biliup_next.core.providers import ProviderManifest
from biliup_next.core.registry import Registry
from biliup_next.infra.db import Database
from biliup_next.infra.task_repository import TaskRepository
from biliup_next.modules.ingest.service import IngestService
class _FakeLocalFileProvider:
    """Ingest provider double that only checks the source file exists."""

    # Manifest mirrors the real local_file provider so Registry.register accepts it.
    manifest = ProviderManifest(
        id="local_file",
        name="Fake Local File Ingest",
        version="0.1.0",
        provider_type="ingest_provider",
        entrypoint="tests.test_ingest_scan_stage:_FakeLocalFileProvider",
        capabilities=["ingest"],
        enabled_by_default=True,
    )

    def validate_source(self, source_path: Path, settings: dict[str, object]) -> None:
        # Fail loudly if the service hands us a path that is not a real file.
        if not source_path.exists() or not source_path.is_file():
            raise AssertionError(f"unexpected source path: {source_path}")
class IngestScanStageTests(unittest.TestCase):
    """End-to-end scan_stage behavior against a real SQLite-backed repository."""

    def setUp(self) -> None:
        # Fresh stage/backup/session directories plus an initialized DB per test.
        self.tempdir = tempfile.TemporaryDirectory()
        root = Path(self.tempdir.name)
        self.stage_dir = root / "stage"
        self.backup_dir = root / "backup"
        self.session_dir = root / "session"
        self.stage_dir.mkdir()
        self.backup_dir.mkdir()
        self.session_dir.mkdir()
        db = Database(root / "test.db")
        db.initialize()
        repo = TaskRepository(db)
        registry = Registry()
        provider = _FakeLocalFileProvider()
        registry.register("ingest_provider", "local_file", provider, provider.manifest)
        self.service = IngestService(registry=registry, repo=repo)

    def tearDown(self) -> None:
        self.tempdir.cleanup()

    def test_scan_stage_uses_moved_file_for_reference_timestamp(self) -> None:
        """Accepting a staged file moves it out of stage_dir, creates a task
        keyed by the moved file's stem, and records segment_started_at parsed
        from the timestamp embedded in the filename."""
        source_path = self.stage_dir / "王海颖唱歌录播 04月14日 17时49分.mp4"
        source_path.write_bytes(b"fake-video")
        settings = {
            "provider": "local_file",
            "stage_dir": str(self.stage_dir),
            "backup_dir": str(self.backup_dir),
            "session_dir": str(self.session_dir),
            "allowed_extensions": [".mp4"],
            "ffprobe_bin": "ffprobe",
            "min_duration_seconds": 0,
            "stability_wait_seconds": 0,
            "meta_sidecar_enabled": True,
        }
        # Avoid invoking real ffprobe; pretend the file is 2 minutes long.
        self.service._probe_duration_seconds = lambda *_args, **_kwargs: 120.0  # type: ignore[method-assign]
        result = self.service.scan_stage(settings)
        self.assertEqual(len(result["accepted"]), 1)
        accepted = result["accepted"][0]
        moved_path = Path(str(accepted["source_path"]))
        self.assertTrue(moved_path.exists())
        self.assertFalse(source_path.exists())
        task = self.service.repo.get_task(moved_path.stem)
        self.assertIsNotNone(task)
        context = self.service.repo.get_task_context(moved_path.stem)
        self.assertIsNotNone(context)
        self.assertIsNotNone(context.segment_started_at)
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -2,6 +2,7 @@ from __future__ import annotations
import tempfile
import unittest
from unittest.mock import patch
from pathlib import Path
from biliup_next.core.config import SettingsService
@ -78,6 +79,146 @@ class SettingsServiceTests(unittest.TestCase):
self.assertTrue((config_dir / "settings.staged.json").exists())
self.assertEqual(bundle.settings["paths"]["cookies_file"], str((root / "runtime" / "cookies.json").resolve()))
    def test_load_applies_environment_overrides_before_path_normalization(self) -> None:
        """Environment overrides (both well-known names like GROQ_API_KEY and
        the BILIUP_NEXT__GROUP__KEY convention) are applied, with integer
        coercion per the schema, and relative path overrides are resolved
        against the project root afterwards."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            config_dir = root / "config"
            config_dir.mkdir(parents=True, exist_ok=True)
            (config_dir / "settings.schema.json").write_text(
                """
                {
                  "groups": {
                    "runtime": {
                      "database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}
                    },
                    "paths": {
                      "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                      "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                      "session_dir": {"type": "string", "default": "data/workspace/session"},
                      "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                      "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {
                      "ffprobe_bin": {"type": "string", "default": "ffprobe"},
                      "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"},
                      "yt_dlp_format": {"type": "string", "default": ""}
                    },
                    "transcribe": {
                      "groq_api_key": {"type": "string", "default": "", "sensitive": true},
                      "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "split": {
                      "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "song_detect": {
                      "codex_cmd": {"type": "string", "default": "codex"},
                      "qwen_cmd": {"type": "string", "default": "qwen"}
                    },
                    "publish": {
                      "biliup_path": {"type": "string", "default": "runtime/biliup"},
                      "cookie_file": {"type": "string", "default": "runtime/cookies.json"}
                    },
                    "collection": {
                      "season_id_a": {"type": "integer", "default": 0},
                      "season_id_b": {"type": "integer", "default": 0}
                    }
                  }
                }
                """,
                encoding="utf-8",
            )
            (config_dir / "settings.standalone.example.json").write_text(
                """
                {
                  "runtime": {"database_path": "data/workspace/biliup_next.db"},
                  "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                  },
                  "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp", "yt_dlp_format": ""},
                  "transcribe": {"groq_api_key": "", "ffmpeg_bin": "ffmpeg"},
                  "split": {"ffmpeg_bin": "ffmpeg"},
                  "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                  "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"},
                  "collection": {"season_id_a": 0, "season_id_b": 0}
                }
                """,
                encoding="utf-8",
            )
            # clear=True isolates the test from the real process environment.
            with patch.dict(
                "os.environ",
                {
                    "GROQ_API_KEY": "gsk_test",
                    "COLLECTION_SEASON_ID_A": "7196643",
                    "BILIUP_NEXT__COLLECTION__SEASON_ID_B": "7196624",
                    "BILIUP_NEXT__PATHS__STAGE_DIR": "data/custom-stage",
                },
                clear=True,
            ):
                bundle = SettingsService(root).load()
            self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "gsk_test")
            self.assertEqual(bundle.settings["collection"]["season_id_a"], 7196643)
            self.assertEqual(bundle.settings["collection"]["season_id_b"], 7196624)
            self.assertEqual(bundle.settings["paths"]["stage_dir"], str((root / "data" / "custom-stage").resolve()))
    def test_empty_environment_values_do_not_override_settings(self) -> None:
        """An environment variable that is set but empty must not clobber the
        value coming from the settings file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            config_dir = root / "config"
            config_dir.mkdir(parents=True, exist_ok=True)
            (config_dir / "settings.schema.json").write_text(
                """
                {
                  "groups": {
                    "runtime": {"database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}},
                    "paths": {
                      "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                      "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                      "session_dir": {"type": "string", "default": "data/workspace/session"},
                      "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                      "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {"ffprobe_bin": {"type": "string", "default": "ffprobe"}, "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"}},
                    "transcribe": {"groq_api_key": {"type": "string", "default": ""}, "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "split": {"ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "song_detect": {"codex_cmd": {"type": "string", "default": "codex"}, "qwen_cmd": {"type": "string", "default": "qwen"}},
                    "publish": {"biliup_path": {"type": "string", "default": "runtime/biliup"}, "cookie_file": {"type": "string", "default": "runtime/cookies.json"}}
                  }
                }
                """,
                encoding="utf-8",
            )
            (config_dir / "settings.standalone.example.json").write_text(
                """
                {
                  "runtime": {"database_path": "data/workspace/biliup_next.db"},
                  "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                  },
                  "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp"},
                  "transcribe": {"groq_api_key": "from-file", "ffmpeg_bin": "ffmpeg"},
                  "split": {"ffmpeg_bin": "ffmpeg"},
                  "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                  "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"}
                }
                """,
                encoding="utf-8",
            )
            # GROQ_API_KEY is present but blank; the file value must win.
            with patch.dict("os.environ", {"GROQ_API_KEY": ""}, clear=True):
                bundle = SettingsService(root).load()
            self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "from-file")
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -1,11 +1,15 @@
from __future__ import annotations
import json
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from biliup_next.core.models import Artifact, Task, utc_now_iso
from biliup_next.infra.adapters.codex_cli import CodexCliAdapter
from biliup_next.modules.song_detect.providers.codex import CodexSongDetector
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
@ -38,6 +42,33 @@ class FakeQwenCliAdapter:
return type("Result", (), {"returncode": self.returncode, "stdout": "ok", "stderr": ""})()
class FakeCodexCliAdapter:
    """Codex CLI adapter double that fabricates a single detected song.

    Instead of shelling out, it drops a fixed ``songs.json`` into the given
    work directory and returns a canned process-result object.
    """

    def __init__(self, returncode: int = 0) -> None:
        self.returncode = returncode

    def run_song_detect(self, *, codex_cmd: str, work_dir: Path, prompt: str):  # noqa: ANN001
        payload = {
            "songs": [
                {
                    "start": "00:01:23,000",
                    "end": "00:03:45,000",
                    "title": "测试歌曲",
                    "artist": "测试歌手",
                    "confidence": 0.93,
                    "evidence": "歌词命中",
                }
            ]
        }
        (work_dir / "songs.json").write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
        result_cls = type("Result", (), {"returncode": self.returncode, "stdout": "codex stdout", "stderr": "codex stderr"})
        return result_cls()
class SongDetectProviderTests(unittest.TestCase):
def test_qwen_cli_provider_generates_json_and_txt_artifacts(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
@ -72,6 +103,67 @@ class SongDetectProviderTests(unittest.TestCase):
self.assertTrue(Path(songs_txt.path).exists())
self.assertIn("测试歌曲", Path(songs_txt.path).read_text(encoding="utf-8"))
    def test_codex_provider_writes_execution_output_to_session_log(self) -> None:
        """The Codex detector keeps stdout/stderr/returncode out of artifact
        metadata and appends them to codex.log next to the source video."""
        with tempfile.TemporaryDirectory() as tmpdir:
            work_dir = Path(tmpdir)
            subtitle_path = work_dir / "subtitle.srt"
            subtitle_path.write_text("1\n00:00:00,000 --> 00:00:03,000\n测试字幕\n", encoding="utf-8")
            provider = CodexSongDetector(adapter=FakeCodexCliAdapter())
            task = Task(
                id="task-1",
                source_type="local_file",
                source_path=str(work_dir / "video.mp4"),
                title="task-1",
                status="transcribed",
                created_at=utc_now_iso(),
                updated_at=utc_now_iso(),
            )
            subtitle = Artifact(
                id=None,
                task_id=task.id,
                artifact_type="subtitle_srt",
                path=str(subtitle_path),
                metadata_json=None,
                created_at=utc_now_iso(),
            )
            songs_json, songs_txt = provider.detect(task, subtitle, {"codex_cmd": "codex"})
            json_metadata = json.loads(songs_json.metadata_json)
            txt_metadata = json.loads(songs_txt.metadata_json)
            self.assertEqual(json_metadata["provider"], "codex")
            self.assertEqual(txt_metadata["provider"], "codex")
            # Execution details must not leak into artifact metadata.
            self.assertNotIn("execution", json_metadata)
            codex_log = work_dir / "codex.log"
            self.assertTrue(codex_log.exists())
            log_text = codex_log.read_text(encoding="utf-8")
            self.assertIn("returncode: 0", log_text)
            self.assertIn("codex stdout", log_text)
            self.assertIn("codex stderr", log_text)
    def test_codex_cli_adapter_disables_inner_sandbox_and_normalizes_proxy_env(self) -> None:
        """The real adapter must bypass Codex's inner sandbox flags and prefix
        a scheme-less HTTPS_PROXY value with ``http://`` before spawning."""
        with tempfile.TemporaryDirectory() as tmpdir:
            calls = []
            def fake_run(cmd, **kwargs):  # noqa: ANN001
                calls.append((cmd, kwargs))
                return type("Result", (), {"returncode": 0, "stdout": "", "stderr": ""})()
            # Proxy value deliberately lacks a scheme to exercise normalization.
            with patch.dict(os.environ, {"HTTPS_PROXY": "192.168.1.100:7897"}, clear=True):
                with patch("subprocess.run", side_effect=fake_run):
                    CodexCliAdapter().run_song_detect(
                        codex_cmd="codex",
                        work_dir=Path(tmpdir),
                        prompt="detect songs",
                    )
            cmd, kwargs = calls[0]
            self.assertIn("--dangerously-bypass-approvals-and-sandbox", cmd)
            self.assertNotIn("--full-auto", cmd)
            self.assertNotIn("workspace-write", cmd)
            self.assertEqual(kwargs["env"]["HTTPS_PROXY"], "http://192.168.1.100:7897")
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -0,0 +1,103 @@
from __future__ import annotations
import unittest
from types import SimpleNamespace
from biliup_next.app.retry_meta import retry_meta_for_step
from biliup_next.app.task_engine import next_runnable_step
from biliup_next.app.task_policies import resolve_failure
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import TaskStep
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
class _Repo:
    """Repository double seeded with one running song_detect step.

    Records every step/task status update so retry-policy assertions can
    inspect what the engine wrote.
    """

    def __init__(self) -> None:
        self.steps = [TaskStep(None, "task-1", "song_detect", "running", None, None, 0, None, None)]
        self.step_updates: list[tuple] = []
        self.task_updates: list[tuple] = []

    def list_steps(self, task_id: str):  # noqa: ANN001
        return list(self.steps)

    def get_task(self, task_id: str):  # noqa: ANN001
        return SimpleNamespace(id=task_id, status="running")

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # noqa: ANN001
        self.step_updates.append((task_id, step_name, status, kwargs))
        # Replace the stored step with a fresh TaskStep reflecting the update.
        self.steps = [
            TaskStep(
                None,
                task_id,
                step_name,
                status,
                kwargs.get("error_code"),
                kwargs.get("error_message"),
                kwargs.get("retry_count", 0),
                kwargs.get("started_at"),
                kwargs.get("finished_at"),
            )
        ]

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        self.task_updates.append((task_id, status, updated_at))
class SongDetectRetryPolicyTests(unittest.TestCase):
    """Retry scheduling and failure resolution for the song_detect step."""

    def test_retry_meta_reports_wait_window_for_song_detect(self) -> None:
        """A retryable song_detect step finished in the future reports a full
        600-second wait (10-minute schedule) and is not yet due."""
        step = TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")
        payload = retry_meta_for_step(step, {"song_detect": {"retry_schedule_minutes": [10]}})
        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_song_detect(self) -> None:
        """The engine schedules nothing while song_detect is inside its retry
        wait window, surfacing a waiting payload instead."""
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "song_detect": TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }
        step_name, waiting_payload = next_runnable_step(task, steps, state)
        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "song_detect")

    def test_resolve_failure_adds_song_detect_retry_delay(self) -> None:
        """A retryable SONG_DETECT_FAILED error picks the first schedule entry
        (5 minutes -> 300 seconds) as the next retry delay."""
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }
        result = resolve_failure(task, repo, state, ModuleError(code="SONG_DETECT_FAILED", message="boom", retryable=True))
        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)

    def test_qwen_auth_errors_are_not_retryable(self) -> None:
        """Auth failures from the Qwen CLI are classified as such; generic
        network noise is not."""
        self.assertTrue(QwenCliSongDetector._is_auth_error("[API Error: 401 invalid access token or token expired]"))
        self.assertFalse(QwenCliSongDetector._is_auth_error("temporary network failure"))
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

View File

@ -51,6 +51,7 @@ class TaskEngineTests(unittest.TestCase):
}
state = {
"settings": {
"transcribe": {},
"comment": {"enabled": True},
"collection": {"enabled": True},
"paths": {},

View File

@ -0,0 +1,84 @@
from __future__ import annotations
import unittest
from types import SimpleNamespace
from biliup_next.app.retry_meta import retry_meta_for_step
from biliup_next.app.task_engine import next_runnable_step
from biliup_next.app.task_policies import resolve_failure
from biliup_next.core.errors import ModuleError
from biliup_next.core.models import TaskStep
class _Repo:
def __init__(self) -> None:
self.steps = [TaskStep(None, "task-1", "transcribe", "running", None, None, 0, None, None)]
self.step_updates: list[tuple] = []
self.task_updates: list[tuple] = []
def list_steps(self, task_id: str): # noqa: ANN001
return list(self.steps)
def get_task(self, task_id: str): # noqa: ANN001
return SimpleNamespace(id=task_id, status="running")
def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None: # noqa: ANN001
self.step_updates.append((task_id, step_name, status, kwargs))
self.steps = [TaskStep(None, task_id, step_name, status, kwargs.get("error_code"), kwargs.get("error_message"), kwargs.get("retry_count", 0), kwargs.get("started_at"), kwargs.get("finished_at"))]
def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
self.task_updates.append((task_id, status, updated_at))
class TranscribeRetryPolicyTests(unittest.TestCase):
    """Retry scheduling and failure resolution for the transcribe step."""

    def test_retry_meta_reports_wait_window_for_transcribe(self) -> None:
        """A retryable transcribe step finished in the future reports a full
        600-second wait (10-minute schedule) and is not yet due."""
        step = TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")
        payload = retry_meta_for_step(step, {"transcribe": {"retry_schedule_minutes": [10]}})
        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_transcribe(self) -> None:
        """The engine schedules nothing while transcribe is inside its retry
        wait window, surfacing a waiting payload instead."""
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "transcribe": TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }
        step_name, waiting_payload = next_runnable_step(task, steps, state)
        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "transcribe")

    def test_resolve_failure_adds_transcribe_retry_delay(self) -> None:
        """A retryable GROQ_TRANSCRIBE_FAILED error picks the first schedule
        entry (5 minutes -> 300 seconds) as the next retry delay."""
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }
        result = resolve_failure(task, repo, state, ModuleError(code="GROQ_TRANSCRIBE_FAILED", message="boom", retryable=True))
        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)
if __name__ == "__main__":  # Allow running this test module directly.
    unittest.main()

170
tests/test_video_links.py Normal file
View File

@ -0,0 +1,170 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
import subprocess
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list
from biliup_next.infra.video_links import link_context_for_task
class VideoLinksTests(unittest.TestCase):
    def test_fetch_biliup_list_keeps_pubing_videos(self) -> None:
        """Parsing the biliup CLI listing keeps in-review and public videos
        while dropping log lines and private entries."""
        output = (
            "2026-04-22 15:56:43 INFO biliup_cli::uploader: user: test\n"
            "BVREVIEW\t王海颖唱歌录播 04月22日 15时56分\t审核中\n"
            "BVPUB\t王海颖唱歌录播 04月20日 22时08分\t开放浏览\n"
            "BVPRIVATE\t私密视频\t仅自己可见\n"
        )
        with patch(
            "biliup_next.infra.adapters.full_video_locator.subprocess.run",
            return_value=subprocess.CompletedProcess(["biliup"], 0, stdout=output, stderr=""),
        ):
            videos = fetch_biliup_list({"biliup_path": "biliup", "cookie_file": "cookies.json"}, max_pages=1)
        self.assertEqual(
            videos,
            [
                {"bvid": "BVREVIEW", "title": "王海颖唱歌录播 04月22日 15时56分"},
                {"bvid": "BVPUB", "title": "王海颖唱歌录播 04月20日 22时08分"},
            ],
        )
def test_previous_live_falls_back_to_biliup_list(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
source_path = Path(tmpdir) / "source.mp4"
source_path.write_bytes(b"")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 04月19日 22时10分",
source_path=str(source_path),
)
repo = SimpleNamespace(get_task_context=lambda task_id: None)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.video_links.fetch_biliup_list",
return_value=[
{"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月18日 22时06分 共10首歌"},
{"bvid": "BVNEWER", "title": "王海颖唱歌录播 04月20日 22时00分"},
{"bvid": "BVPREV", "title": "王海颖唱歌录播 04月18日 22时06分"},
{"bvid": "BVOLDER", "title": "王海颖唱歌录播 04月17日 22时00分"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["previous_full_video_bvid"], "BVPREV")
self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVPREV")
self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")
def test_previous_live_merges_repo_and_biliup_list_links(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
root = Path(tmpdir)
current_path = root / "current" / "source.mp4"
previous_path = root / "previous" / "source.mp4"
current_path.parent.mkdir()
previous_path.parent.mkdir()
current_path.write_bytes(b"")
previous_path.write_bytes(b"")
(previous_path.parent / "full_video_bvid.txt").write_text("BVLOCALFULL", encoding="utf-8")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 04月19日 22时10分",
source_path=str(current_path),
)
previous_task = SimpleNamespace(
id="task-previous",
title="王海颖唱歌录播 04月18日 22时06分",
source_path=str(previous_path),
)
current_context = SimpleNamespace(
task_id=task.id,
streamer="王海颖",
session_key="王海颖-0419",
segment_started_at="2026-04-19T22:10:00",
)
previous_context = SimpleNamespace(
task_id=previous_task.id,
streamer="王海颖",
session_key="王海颖-0418",
segment_started_at="2026-04-18T22:06:00",
full_video_bvid="BVLOCALFULL",
)
tasks = {task.id: task, previous_task.id: previous_task}
contexts = {task.id: current_context, previous_task.id: previous_context}
repo = SimpleNamespace(
get_task_context=lambda task_id: contexts.get(task_id),
get_task=lambda task_id: tasks.get(task_id),
find_recent_task_contexts=lambda streamer, limit=50: [current_context, previous_context],
)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.video_links.fetch_biliup_list",
return_value=[
{"bvid": "BVPURE", "title": "【王海颖(歌曲纯享版)】04月18日 22时06分 共18首歌"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["previous_full_video_bvid"], "BVLOCALFULL")
self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVLOCALFULL")
self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")
def test_previous_live_biliup_list_handles_year_boundary(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
source_path = Path(tmpdir) / "source.mp4"
source_path.write_bytes(b"")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 01月01日 22时10分",
source_path=str(source_path),
)
repo = SimpleNamespace(get_task_context=lambda task_id: None)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.video_links.fetch_biliup_list",
return_value=[
{"bvid": "BVPREV", "title": "王海颖唱歌录播 12月31日 22时06分"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["previous_full_video_bvid"], "BVPREV")
def test_current_full_video_falls_back_to_biliup_list(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
source_path = Path(tmpdir) / "source.mp4"
source_path.write_bytes(b"")
task = SimpleNamespace(
id="task-current",
title="王海颖唱歌录播 04月22日 15时56分",
source_path=str(source_path),
)
repo = SimpleNamespace(get_task_context=lambda task_id: None)
settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}
with patch(
"biliup_next.infra.adapters.full_video_locator.fetch_biliup_list",
return_value=[
{"bvid": "BVFULL", "title": "王海颖唱歌录播 04月22日 15时56分"},
{"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月22日 15时56分 共20首歌"},
],
):
context = link_context_for_task(task, repo, settings)
self.assertEqual(context["current_full_video_bvid"], "BVFULL")
self.assertEqual(context["current_full_video_link"], "https://www.bilibili.com/video/BVFULL")
self.assertEqual((source_path.parent / "full_video_bvid.txt").read_text(encoding="utf-8"), "BVFULL")
# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,116 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from biliup_next.core.models import Task, utc_now_iso
from biliup_next.infra.workspace_cleanup import WorkspaceCleanupService
class _FakeRepo:
def __init__(self, tasks: list[Task], session_key: str | None = None) -> None:
self.tasks = {task.id: task for task in tasks}
self.session_key = session_key
self.deleted_artifacts: list[tuple[str, str]] = []
self.deleted_artifact_paths: list[tuple[str, str]] = []
def get_task(self, task_id: str) -> Task | None:
return self.tasks.get(task_id)
def get_task_context(self, task_id: str): # noqa: ANN201
if self.session_key is None or task_id not in self.tasks:
return None
return SimpleNamespace(task_id=task_id, session_key=self.session_key)
def list_task_contexts_by_session_key(self, session_key: str): # noqa: ANN201
if session_key != self.session_key:
return []
return [SimpleNamespace(task_id=task_id, session_key=session_key) for task_id in self.tasks]
def delete_artifacts(self, task_id: str, artifact_type: str) -> None:
self.deleted_artifacts.append((task_id, artifact_type))
def delete_artifact_by_path(self, task_id: str, path: str) -> None:
self.deleted_artifact_paths.append((task_id, path))
def _make_task(task_id: str, root: Path) -> Task:
    """Build a collection_synced Task with an on-disk workspace under *root*.

    The workspace contains a non-empty source.mp4 plus split_video/ and
    publish_video/ directories, each holding a single clip file.
    """
    timestamp = utc_now_iso()
    workspace = root / task_id
    workspace.mkdir(parents=True)
    source_file = workspace / "source.mp4"
    source_file.write_bytes(b"source")
    for subdir in ("split_video", "publish_video"):
        clip_dir = workspace / subdir
        clip_dir.mkdir()
        clip_path = clip_dir / "01_song.mp4"
        clip_path.write_bytes(b"clip")
    return Task(task_id, "local_file", str(source_file), task_id, "collection_synced", timestamp, timestamp)
class WorkspaceCleanupServiceTests(unittest.TestCase):
    """Behavioural coverage for WorkspaceCleanupService.cleanup_task_outputs."""

    def test_cleanup_removes_source_split_and_publish_video_for_single_task(self) -> None:
        """With both flags on, the source file and both clip directories are removed."""
        with tempfile.TemporaryDirectory() as tmp:
            workspace_root = Path(tmp)
            task = _make_task("task-1", workspace_root)
            repo = _FakeRepo([task])
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": True,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(task.id, options)
            task_dir = workspace_root / "task-1"
            for leftover in ("source.mp4", "split_video", "publish_video"):
                self.assertFalse((task_dir / leftover).exists())
            self.assertEqual(result["task_ids"], ["task-1"])
            # Clip artifacts are deleted by type; the source is deleted by path.
            self.assertEqual(repo.deleted_artifacts, [("task-1", "clip_video")])
            expected_source = str((task_dir / "source.mp4").resolve())
            self.assertEqual(repo.deleted_artifact_paths, [("task-1", expected_source)])

    def test_cleanup_removes_all_tasks_in_same_session(self) -> None:
        """Cleaning one task also cleans every sibling task in the same session."""
        with tempfile.TemporaryDirectory() as tmp:
            workspace_root = Path(tmp)
            first = _make_task("task-1", workspace_root)
            second = _make_task("task-2", workspace_root)
            repo = _FakeRepo([first, second], session_key="session-1")
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": True,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(first.id, options)
            for task_id in ("task-1", "task-2"):
                task_dir = workspace_root / task_id
                for leftover in ("source.mp4", "split_video", "publish_video"):
                    self.assertFalse((task_dir / leftover).exists())
            self.assertEqual(result["task_ids"], ["task-1", "task-2"])
            self.assertEqual(
                repo.deleted_artifacts,
                [("task-1", "clip_video"), ("task-2", "clip_video")],
            )

    def test_cleanup_skips_missing_source_video(self) -> None:
        """An already-missing source is reported under "skipped" and never deleted."""
        with tempfile.TemporaryDirectory() as tmp:
            workspace_root = Path(tmp)
            task = _make_task("task-1", workspace_root)
            source_file = Path(task.source_path)
            source_file.unlink()
            repo = _FakeRepo([task])
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": False,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(task.id, options)
            self.assertIn(str(source_file.resolve()), result["skipped"])
            self.assertEqual(repo.deleted_artifact_paths, [])
# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()