feat: package docker deployment and publish flow
This commit is contained in:
19
.dockerignore
Normal file
19
.dockerignore
Normal file
@ -0,0 +1,19 @@
|
||||
.git
|
||||
.venv
|
||||
.pytest_cache
|
||||
__pycache__
|
||||
*.pyc
|
||||
|
||||
data/
|
||||
runtime/cookies.json
|
||||
runtime/upload_config.json
|
||||
runtime/biliup
|
||||
runtime/codex/
|
||||
runtime/logs/
|
||||
|
||||
frontend/node_modules/
|
||||
frontend/dist/
|
||||
|
||||
.env
|
||||
config/settings.json
|
||||
config/settings.staged.json
|
||||
49
.env.example
Normal file
49
.env.example
Normal file
@ -0,0 +1,49 @@
|
||||
# Web/API port exposed on the host.
|
||||
BILIUP_NEXT_PORT=8000
|
||||
|
||||
# Image used by both api and worker. Override this when using a versioned tag
|
||||
# or a private registry image, for example 192.168.1.100:25490/biliup-next:20260420.
|
||||
BILIUP_NEXT_IMAGE=biliup-next:local
|
||||
|
||||
# Worker polling interval in seconds.
|
||||
WORKER_INTERVAL=5
|
||||
|
||||
# Container timezone.
|
||||
TZ=Asia/Shanghai
|
||||
|
||||
# Optional container outbound proxy. In Docker Desktop/WSL, host.docker.internal
|
||||
# points to the Windows host; set this to your local proxy port.
|
||||
# These values are also passed as Docker build args for apt/pip/npm.
|
||||
# HTTP_PROXY=http://host.docker.internal:7897
|
||||
# HTTPS_PROXY=http://host.docker.internal:7897
|
||||
# ALL_PROXY=http://host.docker.internal:7897
|
||||
# NO_PROXY=localhost,127.0.0.1,api,worker
|
||||
#
|
||||
# Docker build-time proxy. Separate names avoid being overridden by host
|
||||
# HTTP_PROXY/HTTPS_PROXY when Compose interpolates build args.
|
||||
# DOCKER_BUILD_HTTP_PROXY=http://host.docker.internal:7897
|
||||
# DOCKER_BUILD_HTTPS_PROXY=http://host.docker.internal:7897
|
||||
# DOCKER_BUILD_ALL_PROXY=http://host.docker.internal:7897
|
||||
# DOCKER_BUILD_NO_PROXY=localhost,127.0.0.1,api,worker
|
||||
|
||||
# Required for Groq transcription. Prefer this env var over writing the key
|
||||
# directly into config/settings.json.
|
||||
GROQ_API_KEY=
|
||||
# Optional key pool. Use a JSON array; keys here are tried before GROQ_API_KEY.
|
||||
# GROQ_API_KEYS=["gsk_xxx","gsk_yyy"]
|
||||
|
||||
# Optional for the Codex song detector when you do not mount an existing
|
||||
# Codex login state into runtime/codex.
|
||||
OPENAI_API_KEY=
|
||||
|
||||
# Bilibili collection IDs.
|
||||
# A: live full-video collection
|
||||
# B: live split/pure-song collection
|
||||
COLLECTION_SEASON_ID_A=7196643
|
||||
COLLECTION_SEASON_ID_B=7196624
|
||||
|
||||
# Optional explicit config overrides. The generic format is:
|
||||
# BILIUP_NEXT__GROUP__FIELD=value
|
||||
#
|
||||
# BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
|
||||
# BILIUP_NEXT__PUBLISH__RATE_LIMIT_RETRY_SCHEDULE_MINUTES=[15,30,60]
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,4 +1,8 @@
|
||||
.venv/
|
||||
.codex
|
||||
.codex/
|
||||
.env
|
||||
.tmp-tests/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
@ -12,6 +16,7 @@ systemd/rendered/
|
||||
runtime/cookies.json
|
||||
runtime/upload_config.json
|
||||
runtime/biliup
|
||||
runtime/codex/
|
||||
runtime/logs/
|
||||
|
||||
frontend/node_modules/
|
||||
|
||||
61
Dockerfile
Normal file
61
Dockerfile
Normal file
@ -0,0 +1,61 @@
|
||||
FROM node:24-bookworm-slim AS frontend-builder
|
||||
|
||||
ARG HTTP_PROXY
|
||||
ARG HTTPS_PROXY
|
||||
ARG ALL_PROXY
|
||||
ARG NO_PROXY
|
||||
ARG http_proxy
|
||||
ARG https_proxy
|
||||
ARG all_proxy
|
||||
ARG no_proxy
|
||||
|
||||
WORKDIR /build/frontend
|
||||
COPY frontend/package*.json ./
|
||||
RUN npm ci
|
||||
COPY frontend/ ./
|
||||
RUN npm run build
|
||||
|
||||
FROM python:3.12-slim AS app
|
||||
|
||||
ARG HTTP_PROXY
|
||||
ARG HTTPS_PROXY
|
||||
ARG ALL_PROXY
|
||||
ARG NO_PROXY
|
||||
ARG http_proxy
|
||||
ARG https_proxy
|
||||
ARG all_proxy
|
||||
ARG no_proxy
|
||||
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
BILIUP_NEXT_CONTAINER=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
ffmpeg \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY pyproject.toml README.md ./
|
||||
COPY src ./src
|
||||
COPY config ./config
|
||||
COPY runtime/README.md runtime/cookies.example.json runtime/upload_config.example.json ./runtime/
|
||||
COPY --from=frontend-builder /build/frontend/dist ./frontend/dist
|
||||
COPY --from=frontend-builder /usr/local/bin/node /usr/local/bin/node
|
||||
COPY --from=frontend-builder /usr/local/lib/node_modules /usr/local/lib/node_modules
|
||||
|
||||
RUN pip install --editable . \
|
||||
&& pip install yt-dlp \
|
||||
&& ln -sf ../lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm \
|
||||
&& ln -sf ../lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx \
|
||||
&& npm install -g @openai/codex
|
||||
|
||||
RUN mkdir -p /app/data/workspace/stage /app/data/workspace/session /app/data/workspace/backup /app/runtime/logs /root/.codex
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]
|
||||
43
README.md
43
README.md
@ -59,6 +59,10 @@ bash setup.sh
|
||||
|
||||
- `docs/cold-start-checklist.md`
|
||||
|
||||
发布流程、输出文案和评论示例见:
|
||||
|
||||
- `docs/publish-output-examples.md`
|
||||
|
||||
浏览器访问:
|
||||
|
||||
```text
|
||||
@ -192,6 +196,29 @@ cd /home/theshy/biliup/biliup-next
|
||||
- 内容按 `P1/P2/P3` 分组
|
||||
- 依赖 `full_video_bvid.txt` 或通过标题匹配解析到完整版 BV
|
||||
|
||||
评论格式和投稿文案一样,优先从 `runtime/upload_config.json` 读取。可编辑字段:
|
||||
|
||||
```json
|
||||
"comment_template": {
|
||||
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||
"split_part_header": "P{part_index}:",
|
||||
"full_part_header": "P{part_index}:",
|
||||
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||
"split_text_song_line": "{song_index}. {song_text}",
|
||||
"full_timeline_line": "{song_index}. {line_text}"
|
||||
}
|
||||
```
|
||||
|
||||
常用变量:
|
||||
|
||||
- 链接:`{current_full_video_link}`、`{current_pure_video_link}`、`{previous_full_video_link}`、`{previous_pure_video_link}`
|
||||
- 分段与序号:`{part_index}`、`{song_index}`
|
||||
- 纯享歌单:`{title}`、`{artist}`、`{artist_suffix}`、`{song_text}`
|
||||
- 完整版时间轴:`{line_text}`
|
||||
|
||||
如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过。
|
||||
|
||||
清理默认关闭:
|
||||
|
||||
- `cleanup.delete_source_video_after_collection_synced = false`
|
||||
@ -201,11 +228,14 @@ cd /home/theshy/biliup/biliup-next
|
||||
|
||||
## Full Video BV Input
|
||||
|
||||
完整版 `BV` 目前支持 3 种来源:
|
||||
完整版 `BV` 目前支持 4 种来源:
|
||||
|
||||
- `stage/*.meta.json` 中的 `full_video_bvid`
|
||||
- 前端 / API 手工绑定
|
||||
- webhook:`POST /webhooks/full-video-uploaded`
|
||||
- `biliup list` 标题匹配,包含 `开放浏览` 和 `审核中` 状态
|
||||
|
||||
只要完整版上传后已经生成 BV,即使仍在审核中,也可以被用于纯享版简介、动态和评论互链。
|
||||
|
||||
推荐 webhook 负载:
|
||||
|
||||
@ -320,3 +350,14 @@ curl -X POST http://127.0.0.1:8787/tasks \
|
||||
|
||||
- `ingest.provider = bilibili_url`
|
||||
- `ingest.yt_dlp_cmd = yt-dlp`
|
||||
|
||||
## Docker Compose Deployment
|
||||
|
||||
如果希望用容器方式一键运行 API 和 worker,请参考 [README_DEPLOY.md](README_DEPLOY.md)。
|
||||
|
||||
快速入口:
|
||||
|
||||
```bash
|
||||
./scripts/init-docker-config.sh
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
176
README_DEPLOY.md
Normal file
176
README_DEPLOY.md
Normal file
@ -0,0 +1,176 @@
|
||||
# Docker Compose Deployment
|
||||
|
||||
This deployment runs the API and worker as two services from the same image.
|
||||
Runtime state, credentials, staged videos, generated sessions, and the SQLite
|
||||
database stay on the host through bind mounts.
|
||||
|
||||
## 1. Initialize Local Files
|
||||
|
||||
```bash
|
||||
chmod +x scripts/init-docker-config.sh
|
||||
./scripts/init-docker-config.sh
|
||||
```
|
||||
|
||||
This creates these files if they do not already exist:
|
||||
|
||||
```text
|
||||
.env
|
||||
config/settings.json
|
||||
runtime/cookies.json
|
||||
runtime/upload_config.json
|
||||
data/workspace/
|
||||
```
|
||||
|
||||
## 2. Edit Required Secrets And IDs
|
||||
|
||||
Edit `.env`:
|
||||
|
||||
```env
|
||||
GROQ_API_KEY=your_groq_key
|
||||
OPENAI_API_KEY=your_openai_key_if_using_codex
|
||||
COLLECTION_SEASON_ID_A=7196643
|
||||
COLLECTION_SEASON_ID_B=7196624
|
||||
```
|
||||
|
||||
Edit `runtime/cookies.json` and `runtime/upload_config.json` with real Bilibili
|
||||
credentials and upload metadata.
|
||||
|
||||
`runtime/upload_config.json` also controls pure-video title, description,
|
||||
dynamic text, and top-comment formatting. Existing deployments mount
|
||||
`./runtime` from the host, so updating the image does not overwrite this file.
|
||||
When you want to change output text, edit the host file directly.
|
||||
|
||||
Common output templates:
|
||||
|
||||
```json
|
||||
{
|
||||
"template": {
|
||||
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
|
||||
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}\n\n本视频为歌曲纯享切片,适合只听歌曲。",
|
||||
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}"
|
||||
},
|
||||
"comment_template": {
|
||||
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次直播:{previous_full_video_link} (上一场完整录播)",
|
||||
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次直播:{previous_full_video_link} (上一场完整录播)",
|
||||
"split_part_header": "P{part_index}:",
|
||||
"full_part_header": "P{part_index}:",
|
||||
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||
"split_text_song_line": "{song_index}. {song_text}",
|
||||
"full_timeline_line": "{song_index}. {line_text}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Supported comment variables:
|
||||
|
||||
- `{current_full_video_link}` / `{current_pure_video_link}`
|
||||
- `{previous_full_video_link}` / `{previous_pure_video_link}`
|
||||
- `{part_index}` / `{song_index}`
|
||||
- `{title}` / `{artist}` / `{artist_suffix}` / `{song_text}` / `{line_text}`
|
||||
|
||||
If a comment header line contains an empty link variable, that whole line is
|
||||
omitted. This prevents comments from showing blank `上次直播:` lines when the
|
||||
previous live video cannot be found.
|
||||
|
||||
Provide the `biliup` binary at:
|
||||
|
||||
```text
|
||||
runtime/biliup
|
||||
```
|
||||
|
||||
It must be executable inside the container:
|
||||
|
||||
```bash
|
||||
chmod +x runtime/biliup
|
||||
```
|
||||
|
||||
The image installs the `codex` CLI for `song_detect.provider=codex`. Provide
|
||||
Codex auth in one of these ways:
|
||||
|
||||
```text
|
||||
OPENAI_API_KEY in .env
|
||||
runtime/codex mounted to /root/.codex
|
||||
```
|
||||
|
||||
## 3. Start
|
||||
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
Open:
|
||||
|
||||
```text
|
||||
http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
Drop videos into:
|
||||
|
||||
```text
|
||||
data/workspace/stage/
|
||||
```
|
||||
|
||||
## Common Commands
|
||||
|
||||
```bash
|
||||
docker compose logs -f api
|
||||
docker compose logs -f worker
|
||||
docker compose restart worker
|
||||
docker compose down
|
||||
```
|
||||
|
||||
Run one scheduler cycle manually:
|
||||
|
||||
```bash
|
||||
docker compose run --rm worker biliup-next run-once
|
||||
```
|
||||
|
||||
Run doctor:
|
||||
|
||||
```bash
|
||||
docker compose run --rm api biliup-next doctor
|
||||
```
|
||||
|
||||
## Environment Overrides
|
||||
|
||||
`config/settings.json` is still the base configuration. Environment variables
|
||||
override selected values at runtime.
|
||||
|
||||
The Compose file already forces container-safe paths such as
|
||||
`/app/data/workspace` and `/app/runtime/cookies.json`, so an existing local
|
||||
`config/settings.json` with host paths can still be mounted safely.
|
||||
|
||||
Generic format:
|
||||
|
||||
```text
|
||||
BILIUP_NEXT__GROUP__FIELD=value
|
||||
```
|
||||
|
||||
Examples:
|
||||
|
||||
```env
|
||||
BILIUP_NEXT__PATHS__STAGE_DIR=/app/data/workspace/stage
|
||||
BILIUP_NEXT__PUBLISH__BILIUP_PATH=/app/runtime/biliup
|
||||
BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
|
||||
```
|
||||
|
||||
Convenience aliases:
|
||||
|
||||
```env
|
||||
GROQ_API_KEY=...
|
||||
COLLECTION_SEASON_ID_A=7196643
|
||||
COLLECTION_SEASON_ID_B=7196624
|
||||
```
|
||||
|
||||
## Data Persistence
|
||||
|
||||
These host paths are mounted into the containers:
|
||||
|
||||
```text
|
||||
./config -> /app/config
|
||||
./runtime -> /app/runtime
|
||||
./data/workspace -> /app/data/workspace
|
||||
```
|
||||
|
||||
Do not store `cookies.json`, Groq keys, or generated workspace data in the image.
|
||||
They should stay in the mounted host directories.
|
||||
127
config/settings.docker.example.json
Normal file
127
config/settings.docker.example.json
Normal file
@ -0,0 +1,127 @@
|
||||
{
|
||||
"runtime": {
|
||||
"database_path": "/app/data/workspace/biliup_next.db",
|
||||
"control_token": "",
|
||||
"log_level": "INFO"
|
||||
},
|
||||
"paths": {
|
||||
"stage_dir": "/app/data/workspace/stage",
|
||||
"backup_dir": "/app/data/workspace/backup",
|
||||
"session_dir": "/app/data/workspace/session",
|
||||
"cookies_file": "/app/runtime/cookies.json",
|
||||
"upload_config_file": "/app/runtime/upload_config.json"
|
||||
},
|
||||
"scheduler": {
|
||||
"candidate_scan_limit": 500,
|
||||
"max_tasks_per_cycle": 50,
|
||||
"prioritize_retry_due": true,
|
||||
"oldest_first": true,
|
||||
"status_priority": [
|
||||
"failed_retryable",
|
||||
"created",
|
||||
"transcribed",
|
||||
"songs_detected",
|
||||
"split_done",
|
||||
"published",
|
||||
"commented",
|
||||
"collection_synced"
|
||||
]
|
||||
},
|
||||
"ingest": {
|
||||
"provider": "local_file",
|
||||
"min_duration_seconds": 900,
|
||||
"ffprobe_bin": "ffprobe",
|
||||
"yt_dlp_cmd": "yt-dlp",
|
||||
"yt_dlp_format": "",
|
||||
"allowed_extensions": [
|
||||
".mp4",
|
||||
".flv",
|
||||
".mkv",
|
||||
".mov"
|
||||
],
|
||||
"stage_min_free_space_mb": 1024,
|
||||
"stability_wait_seconds": 30,
|
||||
"session_gap_minutes": 60,
|
||||
"meta_sidecar_enabled": true,
|
||||
"meta_sidecar_suffix": ".meta.json"
|
||||
},
|
||||
"transcribe": {
|
||||
"provider": "groq",
|
||||
"groq_api_key": "",
|
||||
"groq_api_keys": [],
|
||||
"ffmpeg_bin": "ffmpeg",
|
||||
"max_file_size_mb": 12,
|
||||
"request_timeout_seconds": 180,
|
||||
"request_max_retries": 1,
|
||||
"request_retry_backoff_seconds": 30,
|
||||
"serialize_groq_requests": true,
|
||||
"retry_count": 3,
|
||||
"retry_backoff_seconds": 300,
|
||||
"retry_schedule_minutes": [
|
||||
5,
|
||||
10,
|
||||
15
|
||||
]
|
||||
},
|
||||
"song_detect": {
|
||||
"provider": "codex",
|
||||
"codex_cmd": "codex",
|
||||
"qwen_cmd": "qwen",
|
||||
"poll_interval_seconds": 2,
|
||||
"retry_count": 3,
|
||||
"retry_backoff_seconds": 300,
|
||||
"retry_schedule_minutes": [
|
||||
5,
|
||||
10,
|
||||
15
|
||||
]
|
||||
},
|
||||
"split": {
|
||||
"provider": "ffmpeg_copy",
|
||||
"ffmpeg_bin": "ffmpeg",
|
||||
"poll_interval_seconds": 2,
|
||||
"min_free_space_mb": 2048
|
||||
},
|
||||
"publish": {
|
||||
"provider": "biliup_cli",
|
||||
"biliup_path": "/app/runtime/biliup",
|
||||
"cookie_file": "/app/runtime/cookies.json",
|
||||
"retry_count": 5,
|
||||
"retry_schedule_minutes": [
|
||||
15,
|
||||
5,
|
||||
5,
|
||||
5,
|
||||
5
|
||||
],
|
||||
"retry_backoff_seconds": 300,
|
||||
"command_timeout_seconds": 1800,
|
||||
"rate_limit_retry_schedule_minutes": [
|
||||
15,
|
||||
30,
|
||||
60
|
||||
]
|
||||
},
|
||||
"comment": {
|
||||
"provider": "bilibili_top_comment",
|
||||
"enabled": true,
|
||||
"max_retries": 5,
|
||||
"base_delay_seconds": 180,
|
||||
"poll_interval_seconds": 10,
|
||||
"post_split_comment": true,
|
||||
"post_full_video_timeline_comment": true
|
||||
},
|
||||
"collection": {
|
||||
"provider": "bilibili_collection",
|
||||
"enabled": true,
|
||||
"season_id_a": 7196643,
|
||||
"season_id_b": 7196624,
|
||||
"allow_fuzzy_full_video_match": false,
|
||||
"append_collection_a_new_to_end": true,
|
||||
"append_collection_b_new_to_end": true
|
||||
},
|
||||
"cleanup": {
|
||||
"delete_source_video_after_collection_synced": false,
|
||||
"delete_split_videos_after_collection_synced": false
|
||||
}
|
||||
}
|
||||
@ -1,15 +1,15 @@
|
||||
{
|
||||
"runtime": {
|
||||
"database_path": "data/workspace/biliup_next.db",
|
||||
"database_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/biliup_next.db",
|
||||
"control_token": "",
|
||||
"log_level": "INFO"
|
||||
},
|
||||
"paths": {
|
||||
"stage_dir": "data/workspace/stage",
|
||||
"backup_dir": "data/workspace/backup",
|
||||
"session_dir": "data/workspace/session",
|
||||
"cookies_file": "runtime/cookies.json",
|
||||
"upload_config_file": "runtime/upload_config.json"
|
||||
"stage_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/stage",
|
||||
"backup_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/backup",
|
||||
"session_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/session",
|
||||
"cookies_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
|
||||
"upload_config_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/upload_config.json"
|
||||
},
|
||||
"scheduler": {
|
||||
"candidate_scan_limit": 500,
|
||||
@ -31,7 +31,7 @@
|
||||
"provider": "local_file",
|
||||
"min_duration_seconds": 900,
|
||||
"ffprobe_bin": "ffprobe",
|
||||
"yt_dlp_cmd": "yt-dlp",
|
||||
"yt_dlp_cmd": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/.venv/bin/yt-dlp",
|
||||
"yt_dlp_format": "",
|
||||
"allowed_extensions": [
|
||||
".mp4",
|
||||
@ -47,15 +47,34 @@
|
||||
},
|
||||
"transcribe": {
|
||||
"provider": "groq",
|
||||
"groq_api_key": "",
|
||||
"groq_api_key": "REDACTED_LEAKED_KEY_ROTATE_IMMEDIATELY",
|
||||
"groq_api_keys": [],
|
||||
"ffmpeg_bin": "ffmpeg",
|
||||
"max_file_size_mb": 23
|
||||
"max_file_size_mb": 12,
|
||||
"request_timeout_seconds": 180,
|
||||
"request_max_retries": 1,
|
||||
"request_retry_backoff_seconds": 30,
|
||||
"serialize_groq_requests": true,
|
||||
"retry_count": 3,
|
||||
"retry_backoff_seconds": 300,
|
||||
"retry_schedule_minutes": [
|
||||
5,
|
||||
10,
|
||||
15
|
||||
]
|
||||
},
|
||||
"song_detect": {
|
||||
"provider": "qwen_cli",
|
||||
"provider": "codex",
|
||||
"codex_cmd": "codex",
|
||||
"qwen_cmd": "qwen",
|
||||
"poll_interval_seconds": 2
|
||||
"poll_interval_seconds": 2,
|
||||
"retry_count": 3,
|
||||
"retry_backoff_seconds": 300,
|
||||
"retry_schedule_minutes": [
|
||||
5,
|
||||
10,
|
||||
15
|
||||
]
|
||||
},
|
||||
"split": {
|
||||
"provider": "ffmpeg_copy",
|
||||
@ -65,8 +84,8 @@
|
||||
},
|
||||
"publish": {
|
||||
"provider": "biliup_cli",
|
||||
"biliup_path": "runtime/biliup",
|
||||
"cookie_file": "runtime/cookies.json",
|
||||
"biliup_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/biliup",
|
||||
"cookie_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
|
||||
"retry_count": 5,
|
||||
"retry_schedule_minutes": [
|
||||
15,
|
||||
@ -78,9 +97,9 @@
|
||||
"retry_backoff_seconds": 300,
|
||||
"command_timeout_seconds": 1800,
|
||||
"rate_limit_retry_schedule_minutes": [
|
||||
15,
|
||||
30,
|
||||
60,
|
||||
120
|
||||
60
|
||||
]
|
||||
},
|
||||
"comment": {
|
||||
@ -95,8 +114,8 @@
|
||||
"collection": {
|
||||
"provider": "bilibili_collection",
|
||||
"enabled": true,
|
||||
"season_id_a": 0,
|
||||
"season_id_b": 0,
|
||||
"season_id_a": 7196643,
|
||||
"season_id_b": 7196624,
|
||||
"allow_fuzzy_full_video_match": false,
|
||||
"append_collection_a_new_to_end": true,
|
||||
"append_collection_b_new_to_end": true
|
||||
|
||||
@ -229,6 +229,16 @@
|
||||
"description": "用于调用 Groq 转录 API。",
|
||||
"sensitive": true
|
||||
},
|
||||
"groq_api_keys": {
|
||||
"type": "array",
|
||||
"default": [],
|
||||
"title": "Groq API Keys",
|
||||
"ui_order": 12,
|
||||
"ui_widget": "secret_list",
|
||||
"items": { "type": "string" },
|
||||
"description": "可选 Groq API Key 池。遇到单个 key 限流时会自动切换下一个 key;为空时使用 groq_api_key。",
|
||||
"sensitive": true
|
||||
},
|
||||
"ffmpeg_bin": {
|
||||
"type": "string",
|
||||
"default": "ffmpeg",
|
||||
@ -238,10 +248,66 @@
|
||||
},
|
||||
"max_file_size_mb": {
|
||||
"type": "integer",
|
||||
"default": 23,
|
||||
"default": 12,
|
||||
"title": "Max File Size MB",
|
||||
"ui_order": 40,
|
||||
"minimum": 1
|
||||
"minimum": 1,
|
||||
"description": "Groq 音频分片目标上限。实际切分会额外保留安全余量,避免贴近上传限制。"
|
||||
},
|
||||
"request_timeout_seconds": {
|
||||
"type": "integer",
|
||||
"default": 180,
|
||||
"title": "Request Timeout Seconds",
|
||||
"ui_order": 50,
|
||||
"minimum": 1,
|
||||
"description": "单个 Groq 转录请求的超时时间。"
|
||||
},
|
||||
"request_max_retries": {
|
||||
"type": "integer",
|
||||
"default": 1,
|
||||
"title": "Request Max Retries",
|
||||
"ui_order": 60,
|
||||
"minimum": 0,
|
||||
"description": "单个音频分片在超时、限流或连接错误时的请求级重试次数。"
|
||||
},
|
||||
"request_retry_backoff_seconds": {
|
||||
"type": "integer",
|
||||
"default": 30,
|
||||
"title": "Request Retry Backoff Seconds",
|
||||
"ui_order": 70,
|
||||
"minimum": 0,
|
||||
"description": "Groq 请求级重试之间的等待时间。"
|
||||
},
|
||||
"serialize_groq_requests": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"title": "Serialize Groq Requests",
|
||||
"ui_order": 75,
|
||||
"description": "是否串行化 Groq 分片上传请求,避免多个 worker 或多个任务同时上传导致超时。"
|
||||
},
|
||||
"retry_count": {
|
||||
"type": "integer",
|
||||
"default": 3,
|
||||
"title": "Task Retry Count",
|
||||
"ui_order": 80,
|
||||
"minimum": 0,
|
||||
"description": "transcribe 步骤允许的任务级失败重试次数。"
|
||||
},
|
||||
"retry_backoff_seconds": {
|
||||
"type": "integer",
|
||||
"default": 300,
|
||||
"title": "Task Retry Backoff Seconds",
|
||||
"ui_order": 90,
|
||||
"minimum": 0,
|
||||
"description": "未配置 retry_schedule_minutes 时,transcribe 任务级重试的等待时间。"
|
||||
},
|
||||
"retry_schedule_minutes": {
|
||||
"type": "array",
|
||||
"default": [5, 10, 15],
|
||||
"title": "Task Retry Schedule Minutes",
|
||||
"ui_order": 100,
|
||||
"items": { "type": "integer", "minimum": 0 },
|
||||
"description": "transcribe 任务级失败后的自动重试等待时间。"
|
||||
}
|
||||
},
|
||||
"song_detect": {
|
||||
@ -275,6 +341,30 @@
|
||||
"title": "Poll Interval Seconds",
|
||||
"ui_order": 30,
|
||||
"minimum": 1
|
||||
},
|
||||
"retry_count": {
|
||||
"type": "integer",
|
||||
"default": 3,
|
||||
"title": "Task Retry Count",
|
||||
"ui_order": 40,
|
||||
"minimum": 0,
|
||||
"description": "song_detect 步骤允许的任务级失败重试次数。认证失败会直接进入人工失败,不会重试。"
|
||||
},
|
||||
"retry_backoff_seconds": {
|
||||
"type": "integer",
|
||||
"default": 300,
|
||||
"title": "Task Retry Backoff Seconds",
|
||||
"ui_order": 50,
|
||||
"minimum": 0,
|
||||
"description": "未配置 retry_schedule_minutes 时,song_detect 任务级重试的等待时间。"
|
||||
},
|
||||
"retry_schedule_minutes": {
|
||||
"type": "array",
|
||||
"default": [5, 10, 15],
|
||||
"title": "Task Retry Schedule Minutes",
|
||||
"ui_order": 60,
|
||||
"items": { "type": "integer", "minimum": 0 },
|
||||
"description": "song_detect 任务级失败后的自动重试等待时间。"
|
||||
}
|
||||
},
|
||||
"split": {
|
||||
@ -375,9 +465,9 @@
|
||||
"rate_limit_retry_schedule_minutes": {
|
||||
"type": "array",
|
||||
"default": [
|
||||
15,
|
||||
30,
|
||||
60,
|
||||
120
|
||||
60
|
||||
],
|
||||
"title": "Rate Limit Retry Schedule Minutes",
|
||||
"ui_order": 70,
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
"transcribe": {
|
||||
"provider": "groq",
|
||||
"groq_api_key": "",
|
||||
"groq_api_keys": [],
|
||||
"ffmpeg_bin": "ffmpeg",
|
||||
"max_file_size_mb": 23
|
||||
},
|
||||
|
||||
74
docker-compose.yml
Normal file
74
docker-compose.yml
Normal file
@ -0,0 +1,74 @@
|
||||
services:
|
||||
api:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||
ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||
NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
|
||||
http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||
https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||
all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||
no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
|
||||
image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
|
||||
command: ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]
|
||||
env_file:
|
||||
- path: .env
|
||||
required: false
|
||||
environment:
|
||||
TZ: ${TZ:-Asia/Shanghai}
|
||||
BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
|
||||
BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
|
||||
BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
|
||||
BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
|
||||
BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
|
||||
BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
|
||||
BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
|
||||
BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
|
||||
BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
|
||||
ports:
|
||||
- "${BILIUP_NEXT_PORT:-8000}:8000"
|
||||
volumes:
|
||||
- ./config:/app/config
|
||||
- ./runtime:/app/runtime
|
||||
- ./data/workspace:/app/data/workspace
|
||||
- ./runtime/codex:/root/.codex
|
||||
restart: unless-stopped
|
||||
|
||||
worker:
|
||||
image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||
HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||
ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||
NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
|
||||
http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||
https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||
all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||
no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
|
||||
command: ["sh", "-c", "biliup-next worker --interval ${WORKER_INTERVAL:-5}"]
|
||||
env_file:
|
||||
- path: .env
|
||||
required: false
|
||||
environment:
|
||||
TZ: ${TZ:-Asia/Shanghai}
|
||||
BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
|
||||
BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
|
||||
BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
|
||||
BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
|
||||
BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
|
||||
BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
|
||||
BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
|
||||
BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
|
||||
BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
|
||||
volumes:
|
||||
- ./config:/app/config
|
||||
- ./runtime:/app/runtime
|
||||
- ./data/workspace:/app/data/workspace
|
||||
- ./runtime/codex:/root/.codex
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- api
|
||||
@ -155,6 +155,60 @@ User edits config
|
||||
- `base_delay_seconds`
|
||||
- `poll_interval_seconds`
|
||||
|
||||
## Upload And Comment Templates
|
||||
|
||||
`paths.upload_config_file` 指向 `runtime/upload_config.json`。这个文件不只控制 `biliup upload` 的标题、简介、动态和标签,也控制 B 站置顶评论格式。
|
||||
|
||||
投稿字段在 `template` 中:
|
||||
|
||||
```json
|
||||
{
|
||||
"template": {
|
||||
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
|
||||
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}",
|
||||
"tag": "可爱,王海颖,唱歌,音乐",
|
||||
"dynamic": "{streamer} {date} 歌曲纯享版已发布。\n直播完整版:{current_full_video_link}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
评论字段在 `comment_template` 中:
|
||||
|
||||
```json
|
||||
{
|
||||
"comment_template": {
|
||||
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||
"split_part_header": "P{part_index}:",
|
||||
"full_part_header": "P{part_index}:",
|
||||
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||
"split_text_song_line": "{song_index}. {song_text}",
|
||||
"full_timeline_line": "{song_index}. {line_text}"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
可用变量:
|
||||
|
||||
- `streamer`:主播名。
|
||||
- `date`:从文件名解析出来的日期和时间。
|
||||
- `song_count`:识别到的歌曲数量。
|
||||
- `songs_list`:`songs.txt` 原始歌单内容。
|
||||
- `daily_quote` / `quote_author`:随机引用文本。
|
||||
- `current_full_video_bvid` / `current_full_video_link`:本场直播完整版 BV 和链接。
|
||||
- `current_pure_video_bvid` / `current_pure_video_link`:本场歌曲纯享版 BV 和链接。
|
||||
- `previous_full_video_bvid` / `previous_full_video_link`:上一场直播完整版 BV 和链接。
|
||||
- `previous_pure_video_bvid` / `previous_pure_video_link`:上一场歌曲纯享版 BV 和链接。
|
||||
- `part_index`:评论中的 `P1/P2/P3` 分段序号。
|
||||
- `song_index`:全局歌曲序号。
|
||||
- `title` / `artist` / `artist_suffix`:从 `songs.json` 生成纯享歌单时使用。
|
||||
- `song_text`:从 `songs.txt` 兜底生成纯享歌单时使用,通常不含时间戳。
|
||||
- `line_text`:完整版时间轴的原始行,通常包含时间戳。
|
||||
|
||||
评论头部模板有一条额外规则:如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过,避免发出空链接提示。
|
||||
|
||||
Docker 部署时 `./runtime` 是宿主机挂载目录。镜像更新不会覆盖已有 `runtime/upload_config.json`,因此调整文案或评论格式时应修改宿主机上的这个文件,然后重启容器。
|
||||
|
||||
### collection
|
||||
|
||||
- `enabled`
|
||||
|
||||
@ -75,7 +75,7 @@
|
||||
"platform": "bilibili",
|
||||
"aid": 123456,
|
||||
"bvid": "BV1xxxx",
|
||||
"title": "【王海颖 (歌曲纯享版)】_03月29日 22时02分 共18首歌",
|
||||
"title": "【王海颖 (歌曲纯享版)】 03月29日 22时02分 共18首歌",
|
||||
"published_at": "2026-03-30T07:56:13+08:00"
|
||||
}
|
||||
```
|
||||
|
||||
321
docs/publish-output-examples.md
Normal file
321
docs/publish-output-examples.md
Normal file
@ -0,0 +1,321 @@
|
||||
# 发布输出示例与流程说明
|
||||
|
||||
本文档面向使用者说明 `biliup-next` 的主流程、输入输出、当前已实现功能,以及一次多段同场直播发布后的示例文案。
|
||||
|
||||
## 项目功能
|
||||
|
||||
`biliup-next` 将一场直播录播拆成两个最终发布目标:
|
||||
|
||||
- 直播完整版:由外部流程或人工上传到 B 站,本项目负责记录/绑定它的 BV 号,并给它补充置顶时间轴评论、加入完整版合集。
|
||||
- 歌曲纯享版:由本项目从直播录播中识别歌曲、切出歌曲片段、合并发布为一个分 P 视频,并给它补充置顶歌单评论、加入纯享版合集。
|
||||
|
||||
当前主链路:
|
||||
|
||||
```text
|
||||
stage 输入视频
|
||||
-> ingest 导入并归并 session
|
||||
-> transcribe 语音转字幕
|
||||
-> song_detect 识别歌曲
|
||||
-> split 切出歌曲片段
|
||||
-> publish 发布歌曲纯享版
|
||||
-> comment 发布/置顶评论
|
||||
-> collection 加入合集
|
||||
```
|
||||
|
||||
## 输入
|
||||
|
||||
最常见输入是把录播视频放入 `data/workspace/stage/`。
|
||||
|
||||
支持的形式:
|
||||
|
||||
- 单个视频文件:一场直播只有一个录播文件。
|
||||
- 多个视频文件:同一场直播被分成多段录播文件。
|
||||
- 浏览器上传:通过控制台上传到 stage。
|
||||
- 本机复制:通过控制台把服务器上的文件复制到 stage。
|
||||
|
||||
输入文件名会用于推测主播和直播开始时间,例如:
|
||||
|
||||
```text
|
||||
王海颖唱歌录播 04月19日 22时10分.mp4
|
||||
王海颖唱歌录播 04月19日 23时05分.mp4
|
||||
王海颖唱歌录播 04月20日 00时01分.mp4
|
||||
```
|
||||
|
||||
## Session 归并
|
||||
|
||||
同一主播、时间接近的多个录播片段会归入同一个 session。
|
||||
|
||||
同一 session 的行为:
|
||||
|
||||
- 只发布一个歌曲纯享版 BV。
|
||||
- 多段录播的歌曲会按时间顺序聚合。
|
||||
- 评论按 `P1`、`P2`、`P3` 分段展示。
|
||||
- 歌曲序号全局递增,不在每个 P 内重新从 1 开始。
|
||||
|
||||
示例:
|
||||
|
||||
```text
|
||||
P1:
|
||||
1. 程艾影 — 赵雷
|
||||
2. 钟无艳 — 谢安琪
|
||||
|
||||
P2:
|
||||
3. 慢慢喜欢你 — 莫文蔚
|
||||
|
||||
P3:
|
||||
4. 空白格 — 蔡健雅
|
||||
```
|
||||
|
||||
## BV 获取
|
||||
|
||||
### 歌曲纯享版 BV
|
||||
|
||||
歌曲纯享版由本项目调用 `biliup upload` 发布。
|
||||
|
||||
发布成功后,项目会从 `biliup` 输出中提取 BV 号,并写入当前 session 目录:
|
||||
|
||||
```text
|
||||
bvid.txt
|
||||
```
|
||||
|
||||
这个 BV 会用于:
|
||||
|
||||
- 纯享版评论发布。
|
||||
- 完整版评论顶部反向链接。
|
||||
- 纯享版合集同步。
|
||||
|
||||
### 直播完整版 BV
|
||||
|
||||
完整版 BV 可以来自三种方式:
|
||||
|
||||
- 控制台手动绑定。
|
||||
- API/webhook 传入。
|
||||
- `biliup list` 标题匹配。
|
||||
|
||||
`biliup list` 会同时接受 `开放浏览` 和 `审核中` 状态。完整版视频只要上传后生成了 BV,即使仍在审核中,也可以被写入纯享版简介、动态和评论互链。
|
||||
|
||||
成功解析后会写入:
|
||||
|
||||
```text
|
||||
full_video_bvid.txt
|
||||
```
|
||||
|
||||
默认标题匹配是保守的精确匹配:会先去掉空格、标点、括号、冒号等,只保留中文、英文、数字,再比较标题是否相等。
|
||||
|
||||
如果 `allow_fuzzy_full_video_match=false`,不会做包含式模糊匹配。为了避免误匹配,推荐在完整版上传完成后手动绑定 BV。
|
||||
|
||||
## 示例场景
|
||||
|
||||
假设本次直播由三段录播组成:
|
||||
|
||||
```text
|
||||
王海颖唱歌录播 04月19日 22时10分
|
||||
王海颖唱歌录播 04月19日 23时05分
|
||||
王海颖唱歌录播 04月20日 00时01分
|
||||
```
|
||||
|
||||
假设 BV 绑定结果如下:
|
||||
|
||||
```text
|
||||
本次直播完整版:BVFULLCURR
|
||||
本次歌曲纯享版:BVPURECURR
|
||||
上次直播完整版:BVFULLPREV
|
||||
```
|
||||
|
||||
假设识别出的歌曲如下:
|
||||
|
||||
```text
|
||||
P1:
|
||||
00:06:32 程艾影 — 赵雷
|
||||
00:14:45 钟无艳 — 谢安琪
|
||||
|
||||
P2:
|
||||
00:20:57 慢慢喜欢你 — 莫文蔚
|
||||
|
||||
P3:
|
||||
00:27:16 空白格 — 蔡健雅
|
||||
```
|
||||
|
||||
## 歌曲纯享版标题
|
||||
|
||||
当前模板:
|
||||
|
||||
```text
|
||||
【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌
|
||||
```
|
||||
|
||||
示例:
|
||||
|
||||
```text
|
||||
【王海颖 (歌曲纯享版)】 04月19日 22时10分 共4首歌
|
||||
```
|
||||
|
||||
## 歌曲纯享版简介
|
||||
|
||||
当前模板会保持简介较短,完整歌单放到置顶评论中,避免 B 站简介截断。
|
||||
|
||||
示例:
|
||||
|
||||
```text
|
||||
王海颖 04月19日 22时10分 歌曲纯享版。
|
||||
|
||||
完整歌单与时间轴见置顶评论。
|
||||
直播完整版:https://www.bilibili.com/video/BVFULLCURR
|
||||
上次直播:https://www.bilibili.com/video/BVFULLPREV
|
||||
|
||||
本视频为歌曲纯享切片,适合只听歌曲。
|
||||
```
|
||||
|
||||
如果某个链接暂时没有 BV,项目会自动移除对应的空链接行。
|
||||
|
||||
## 歌曲纯享版动态
|
||||
|
||||
示例:
|
||||
|
||||
```text
|
||||
王海颖 04月19日 22时10分 歌曲纯享版已发布。完整歌单见置顶评论。
|
||||
直播完整版:https://www.bilibili.com/video/BVFULLCURR
|
||||
上次直播:https://www.bilibili.com/video/BVFULLPREV
|
||||
```
|
||||
|
||||
## 歌曲纯享版置顶评论
|
||||
|
||||
纯享版评论主要给听歌用户看,不带歌曲时间轴,只展示歌名、歌手和互链。
|
||||
|
||||
默认由 `runtime/upload_config.json` 的 `comment_template.split_header`、`comment_template.split_part_header`、`comment_template.split_song_line` 生成。
|
||||
|
||||
示例:
|
||||
|
||||
```text
|
||||
当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。
|
||||
直播完整版:https://www.bilibili.com/video/BVFULLCURR (完整录播,含聊天/互动/完整流程)
|
||||
上次纯享:https://www.bilibili.com/video/BVPUREPREV (上一场歌曲纯享版)
|
||||
|
||||
P1:
|
||||
1. 程艾影 — 赵雷
|
||||
2. 钟无艳 — 谢安琪
|
||||
|
||||
P2:
|
||||
3. 慢慢喜欢你 — 莫文蔚
|
||||
|
||||
P3:
|
||||
4. 空白格 — 蔡健雅
|
||||
```
|
||||
|
||||
## 直播完整版置顶评论
|
||||
|
||||
完整版评论主要给看完整录播的用户跳转歌曲纯享版,并提供完整时间轴。
|
||||
|
||||
默认由 `runtime/upload_config.json` 的 `comment_template.full_header`、`comment_template.full_part_header`、`comment_template.full_timeline_line` 生成。
|
||||
|
||||
示例:
|
||||
|
||||
```text
|
||||
当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。
|
||||
歌曲纯享版:https://www.bilibili.com/video/BVPURECURR (只听歌曲看这里)
|
||||
上次完整版:https://www.bilibili.com/video/BVFULLPREV (上一场完整录播)
|
||||
|
||||
P1:
|
||||
1. 00:06:32 程艾影 — 赵雷
|
||||
2. 00:14:45 钟无艳 — 谢安琪
|
||||
|
||||
P2:
|
||||
3. 00:20:57 慢慢喜欢你 — 莫文蔚
|
||||
|
||||
P3:
|
||||
4. 00:27:16 空白格 — 蔡健雅
|
||||
```
|
||||
|
||||
## 评论格式配置
|
||||
|
||||
评论格式可以像标题、简介、动态一样通过 `runtime/upload_config.json` 修改:
|
||||
|
||||
```json
|
||||
"comment_template": {
|
||||
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||
"split_part_header": "P{part_index}:",
|
||||
"full_part_header": "P{part_index}:",
|
||||
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||
"split_text_song_line": "{song_index}. {song_text}",
|
||||
"full_timeline_line": "{song_index}. {line_text}"
|
||||
}
|
||||
```
|
||||
|
||||
字段含义:
|
||||
|
||||
- `split_header`:纯享版评论顶部说明。
|
||||
- `full_header`:完整版评论顶部说明。
|
||||
- `split_part_header` / `full_part_header`:多片段 session 的分段标题,例如 `P1:`。
|
||||
- `split_song_line`:从 `songs.json` 生成纯享歌单时的单行格式。
|
||||
- `split_text_song_line`:`songs.json` 不可用时,从 `songs.txt` 兜底生成纯享歌单的单行格式。
|
||||
- `full_timeline_line`:完整版时间轴评论的单行格式。
|
||||
|
||||
常用变量:
|
||||
|
||||
- `{current_full_video_link}`:本场直播完整版链接。
|
||||
- `{current_pure_video_link}`:本场歌曲纯享版链接。
|
||||
- `{previous_full_video_link}`:上一场直播完整版链接。
|
||||
- `{previous_pure_video_link}`:上一场歌曲纯享版链接。
|
||||
- `{part_index}`:P 分段序号。
|
||||
- `{song_index}`:歌曲全局序号。
|
||||
- `{title}` / `{artist}` / `{artist_suffix}`:歌曲标题、歌手、带分隔符的歌手后缀。
|
||||
- `{song_text}`:不带时间戳的歌曲文本。
|
||||
- `{line_text}`:原始时间轴行,通常包含时间戳。
|
||||
|
||||
如果评论头部某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动省略。
|
||||
|
||||
## 合集同步
|
||||
|
||||
项目维护两个合集目标:
|
||||
|
||||
- 合集 A:直播完整版。
|
||||
- 合集 B:歌曲纯享版。
|
||||
|
||||
当前配置中的示例 ID:
|
||||
|
||||
```text
|
||||
直播完整版合集:7196643
|
||||
歌曲纯享版合集:7196624
|
||||
```
|
||||
|
||||
合集同步完成后,如果启用了清理策略,项目可以删除本地原视频或切片视频以节省空间。当前默认不删除。
|
||||
|
||||
## 幂等与重试
|
||||
|
||||
项目会在 session 目录写入标记文件,避免重复上传和重复评论。
|
||||
|
||||
常见标记:
|
||||
|
||||
```text
|
||||
bvid.txt
|
||||
full_video_bvid.txt
|
||||
upload_done.flag
|
||||
comment_split_done.flag
|
||||
comment_full_done.flag
|
||||
collection_a_done.flag
|
||||
collection_b_done.flag
|
||||
```
|
||||
|
||||
发布阶段的关键行为:
|
||||
|
||||
- 首批最多上传 5 个分 P。
|
||||
- 超过 5 个分 P 时,后续通过 append 追加。
|
||||
- 已经写入 `bvid.txt` 后,重试会优先 append 到已有视频,而不是重新发布。
|
||||
- `publish_progress.json` 记录 append 进度,避免重试时重复追加已完成批次。
|
||||
|
||||
评论阶段的关键行为:
|
||||
|
||||
- 同一 session 只由最早片段负责聚合评论。
|
||||
- 非 anchor 片段进入评论步骤时会跳过实际发评。
|
||||
- 这样可以避免同一场直播的多个片段重复发布相同评论。
|
||||
|
||||
## 使用建议
|
||||
|
||||
发布前建议确认:
|
||||
|
||||
- stage 中的视频文件名能解析出主播和时间。
|
||||
- `runtime/upload_config.json` 中标题、简介、动态符合预期。
|
||||
- 完整版上传完成后,尽量手动绑定 `full_video_bvid`。
|
||||
- worker 重启前确认已有 `bvid.txt` 和 `publish_progress.json` 是否符合当前发布进度。
|
||||
- 如需自动匹配完整版 BV,确认 `biliup list` 中完整视频标题与任务标题标准化后相等。
|
||||
@ -25,3 +25,11 @@ cd /home/theshy/biliup/biliup-next
|
||||
- `upload_config.json` <- `upload_config.example.json`
|
||||
|
||||
它们只用于占位,能保证项目进入“可配置、可 doctor”的状态,但不代表上传链路已经可用。
|
||||
|
||||
`upload_config.json` 同时控制:
|
||||
|
||||
- 纯享版投稿标题、简介、动态、标签:`template`
|
||||
- 纯享版和完整版置顶评论格式:`comment_template`
|
||||
- 文件名解析规则:`filename_patterns`
|
||||
|
||||
Docker 部署时这个目录通常会作为 `./runtime:/app/runtime` 挂载到容器内。镜像更新不会覆盖已有 `upload_config.json`,所以修改评论、动态、简介格式时,应直接改宿主机上的 `runtime/upload_config.json`。
|
||||
|
||||
@ -1,5 +1,95 @@
|
||||
{
|
||||
"line": "AUTO",
|
||||
"limit": 3,
|
||||
"threads": 3
|
||||
"comment": "B站投稿配置文件 - 根据您的需要修改模板内容",
|
||||
"upload_settings": {
|
||||
"tid": 31,
|
||||
"copyright": 1,
|
||||
"source": "王海颖好听的歌声分享",
|
||||
"cover": ""
|
||||
},
|
||||
"template": {
|
||||
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
|
||||
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}\n\n本视频为歌曲纯享切片,适合只听歌曲。",
|
||||
"tag": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐",
|
||||
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}"
|
||||
},
|
||||
"comment_template": {
|
||||
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||
"split_part_header": "P{part_index}:",
|
||||
"full_part_header": "P{part_index}:",
|
||||
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||
"split_text_song_line": "{song_index}. {song_text}",
|
||||
"full_timeline_line": "{song_index}. {line_text}"
|
||||
},
|
||||
"streamers": {
|
||||
"王海颖": {
|
||||
"display_name": "王海颖",
|
||||
"tags": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐"
|
||||
},
|
||||
"示例主播": {
|
||||
"display_name": "示例主播",
|
||||
"tags": "示例,标签1,标签2,唱歌,音乐"
|
||||
}
|
||||
},
|
||||
"quotes": [
|
||||
{
|
||||
"text": "此心安处是吾乡。",
|
||||
"author": "苏轼《定风波·南海归赠王定国侍人寓娘》"
|
||||
},
|
||||
{
|
||||
"text": "山重水复疑无路,柳暗花明又一村。",
|
||||
"author": "陆游《游山西村》"
|
||||
},
|
||||
{
|
||||
"text": "长风破浪会有时,直挂云帆济沧海。",
|
||||
"author": "李白《行路难·其一》"
|
||||
}
|
||||
],
|
||||
"filename_patterns": {
|
||||
"comment": "从文件名提取信息的正则表达式模式 - 按优先级从高到低排列",
|
||||
"patterns": [
|
||||
{
|
||||
"name": "主播名唱歌录播 日期 时间",
|
||||
"regex": "^(?P<streamer>.+?)唱歌录播 (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分",
|
||||
"date_format": "{month}月{day}日 {hour}时{minute}分",
|
||||
"example": "王海颖唱歌录播 01月28日 22时06分"
|
||||
},
|
||||
{
|
||||
"name": "日期 时间 主播名 唱歌录播",
|
||||
"regex": "^(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 (?P<streamer>.+?)唱歌录播",
|
||||
"date_format": "{month}月{day}日 {hour}时{minute}分",
|
||||
"example": "01月25日 09时20分 王海颖唱歌录播"
|
||||
},
|
||||
{
|
||||
"name": "主播名唱歌录播: 年月日 时分 [BV号]",
|
||||
"regex": "^(?P<streamer>.+?)唱歌录播[::] (?P<year>\\d{4})年(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
|
||||
"date_format": "{month}月{day}日 {hour}时{minute}分",
|
||||
"example": "王海颖唱歌录播: 2026年01月22日 22时09分 [BV1wEzcBqEhW]"
|
||||
},
|
||||
{
|
||||
"name": "主播名 日期 时分 [BV号]",
|
||||
"regex": "^(?P<streamer>.+?) (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})点(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
|
||||
"date_format": "{month}月{day}日 {hour}点{minute}分",
|
||||
"example": "王海颖 01月25日 02点24分 [BV1KCzQBpEXC]"
|
||||
},
|
||||
{
|
||||
"name": "主播名_日期",
|
||||
"regex": "^(?P<streamer>.+?)_(?P<date>\\d{1,2}月\\d{1,2}日)",
|
||||
"date_format": "{date}",
|
||||
"example": "王海颖_1月20日"
|
||||
},
|
||||
{
|
||||
"name": "主播名_完整日期",
|
||||
"regex": "^(?P<streamer>.+?)_(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})",
|
||||
"date_format": "{month}月{day}日",
|
||||
"example": "王海颖_2026-01-20"
|
||||
},
|
||||
{
|
||||
"name": "主播名_描述",
|
||||
"regex": "^(?P<streamer>.+?)_(?P<desc>.+)",
|
||||
"date_format": "{desc}",
|
||||
"example": "测试搬运_前15分钟"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
28
scripts/init-docker-config.sh
Normal file
28
scripts/init-docker-config.sh
Normal file
@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env sh
# Seed the host-side config/runtime layout required by the Docker deployment.
# Idempotent: existing files are never overwritten.
set -eu

# Create the directory layout expected by the compose volumes.
mkdir -p config runtime/codex data/workspace/stage data/workspace/session data/workspace/backup

# seed_file DEST SRC MESSAGE -- copy SRC to DEST unless DEST already exists.
seed_file() {
    if [ ! -f "$1" ]; then
        cp "$2" "$1"
        echo "$3"
    fi
}

seed_file .env .env.example "created .env from .env.example"
seed_file config/settings.json config/settings.docker.example.json "created config/settings.json from config/settings.docker.example.json"
seed_file runtime/cookies.json runtime/cookies.example.json "created runtime/cookies.json placeholder"
seed_file runtime/upload_config.json runtime/upload_config.example.json "created runtime/upload_config.json placeholder"

# The biliup binary is provided by the operator; warn when it is absent so the
# publish step does not fail silently later.
if [ ! -x runtime/biliup ]; then
    echo "warning: runtime/biliup is missing or not executable; publish will fail until you provide it" >&2
fi
|
||||
@ -4,3 +4,4 @@ Version: 0.1.0
|
||||
Summary: Next-generation control-plane-first biliup pipeline
|
||||
Requires-Python: >=3.11
|
||||
Requires-Dist: requests>=2.32.0
|
||||
Requires-Dist: groq>=0.18.0
|
||||
|
||||
@ -10,7 +10,19 @@ src/biliup_next.egg-info/top_level.txt
|
||||
src/biliup_next/app/api_server.py
|
||||
src/biliup_next/app/bootstrap.py
|
||||
src/biliup_next/app/cli.py
|
||||
src/biliup_next/app/control_plane_get_dispatcher.py
|
||||
src/biliup_next/app/control_plane_post_dispatcher.py
|
||||
src/biliup_next/app/dashboard.py
|
||||
src/biliup_next/app/retry_meta.py
|
||||
src/biliup_next/app/scheduler.py
|
||||
src/biliup_next/app/serializers.py
|
||||
src/biliup_next/app/session_delivery_service.py
|
||||
src/biliup_next/app/task_actions.py
|
||||
src/biliup_next/app/task_audit.py
|
||||
src/biliup_next/app/task_control_service.py
|
||||
src/biliup_next/app/task_engine.py
|
||||
src/biliup_next/app/task_policies.py
|
||||
src/biliup_next/app/task_runner.py
|
||||
src/biliup_next/app/worker.py
|
||||
src/biliup_next/core/config.py
|
||||
src/biliup_next/core/errors.py
|
||||
@ -18,25 +30,56 @@ src/biliup_next/core/models.py
|
||||
src/biliup_next/core/providers.py
|
||||
src/biliup_next/core/registry.py
|
||||
src/biliup_next/infra/db.py
|
||||
src/biliup_next/infra/legacy_asset_sync.py
|
||||
src/biliup_next/infra/log_reader.py
|
||||
src/biliup_next/infra/plugin_loader.py
|
||||
src/biliup_next/infra/runtime_doctor.py
|
||||
src/biliup_next/infra/stage_importer.py
|
||||
src/biliup_next/infra/storage_guard.py
|
||||
src/biliup_next/infra/systemd_runtime.py
|
||||
src/biliup_next/infra/task_repository.py
|
||||
src/biliup_next/infra/task_reset.py
|
||||
src/biliup_next/infra/workspace_cleanup.py
|
||||
src/biliup_next/infra/workspace_paths.py
|
||||
src/biliup_next/infra/adapters/bilibili_api.py
|
||||
src/biliup_next/infra/adapters/biliup_cli.py
|
||||
src/biliup_next/infra/adapters/codex_cli.py
|
||||
src/biliup_next/infra/adapters/full_video_locator.py
|
||||
src/biliup_next/infra/adapters/qwen_cli.py
|
||||
src/biliup_next/infra/adapters/yt_dlp.py
|
||||
src/biliup_next/modules/collection/service.py
|
||||
src/biliup_next/modules/collection/providers/bilibili_collection.py
|
||||
src/biliup_next/modules/comment/service.py
|
||||
src/biliup_next/modules/comment/providers/bilibili_top_comment.py
|
||||
src/biliup_next/modules/ingest/service.py
|
||||
src/biliup_next/modules/ingest/providers/bilibili_url.py
|
||||
src/biliup_next/modules/ingest/providers/local_file.py
|
||||
src/biliup_next/modules/publish/service.py
|
||||
src/biliup_next/modules/publish/providers/biliup_cli.py
|
||||
src/biliup_next/modules/song_detect/service.py
|
||||
src/biliup_next/modules/song_detect/providers/codex.py
|
||||
src/biliup_next/modules/song_detect/providers/common.py
|
||||
src/biliup_next/modules/song_detect/providers/qwen_cli.py
|
||||
src/biliup_next/modules/split/service.py
|
||||
src/biliup_next/modules/split/providers/ffmpeg_copy.py
|
||||
src/biliup_next/modules/transcribe/service.py
|
||||
src/biliup_next/modules/transcribe/providers/groq.py
|
||||
tests/test_api_server.py
|
||||
tests/test_bilibili_top_comment_provider.py
|
||||
tests/test_biliup_cli_publish_provider.py
|
||||
tests/test_control_plane_get_dispatcher.py
|
||||
tests/test_control_plane_post_dispatcher.py
|
||||
tests/test_ingest_bilibili_url.py
|
||||
tests/test_ingest_session_grouping.py
|
||||
tests/test_publish_service.py
|
||||
tests/test_retry_meta.py
|
||||
tests/test_serializers.py
|
||||
tests/test_session_delivery_service.py
|
||||
tests/test_settings_service.py
|
||||
tests/test_song_detect_providers.py
|
||||
tests/test_task_actions.py
|
||||
tests/test_task_control_service.py
|
||||
tests/test_task_engine.py
|
||||
tests/test_task_policies.py
|
||||
tests/test_task_repository_sqlite.py
|
||||
tests/test_task_runner.py
|
||||
@ -1 +1,2 @@
|
||||
requests>=2.32.0
|
||||
groq>=0.18.0
|
||||
|
||||
@ -3,6 +3,8 @@ from __future__ import annotations
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
STEP_SETTINGS_GROUP = {
|
||||
"transcribe": "transcribe",
|
||||
"song_detect": "song_detect",
|
||||
"publish": "publish",
|
||||
"comment": "comment",
|
||||
}
|
||||
@ -54,6 +56,26 @@ def publish_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
||||
)
|
||||
|
||||
|
||||
def transcribe_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
||||
return retry_schedule_seconds(
|
||||
settings,
|
||||
count_key="retry_count",
|
||||
backoff_key="retry_backoff_seconds",
|
||||
default_count=3,
|
||||
default_backoff=300,
|
||||
)
|
||||
|
||||
|
||||
def song_detect_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
||||
return retry_schedule_seconds(
|
||||
settings,
|
||||
count_key="retry_count",
|
||||
backoff_key="retry_backoff_seconds",
|
||||
default_count=3,
|
||||
default_backoff=300,
|
||||
)
|
||||
|
||||
|
||||
def comment_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
||||
return retry_schedule_seconds(
|
||||
settings,
|
||||
@ -77,7 +99,11 @@ def retry_meta_for_step(step, settings_by_group: dict[str, object]) -> dict[str,
|
||||
if not isinstance(group_settings, dict):
|
||||
group_settings = {}
|
||||
|
||||
if step_name == "publish":
|
||||
if step_name == "transcribe":
|
||||
schedule = transcribe_retry_schedule_seconds(group_settings)
|
||||
elif step_name == "song_detect":
|
||||
schedule = song_detect_retry_schedule_seconds(group_settings)
|
||||
elif step_name == "publish":
|
||||
schedule = publish_retry_schedule_seconds(group_settings)
|
||||
elif step_name == "comment":
|
||||
schedule = comment_retry_schedule_seconds(group_settings)
|
||||
|
||||
@ -52,7 +52,16 @@ def infer_error_step_name(task, steps: dict[str, object]) -> str: # type: ignor
|
||||
def retry_wait_payload(task_id: str, step, state: dict[str, object]) -> dict[str, object] | None: # type: ignore[no-untyped-def]
|
||||
if step.status != "failed_retryable":
|
||||
return None
|
||||
meta = retry_meta_for_step(step, {"publish": settings_for(state, "publish")})
|
||||
step_settings_group = {
|
||||
"transcribe": "transcribe",
|
||||
"song_detect": "song_detect",
|
||||
"publish": "publish",
|
||||
"comment": "comment",
|
||||
}.get(step.step_name)
|
||||
settings_by_group = {}
|
||||
if step_settings_group is not None and step_settings_group in state["settings"]:
|
||||
settings_by_group[step_settings_group] = settings_for(state, step_settings_group)
|
||||
meta = retry_meta_for_step(step, settings_by_group)
|
||||
if meta is None or meta["retry_due"]:
|
||||
return None
|
||||
return {
|
||||
|
||||
@ -2,6 +2,8 @@ from __future__ import annotations
|
||||
|
||||
from biliup_next.app.retry_meta import comment_retry_schedule_seconds
|
||||
from biliup_next.app.retry_meta import publish_retry_schedule_seconds
|
||||
from biliup_next.app.retry_meta import song_detect_retry_schedule_seconds
|
||||
from biliup_next.app.retry_meta import transcribe_retry_schedule_seconds
|
||||
from biliup_next.app.task_engine import infer_error_step_name, settings_for as task_engine_settings_for
|
||||
from biliup_next.core.models import utc_now_iso
|
||||
|
||||
@ -35,6 +37,18 @@ def resolve_failure(task, repo, state: dict[str, object], exc) -> dict[str, obje
|
||||
next_retry_count = current_retry + 1
|
||||
next_status = "failed_retryable" if exc.retryable else "failed_manual"
|
||||
next_retry_delay_seconds: int | None = None
|
||||
if exc.retryable and step_name == "transcribe":
|
||||
schedule = transcribe_retry_schedule_seconds(settings_for(state, "transcribe"))
|
||||
if next_retry_count > len(schedule):
|
||||
next_status = "failed_manual"
|
||||
else:
|
||||
next_retry_delay_seconds = schedule[next_retry_count - 1]
|
||||
if exc.retryable and step_name == "song_detect":
|
||||
schedule = song_detect_retry_schedule_seconds(settings_for(state, "song_detect"))
|
||||
if next_retry_count > len(schedule):
|
||||
next_status = "failed_manual"
|
||||
else:
|
||||
next_retry_delay_seconds = schedule[next_retry_count - 1]
|
||||
if exc.retryable and step_name == "publish":
|
||||
publish_settings = settings_for(state, "publish")
|
||||
if exc.code == "PUBLISH_RATE_LIMITED":
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
@ -32,6 +33,7 @@ class SettingsService:
|
||||
schema = self._read_json(self.schema_path)
|
||||
settings = self._read_json(self.settings_path)
|
||||
settings = self._apply_schema_defaults(settings, schema)
|
||||
settings = self._apply_env_overrides(settings, schema)
|
||||
settings = self._normalize_paths(settings)
|
||||
self.validate(settings, schema)
|
||||
return SettingsBundle(schema=schema, settings=settings)
|
||||
@ -125,6 +127,57 @@ class SettingsService:
|
||||
group_value[field_name] = self._clone_default(field_schema["default"])
|
||||
return merged
|
||||
|
||||
def _apply_env_overrides(self, settings: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
|
||||
merged = json.loads(json.dumps(settings))
|
||||
aliases = {
|
||||
("transcribe", "groq_api_key"): ["GROQ_API_KEY"],
|
||||
("transcribe", "groq_api_keys"): ["GROQ_API_KEYS"],
|
||||
("collection", "season_id_a"): ["COLLECTION_SEASON_ID_A"],
|
||||
("collection", "season_id_b"): ["COLLECTION_SEASON_ID_B"],
|
||||
}
|
||||
for group_name, fields in schema.get("groups", {}).items():
|
||||
group_value = merged.setdefault(group_name, {})
|
||||
if not isinstance(group_value, dict):
|
||||
continue
|
||||
for field_name, field_schema in fields.items():
|
||||
env_names = [
|
||||
f"BILIUP_NEXT__{group_name}__{field_name}".upper(),
|
||||
f"BILIUP_NEXT_{group_name}_{field_name}".upper(),
|
||||
*aliases.get((group_name, field_name), []),
|
||||
]
|
||||
raw_value = self._first_env_value(env_names)
|
||||
if raw_value is None:
|
||||
continue
|
||||
group_value[field_name] = self._parse_env_value(raw_value, field_schema)
|
||||
return merged
|
||||
|
||||
@staticmethod
|
||||
def _first_env_value(names: list[str]) -> str | None:
|
||||
for name in names:
|
||||
value = os.environ.get(name)
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _parse_env_value(value: str, field_schema: dict[str, Any]) -> Any:
|
||||
expected = field_schema.get("type")
|
||||
if expected == "integer":
|
||||
return int(value)
|
||||
if expected == "boolean":
|
||||
normalized = value.strip().lower()
|
||||
if normalized in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
if normalized in {"0", "false", "no", "off"}:
|
||||
return False
|
||||
raise ConfigError(f"无法解析布尔环境变量值: {value}")
|
||||
if expected == "array":
|
||||
stripped = value.strip()
|
||||
if stripped.startswith("["):
|
||||
return json.loads(stripped)
|
||||
return [item.strip() for item in value.split(",") if item.strip()]
|
||||
return value
|
||||
|
||||
@staticmethod
|
||||
def _clone_default(value: Any) -> Any:
|
||||
return json.loads(json.dumps(value))
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
@ -18,9 +19,7 @@ class CodexCliAdapter:
|
||||
codex_cmd,
|
||||
"exec",
|
||||
prompt.replace("\n", " "),
|
||||
"--full-auto",
|
||||
"--sandbox",
|
||||
"workspace-write",
|
||||
"--dangerously-bypass-approvals-and-sandbox",
|
||||
"--output-schema",
|
||||
"./song_schema.json",
|
||||
"-o",
|
||||
@ -35,6 +34,7 @@ class CodexCliAdapter:
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
env=self._subprocess_env(),
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
raise ModuleError(
|
||||
@ -42,3 +42,12 @@ class CodexCliAdapter:
|
||||
message=f"找不到 codex 命令: {codex_cmd}",
|
||||
retryable=False,
|
||||
) from exc
|
||||
|
||||
@staticmethod
|
||||
def _subprocess_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
for key in ("HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"):
|
||||
value = env.get(key)
|
||||
if value and "://" not in value:
|
||||
env[key] = f"http://{value}"
|
||||
return env
|
||||
|
||||
@ -8,6 +8,9 @@ from typing import Any
|
||||
from biliup_next.core.errors import ModuleError
|
||||
|
||||
|
||||
VISIBLE_BILIUP_LIST_STATES = {"开放浏览", "审核中"}
|
||||
|
||||
|
||||
def normalize_title(text: str) -> str:
    """Normalise a title for exact matching.

    Keeps only CJK characters, ASCII letters and digits, then lowercases,
    so punctuation/spacing differences do not break title comparison.
    """
    kept = re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9]", "", text)
    return kept.lower()
|
||||
|
||||
@ -38,7 +41,7 @@ def fetch_biliup_list(settings: dict[str, Any], *, max_pages: int = 5) -> list[d
|
||||
if not line.startswith("BV"):
|
||||
continue
|
||||
parts = line.split("\t")
|
||||
if len(parts) >= 3 and "开放浏览" not in parts[2]:
|
||||
if len(parts) >= 3 and not any(state in parts[2] for state in VISIBLE_BILIUP_LIST_STATES):
|
||||
continue
|
||||
if len(parts) >= 2:
|
||||
videos.append({"bvid": parts[0].strip(), "title": parts[1].strip()})
|
||||
|
||||
@ -115,7 +115,6 @@ class TaskResetService:
|
||||
work_dir / "comment_full_done.flag",
|
||||
work_dir / "collection_a_done.flag",
|
||||
work_dir / "collection_b_done.flag",
|
||||
work_dir / "bvid.txt",
|
||||
],
|
||||
"comment": [
|
||||
work_dir / "comment_done.flag",
|
||||
|
||||
199
src/biliup_next/infra/video_links.py
Normal file
199
src/biliup_next/infra/video_links.py
Normal file
@ -0,0 +1,199 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list, resolve_full_video_bvid
|
||||
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||
|
||||
|
||||
def bilibili_video_url(bvid: str | None) -> str:
    """Build a bilibili watch URL for *bvid*; empty string for non-BV input."""
    cleaned = (bvid or "").strip()
    if not cleaned.startswith("BV"):
        return ""
    return f"https://www.bilibili.com/video/{cleaned}"
|
||||
|
||||
|
||||
def read_task_split_bvid(task: Any) -> str:
    """Return the pure-cut (split) video BV id recorded for *task*, or ""."""
    return _read_bvid(resolve_task_work_dir(task) / "bvid.txt")
|
||||
|
||||
|
||||
def read_task_full_bvid(task: Any, context: Any | None = None) -> str:
    """Return the full-live BV id for *task*.

    A BV id stored on the task *context* wins; otherwise fall back to the
    ``full_video_bvid.txt`` marker file in the task work dir.
    """
    context_bvid = getattr(context, "full_video_bvid", None) if context is not None else None
    if context_bvid:
        return str(context_bvid).strip()
    return _read_bvid(resolve_task_work_dir(task) / "full_video_bvid.txt")
|
||||
|
||||
|
||||
def link_context_for_task(task: Any, repo: Any | None, settings: dict[str, Any] | None = None) -> dict[str, str]:
    """Collect current/previous BV ids and watch links for template rendering.

    The current full-live BV id is read from context / marker file first; when
    absent, a best-effort `biliup list` lookup is attempted. Missing previous
    links come back as empty strings.
    """
    context = _get_context(repo, task.id)
    full_bvid = read_task_full_bvid(task, context) or resolve_current_full_video_bvid(task, settings)
    split_bvid = read_task_split_bvid(task)
    previous = previous_live_links(task, repo, context, settings)
    links: dict[str, str] = {
        "current_full_video_bvid": full_bvid,
        "current_full_video_link": bilibili_video_url(full_bvid),
        "current_pure_video_bvid": split_bvid,
        "current_pure_video_link": bilibili_video_url(split_bvid),
    }
    for key in (
        "previous_full_video_bvid",
        "previous_full_video_link",
        "previous_pure_video_bvid",
        "previous_pure_video_link",
    ):
        links[key] = previous.get(key, "")
    return links
|
||||
|
||||
|
||||
def resolve_current_full_video_bvid(task: Any, settings: dict[str, Any] | None = None) -> str:
    """Best-effort lookup of the full-live BV id via `biliup list`.

    Returns "" when the biliup CLI is not configured or the lookup fails for
    any reason -- this is a convenience fallback and must never raise.
    """
    if not settings:
        return ""
    if not (settings.get("biliup_path") and settings.get("cookie_file")):
        return ""
    try:
        resolved = resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings)
    except Exception:
        # Listing failures (network, CLI, parsing) degrade to "not found".
        return ""
    return resolved or ""
|
||||
|
||||
|
||||
def previous_live_links(
    task: Any,
    repo: Any | None,
    context: Any | None = None,
    settings: dict[str, Any] | None = None,
) -> dict[str, str]:
    """Locate the most recent earlier live of the same streamer and return its
    BV ids and watch links.

    Repository task contexts are consulted first; ``biliup list`` is used as a
    fallback to fill any key the repository could not. Returns ``{}`` when the
    streamer cannot be determined from context or title.
    """
    context = context or _get_context(repo, task.id)
    streamer = _context_streamer(context) or _parse_streamer_from_title(task.title)
    if not streamer:
        return {}

    # Start time of the current live: prefer the stored context, fall back to
    # parsing the task title.
    current_started = _parse_datetime(getattr(context, "segment_started_at", None)) if context is not None else None
    if current_started is None:
        current_started = _parse_title_datetime(task.title)
    current_session_key = getattr(context, "session_key", None) if context is not None else None

    previous: dict[str, str] = {}
    if repo is not None and hasattr(repo, "find_recent_task_contexts") and hasattr(repo, "get_task"):
        for candidate in repo.find_recent_task_contexts(streamer, limit=50):
            if candidate.task_id == task.id:
                continue
            # Segments of the same session are the same live, not a previous one.
            if current_session_key and getattr(candidate, "session_key", None) == current_session_key:
                continue
            candidate_started = _parse_datetime(getattr(candidate, "segment_started_at", None))
            # When both start times are known, only accept strictly earlier lives.
            if current_started is not None and candidate_started is not None and candidate_started >= current_started:
                continue
            candidate_task = repo.get_task(candidate.task_id)
            if candidate_task is None:
                continue
            full_bvid = read_task_full_bvid(candidate_task, candidate)
            split_bvid = read_task_split_bvid(candidate_task)
            if full_bvid or split_bvid:
                # First candidate with any BV id wins (list is most-recent-first
                # per find_recent_task_contexts -- TODO confirm ordering contract).
                previous = {
                    "previous_full_video_bvid": full_bvid,
                    "previous_full_video_link": bilibili_video_url(full_bvid),
                    "previous_pure_video_bvid": split_bvid,
                    "previous_pure_video_link": bilibili_video_url(split_bvid),
                }
                break
    # Fill any remaining gaps from the uploader's own published-video list.
    if not previous.get("previous_full_video_bvid") or not previous.get("previous_pure_video_bvid"):
        listed_previous = _previous_live_from_biliup_list(streamer, current_started, settings)
        for key, value in listed_previous.items():
            if value and not previous.get(key):
                previous[key] = value
    return previous
|
||||
|
||||
|
||||
def _get_context(repo: Any | None, task_id: str) -> Any | None:
    """Fetch the stored context for *task_id*; None for repos without context support."""
    if repo is None or not hasattr(repo, "get_task_context"):
        return None
    return repo.get_task_context(task_id)
|
||||
|
||||
|
||||
def _context_streamer(context: Any | None) -> str:
    """Return the context's streamer name as a stripped string ("" when absent)."""
    if context is None:
        return ""
    return str(getattr(context, "streamer", "") or "").strip()
|
||||
|
||||
|
||||
def _read_bvid(path: Path) -> str:
    """Read a BV id marker file; "" when the file is missing or not a BV id."""
    if not path.exists():
        return ""
    bvid = path.read_text(encoding="utf-8").strip()
    return bvid if bvid.startswith("BV") else ""
|
||||
|
||||
|
||||
def _parse_datetime(value: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp string; None for missing or malformed input."""
    if not value:
        return None
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        return None
|
||||
|
||||
|
||||
def _parse_title_datetime(title: str) -> datetime | None:
|
||||
patterns = (
|
||||
r"(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
|
||||
r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
|
||||
)
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, title)
|
||||
if not match:
|
||||
continue
|
||||
data = match.groupdict()
|
||||
year = int(data.get("year") or datetime.now().year)
|
||||
try:
|
||||
return datetime(year, int(data["month"]), int(data["day"]), int(data["hour"]), int(data["minute"]))
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _parse_streamer_from_title(title: str) -> str:
|
||||
marker = "唱歌录播"
|
||||
if marker in title:
|
||||
return title.split(marker, 1)[0].strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _previous_live_from_biliup_list(
    streamer: str,
    current_started: datetime | None,
    settings: dict[str, Any] | None,
) -> dict[str, str]:
    """Locate the previous live's BVIDs by scanning the biliup upload list.

    Fallback path used when repository records do not yield previous-video
    links.  Scans uploaded videos whose title contains *streamer*, splits
    them into "纯享" (songs-only) and "唱歌录播" (full recording) candidates,
    and returns bvid/link entries for the most recent of each that started
    strictly before *current_started*.  Returns {} when prerequisites are
    missing or the list cannot be fetched.
    """
    # The biliup CLI path and cookie file are required to query the upload
    # list; without a reference time there is nothing to compare against.
    if current_started is None or not settings or not settings.get("biliup_path") or not settings.get("cookie_file"):
        return {}
    try:
        videos = fetch_biliup_list(settings)
    except Exception:
        # Best-effort fallback: any fetch failure simply yields no links.
        return {}

    # Titles parse to naive datetimes, so compare against a naive copy.
    current_compare = current_started.replace(tzinfo=None)
    full_candidates: list[tuple[datetime, str]] = []
    pure_candidates: list[tuple[datetime, str]] = []
    for video in videos:
        title = video.get("title", "")
        bvid = video.get("bvid", "")
        if not bvid.startswith("BV"):
            continue
        if streamer not in title:
            continue
        started = _parse_title_datetime(title)
        # Year-less titles default to the current year; a "future" start time
        # therefore means the video is from late last year — roll it back.
        if started is not None and started > current_compare and "年" not in title:
            started = started.replace(year=started.year - 1)
        # Keep only videos that verifiably started before the current live.
        if started is None or started >= current_compare:
            continue
        if "纯享" in title:
            pure_candidates.append((started, bvid))
        elif "唱歌录播" in title:
            full_candidates.append((started, bvid))

    if not full_candidates and not pure_candidates:
        return {}

    # The most recent qualifying entry wins for each variant; "" when none.
    full_bvid = max(full_candidates, key=lambda item: item[0])[1] if full_candidates else ""
    pure_bvid = max(pure_candidates, key=lambda item: item[0])[1] if pure_candidates else ""
    return {
        "previous_full_video_bvid": full_bvid,
        "previous_full_video_link": bilibili_video_url(full_bvid),
        "previous_pure_video_bvid": pure_bvid,
        "previous_pure_video_link": bilibili_video_url(pure_bvid),
    }
|
||||
@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from biliup_next.infra.task_repository import TaskRepository
|
||||
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||
@ -11,35 +13,59 @@ class WorkspaceCleanupService:
|
||||
self.repo = repo
|
||||
|
||||
def cleanup_task_outputs(self, task_id: str, settings: dict[str, object]) -> dict[str, object]:
|
||||
task = self.repo.get_task(task_id)
|
||||
if task is None:
|
||||
cleanup_tasks = self._cleanup_tasks(task_id)
|
||||
if not cleanup_tasks:
|
||||
raise RuntimeError(f"task not found: {task_id}")
|
||||
|
||||
session_dir = resolve_task_work_dir(task)
|
||||
removed: list[str] = []
|
||||
skipped: list[str] = []
|
||||
cleaned_task_ids: list[str] = []
|
||||
|
||||
if settings.get("delete_source_video_after_collection_synced", False):
|
||||
source_path = Path(task.source_path).resolve()
|
||||
try:
|
||||
source_path.relative_to(session_dir)
|
||||
source_managed = True
|
||||
except ValueError:
|
||||
source_managed = False
|
||||
if source_path.exists() and source_managed:
|
||||
source_path.unlink()
|
||||
self.repo.delete_artifact_by_path(task_id, str(source_path.resolve()))
|
||||
removed.append(str(source_path))
|
||||
else:
|
||||
skipped.append(str(source_path))
|
||||
for task in cleanup_tasks:
|
||||
session_dir = resolve_task_work_dir(task)
|
||||
cleaned_task_ids.append(task.id)
|
||||
|
||||
if settings.get("delete_split_videos_after_collection_synced", False):
|
||||
split_dir = session_dir / "split_video"
|
||||
if split_dir.exists():
|
||||
shutil.rmtree(split_dir, ignore_errors=True)
|
||||
self.repo.delete_artifacts(task_id, "clip_video")
|
||||
removed.append(str(split_dir))
|
||||
else:
|
||||
skipped.append(str(split_dir))
|
||||
if settings.get("delete_source_video_after_collection_synced", False):
|
||||
source_path = Path(task.source_path).resolve()
|
||||
try:
|
||||
source_path.relative_to(session_dir)
|
||||
source_managed = True
|
||||
except ValueError:
|
||||
source_managed = False
|
||||
if source_path.exists() and source_managed:
|
||||
source_path.unlink()
|
||||
self.repo.delete_artifact_by_path(task.id, str(source_path.resolve()))
|
||||
removed.append(str(source_path))
|
||||
else:
|
||||
skipped.append(str(source_path))
|
||||
|
||||
return {"removed": removed, "skipped": skipped}
|
||||
if settings.get("delete_split_videos_after_collection_synced", False):
|
||||
for video_dir_name in ("split_video", "publish_video"):
|
||||
video_dir = session_dir / video_dir_name
|
||||
if video_dir.exists():
|
||||
shutil.rmtree(video_dir, ignore_errors=True)
|
||||
removed.append(str(video_dir))
|
||||
else:
|
||||
skipped.append(str(video_dir))
|
||||
self.repo.delete_artifacts(task.id, "clip_video")
|
||||
|
||||
return {"removed": removed, "skipped": skipped, "task_ids": cleaned_task_ids}
|
||||
|
||||
def _cleanup_tasks(self, task_id: str) -> list[Any]:
|
||||
task = self.repo.get_task(task_id)
|
||||
if task is None:
|
||||
return []
|
||||
|
||||
if not hasattr(self.repo, "get_task_context") or not hasattr(self.repo, "list_task_contexts_by_session_key"):
|
||||
return [task]
|
||||
|
||||
context = self.repo.get_task_context(task_id)
|
||||
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
||||
return [task]
|
||||
|
||||
tasks = []
|
||||
for session_context in self.repo.list_task_contexts_by_session_key(context.session_key):
|
||||
session_task = self.repo.get_task(session_context.task_id)
|
||||
if session_task is not None:
|
||||
tasks.append(session_task)
|
||||
return tasks or [task]
|
||||
|
||||
@ -31,4 +31,5 @@ class CollectionService:
|
||||
self.repo.update_task_status(task_id, "collection_synced", finished_at)
|
||||
cleanup_result = self.cleanup.cleanup_task_outputs(task_id, settings)
|
||||
return {**result, "cleanup": cleanup_result}
|
||||
self.repo.update_task_status(task_id, "commented", finished_at)
|
||||
return result
|
||||
|
||||
@ -11,9 +11,34 @@ from biliup_next.core.models import Task
|
||||
from biliup_next.core.providers import ProviderManifest
|
||||
from biliup_next.infra.adapters.bilibili_api import BilibiliApiAdapter
|
||||
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
|
||||
from biliup_next.infra.video_links import bilibili_video_url, link_context_for_task
|
||||
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||
|
||||
|
||||
DEFAULT_COMMENT_TEMPLATE = {
|
||||
"split_header": (
|
||||
"当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n"
|
||||
"直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n"
|
||||
"上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)"
|
||||
),
|
||||
"full_header": (
|
||||
"当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n"
|
||||
"歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n"
|
||||
"上次完整版:{previous_full_video_link} (上一场完整录播)"
|
||||
),
|
||||
"split_part_header": "P{part_index}:",
|
||||
"full_part_header": "P{part_index}:",
|
||||
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||
"split_text_song_line": "{song_index}. {song_text}",
|
||||
"full_timeline_line": "{song_index}. {line_text}",
|
||||
}
|
||||
|
||||
|
||||
class _SafeFormatDict(dict):
|
||||
def __missing__(self, key: str) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
class BilibiliTopCommentProvider:
|
||||
def __init__(self, bilibili_api: BilibiliApiAdapter | None = None) -> None:
|
||||
self.bilibili_api = bilibili_api or BilibiliApiAdapter()
|
||||
@ -41,7 +66,8 @@ class BilibiliTopCommentProvider:
|
||||
)
|
||||
|
||||
timeline_content = songs_path.read_text(encoding="utf-8").strip()
|
||||
split_content, split_reason = self._build_split_comment(task, settings)
|
||||
comment_template = self._load_comment_template(settings)
|
||||
split_content, split_reason = self._build_split_comment(task, settings, comment_template)
|
||||
if not timeline_content and not split_content:
|
||||
self._touch_comment_flags(session_dir, split_done=True, full_done=True)
|
||||
return {"status": "skipped", "reason": "comment_content_empty"}
|
||||
@ -78,7 +104,7 @@ class BilibiliTopCommentProvider:
|
||||
|
||||
if settings.get("post_full_video_timeline_comment", True) and not full_done:
|
||||
full_bvid = resolve_full_video_bvid(task.title, session_dir, settings)
|
||||
full_content, full_reason = self._build_full_comment_content(task, settings)
|
||||
full_content, full_reason = self._build_full_comment_content(task, settings, comment_template)
|
||||
if full_reason is not None:
|
||||
full_result = {"status": "skipped", "reason": full_reason}
|
||||
elif full_bvid and full_content:
|
||||
@ -135,44 +161,116 @@ class BilibiliTopCommentProvider:
|
||||
return {"status": "ok", "bvid": bvid, "aid": aid, "rpid": rpid}
|
||||
|
||||
@staticmethod
|
||||
def _build_split_comment_content(songs_json_path: Path, songs_txt_path: Path) -> str:
|
||||
def _build_split_comment_content(
|
||||
songs_json_path: Path,
|
||||
songs_txt_path: Path,
|
||||
*,
|
||||
start_index: int = 1,
|
||||
comment_template: dict[str, str] | None = None,
|
||||
) -> tuple[str, int]:
|
||||
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
|
||||
next_index = start_index
|
||||
if songs_json_path.exists():
|
||||
try:
|
||||
data = json.loads(songs_json_path.read_text(encoding="utf-8"))
|
||||
lines = []
|
||||
for index, song in enumerate(data.get("songs", []), 1):
|
||||
for song in data.get("songs", []):
|
||||
title = str(song.get("title", "")).strip()
|
||||
artist = str(song.get("artist", "")).strip()
|
||||
if not title:
|
||||
continue
|
||||
suffix = f" — {artist}" if artist else ""
|
||||
lines.append(f"{index}. {title}{suffix}")
|
||||
lines.append(
|
||||
BilibiliTopCommentProvider._format_template(
|
||||
comment_template.get("split_song_line", DEFAULT_COMMENT_TEMPLATE["split_song_line"]),
|
||||
{
|
||||
"song_index": str(next_index),
|
||||
"title": title,
|
||||
"artist": artist,
|
||||
"artist_suffix": suffix,
|
||||
},
|
||||
)
|
||||
)
|
||||
next_index += 1
|
||||
if lines:
|
||||
return "\n".join(lines)
|
||||
return "\n".join(lines), next_index
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
if songs_txt_path.exists():
|
||||
lines = []
|
||||
for index, raw in enumerate(songs_txt_path.read_text(encoding="utf-8").splitlines(), 1):
|
||||
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
|
||||
text = raw.strip()
|
||||
if not text:
|
||||
continue
|
||||
parts = text.split(" ", 1)
|
||||
song_text = parts[1] if len(parts) == 2 and ":" in parts[0] else text
|
||||
lines.append(f"{index}. {song_text}")
|
||||
return "\n".join(lines)
|
||||
return ""
|
||||
lines.append(
|
||||
BilibiliTopCommentProvider._format_template(
|
||||
comment_template.get("split_text_song_line", DEFAULT_COMMENT_TEMPLATE["split_text_song_line"]),
|
||||
{
|
||||
"song_index": str(next_index),
|
||||
"song_text": song_text,
|
||||
"line_text": text,
|
||||
},
|
||||
)
|
||||
)
|
||||
next_index += 1
|
||||
return "\n".join(lines), next_index
|
||||
return "", next_index
|
||||
|
||||
def _build_split_comment(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
|
||||
@staticmethod
|
||||
def _build_full_timeline_content(
|
||||
songs_txt_path: Path,
|
||||
*,
|
||||
start_index: int = 1,
|
||||
comment_template: dict[str, str] | None = None,
|
||||
) -> tuple[str, int]:
|
||||
if not songs_txt_path.exists():
|
||||
return "", start_index
|
||||
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
|
||||
next_index = start_index
|
||||
lines = []
|
||||
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
|
||||
text = raw.strip()
|
||||
if not text:
|
||||
continue
|
||||
lines.append(
|
||||
BilibiliTopCommentProvider._format_template(
|
||||
comment_template.get("full_timeline_line", DEFAULT_COMMENT_TEMPLATE["full_timeline_line"]),
|
||||
{
|
||||
"song_index": str(next_index),
|
||||
"line_text": text,
|
||||
},
|
||||
)
|
||||
)
|
||||
next_index += 1
|
||||
return "\n".join(lines), next_index
|
||||
|
||||
def _build_split_comment(
|
||||
self,
|
||||
task: Task,
|
||||
settings: dict[str, Any],
|
||||
comment_template: dict[str, str],
|
||||
) -> tuple[str, str | None]:
|
||||
repo = settings.get("__repo")
|
||||
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
|
||||
session_dir = resolve_task_work_dir(task)
|
||||
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
|
||||
content, _ = self._build_split_comment_content(
|
||||
session_dir / "songs.json",
|
||||
session_dir / "songs.txt",
|
||||
comment_template=comment_template,
|
||||
)
|
||||
return self._with_split_footer(content, task, settings, comment_template), None
|
||||
|
||||
context = repo.get_task_context(task.id)
|
||||
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
||||
session_dir = resolve_task_work_dir(task)
|
||||
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
|
||||
content, _ = self._build_split_comment_content(
|
||||
session_dir / "songs.json",
|
||||
session_dir / "songs.txt",
|
||||
comment_template=comment_template,
|
||||
)
|
||||
return self._with_split_footer(content, task, settings, comment_template), None
|
||||
|
||||
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
|
||||
if not ordered_contexts:
|
||||
@ -182,31 +280,42 @@ class BilibiliTopCommentProvider:
|
||||
return "", "session_split_comment_owned_by_anchor"
|
||||
|
||||
blocks: list[str] = []
|
||||
next_song_index = 1
|
||||
for index, session_context in enumerate(ordered_contexts, start=1):
|
||||
session_task = repo.get_task(session_context.task_id)
|
||||
if session_task is None:
|
||||
continue
|
||||
task_dir = resolve_task_work_dir(session_task)
|
||||
content = self._build_split_comment_content(task_dir / "songs.json", task_dir / "songs.txt")
|
||||
content, next_song_index = self._build_split_comment_content(
|
||||
task_dir / "songs.json",
|
||||
task_dir / "songs.txt",
|
||||
start_index=next_song_index,
|
||||
comment_template=comment_template,
|
||||
)
|
||||
if not content:
|
||||
continue
|
||||
blocks.append(f"P{index}:\n{content}")
|
||||
blocks.append(f"{self._part_header(comment_template, 'split_part_header', index)}\n{content}")
|
||||
if not blocks:
|
||||
return "", "split_comment_empty"
|
||||
return "\n\n".join(blocks), None
|
||||
return self._with_split_footer("\n\n".join(blocks), task, settings, comment_template), None
|
||||
|
||||
def _build_full_comment_content(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
|
||||
def _build_full_comment_content(
|
||||
self,
|
||||
task: Task,
|
||||
settings: dict[str, Any],
|
||||
comment_template: dict[str, str],
|
||||
) -> tuple[str, str | None]:
|
||||
repo = settings.get("__repo")
|
||||
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
|
||||
session_dir = resolve_task_work_dir(task)
|
||||
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
|
||||
return content, None if content else "timeline_comment_empty"
|
||||
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
|
||||
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
|
||||
|
||||
context = repo.get_task_context(task.id)
|
||||
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
||||
session_dir = resolve_task_work_dir(task)
|
||||
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
|
||||
return content, None if content else "timeline_comment_empty"
|
||||
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
|
||||
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
|
||||
|
||||
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
|
||||
if not ordered_contexts:
|
||||
@ -216,21 +325,109 @@ class BilibiliTopCommentProvider:
|
||||
return "", "session_full_comment_owned_by_anchor"
|
||||
|
||||
blocks: list[str] = []
|
||||
next_song_index = 1
|
||||
for index, session_context in enumerate(ordered_contexts, start=1):
|
||||
session_task = repo.get_task(session_context.task_id)
|
||||
if session_task is None:
|
||||
continue
|
||||
task_dir = resolve_task_work_dir(session_task)
|
||||
songs_path = task_dir / "songs.txt"
|
||||
if not songs_path.exists():
|
||||
continue
|
||||
content = songs_path.read_text(encoding="utf-8").strip()
|
||||
content, next_song_index = self._build_full_timeline_content(
|
||||
songs_path,
|
||||
start_index=next_song_index,
|
||||
comment_template=comment_template,
|
||||
)
|
||||
if not content:
|
||||
continue
|
||||
blocks.append(f"P{index}:\n{content}")
|
||||
blocks.append(f"{self._part_header(comment_template, 'full_part_header', index)}\n{content}")
|
||||
if not blocks:
|
||||
return "", "timeline_comment_empty"
|
||||
return "\n\n".join(blocks), None
|
||||
return self._with_full_footer("\n\n".join(blocks), task, settings, comment_template), None
|
||||
|
||||
def _with_split_footer(
|
||||
self,
|
||||
content: str,
|
||||
task: Task,
|
||||
settings: dict[str, Any],
|
||||
comment_template: dict[str, str],
|
||||
) -> str:
|
||||
links = link_context_for_task(task, settings.get("__repo"), settings)
|
||||
current_full_link = links.get("current_full_video_link", "")
|
||||
if not current_full_link and settings.get("biliup_path") and settings.get("cookie_file"):
|
||||
full_bvid = resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings)
|
||||
current_full_link = bilibili_video_url(full_bvid)
|
||||
header_vars = dict(links)
|
||||
header_vars["current_full_video_link"] = current_full_link
|
||||
header = self._format_header_template(
|
||||
comment_template.get("split_header", DEFAULT_COMMENT_TEMPLATE["split_header"]),
|
||||
header_vars,
|
||||
)
|
||||
return self._prepend_header(content, header)
|
||||
|
||||
def _with_full_footer(
|
||||
self,
|
||||
content: str,
|
||||
task: Task,
|
||||
settings: dict[str, Any],
|
||||
comment_template: dict[str, str],
|
||||
) -> str:
|
||||
links = link_context_for_task(task, settings.get("__repo"), settings)
|
||||
header = self._format_header_template(
|
||||
comment_template.get("full_header", DEFAULT_COMMENT_TEMPLATE["full_header"]),
|
||||
links,
|
||||
)
|
||||
return self._prepend_header(content, header)
|
||||
|
||||
@staticmethod
|
||||
def _prepend_header(content: str, header: str) -> str:
|
||||
content = content.strip()
|
||||
lines = [line.rstrip() for line in header.splitlines() if line.strip()]
|
||||
if not content:
|
||||
return "\n".join(lines)
|
||||
if not lines:
|
||||
return content
|
||||
return "\n".join(lines) + f"\n\n{content}"
|
||||
|
||||
@staticmethod
|
||||
def _part_header(comment_template: dict[str, str], key: str, part_index: int) -> str:
|
||||
return BilibiliTopCommentProvider._format_template(
|
||||
comment_template.get(key, DEFAULT_COMMENT_TEMPLATE[key]),
|
||||
{"part_index": str(part_index)},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _format_template(template: str, values: dict[str, str]) -> str:
|
||||
return template.format_map(_SafeFormatDict(values)).strip()
|
||||
|
||||
@staticmethod
|
||||
def _format_header_template(template: str, values: dict[str, str]) -> str:
|
||||
lines = []
|
||||
for raw_line in template.splitlines():
|
||||
if any(f"{{{key}}}" in raw_line and not value for key, value in values.items()):
|
||||
continue
|
||||
lines.append(BilibiliTopCommentProvider._format_template(raw_line, values))
|
||||
return "\n".join(line for line in lines if line.strip()).strip()
|
||||
|
||||
@staticmethod
|
||||
def _load_comment_template(settings: dict[str, Any]) -> dict[str, str]:
|
||||
merged = dict(DEFAULT_COMMENT_TEMPLATE)
|
||||
path_value = settings.get("upload_config_file")
|
||||
if not path_value:
|
||||
return merged
|
||||
path = Path(str(path_value))
|
||||
if not path.exists():
|
||||
return merged
|
||||
try:
|
||||
config = json.loads(path.read_text(encoding="utf-8"))
|
||||
except json.JSONDecodeError:
|
||||
return merged
|
||||
template = config.get("comment_template", {})
|
||||
if not isinstance(template, dict):
|
||||
return merged
|
||||
for key, value in template.items():
|
||||
if key in merged and isinstance(value, str):
|
||||
merged[key] = value
|
||||
return merged
|
||||
|
||||
def _ordered_session_contexts(self, repo, session_key: str) -> list[object]: # type: ignore[no-untyped-def]
|
||||
contexts = list(repo.list_task_contexts_by_session_key(session_key))
|
||||
|
||||
@ -217,7 +217,7 @@ class IngestService:
|
||||
"room_id": sidecar_meta["payload"].get("room_id"),
|
||||
"session_key": sidecar_meta["payload"].get("session_key"),
|
||||
"full_video_bvid": sidecar_meta["payload"].get("full_video_bvid"),
|
||||
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or source_path.stat().st_mtime,
|
||||
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or target_source.stat().st_mtime,
|
||||
}
|
||||
task = self.create_task_from_file(target_source, settings, context_payload=context_payload)
|
||||
accepted.append(
|
||||
|
||||
@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
@ -11,9 +12,13 @@ from biliup_next.core.errors import ModuleError
|
||||
from biliup_next.core.models import PublishRecord, Task, utc_now_iso
|
||||
from biliup_next.core.providers import ProviderManifest
|
||||
from biliup_next.infra.adapters.biliup_cli import BiliupCliAdapter
|
||||
from biliup_next.infra.video_links import link_context_for_task
|
||||
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||
|
||||
|
||||
DESC_MAX_CHARS = 1900
|
||||
|
||||
|
||||
class BiliupCliPublishProvider:
|
||||
def __init__(self, adapter: BiliupCliAdapter | None = None) -> None:
|
||||
self.adapter = adapter or BiliupCliAdapter()
|
||||
@ -36,7 +41,7 @@ class BiliupCliPublishProvider:
|
||||
publish_progress = work_dir / "publish_progress.json"
|
||||
config = self._load_upload_config(Path(str(settings["upload_config_file"])))
|
||||
|
||||
video_files = [artifact.path for artifact in clip_videos]
|
||||
video_files = self._prepare_publish_video_files(work_dir, [artifact.path for artifact in clip_videos])
|
||||
if not video_files:
|
||||
raise ModuleError(
|
||||
code="PUBLISH_NO_CLIPS",
|
||||
@ -64,10 +69,13 @@ class BiliupCliPublishProvider:
|
||||
"daily_quote": quote.get("text", ""),
|
||||
"quote_author": quote.get("author", ""),
|
||||
}
|
||||
template_vars.update(link_context_for_task(task, settings.get("__repo"), settings))
|
||||
template = config.get("template", {})
|
||||
title = template.get("title", "{streamer}_{date}").format(**template_vars)
|
||||
description = template.get("description", "{songs_list}").format(**template_vars)
|
||||
dynamic = template.get("dynamic", "").format(**template_vars)
|
||||
description = self._fit_bilibili_desc(
|
||||
self._drop_empty_link_lines(template.get("description", "{songs_list}").format(**template_vars))
|
||||
)
|
||||
dynamic = self._drop_empty_link_lines(template.get("dynamic", "").format(**template_vars))
|
||||
tags = template.get("tag", "翻唱,唱歌,音乐").format(**template_vars)
|
||||
streamer_cfg = config.get("streamers", {})
|
||||
if streamer in streamer_cfg:
|
||||
@ -90,8 +98,12 @@ class BiliupCliPublishProvider:
|
||||
first_batch = video_files[:5]
|
||||
remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
|
||||
|
||||
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
|
||||
progress = self._load_publish_progress(publish_progress)
|
||||
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
|
||||
progress_bvid = str(progress.get("bvid", "")).strip()
|
||||
if not existing_bvid.startswith("BV") and progress_bvid.startswith("BV"):
|
||||
existing_bvid = progress_bvid
|
||||
bvid_file.write_text(existing_bvid, encoding="utf-8")
|
||||
if upload_done.exists() and existing_bvid.startswith("BV"):
|
||||
return PublishRecord(
|
||||
id=None,
|
||||
@ -201,6 +213,7 @@ class BiliupCliPublishProvider:
|
||||
upload_cmd.extend(["--cover", cover])
|
||||
|
||||
for attempt in range(1, retry_count + 1):
|
||||
self._append_description_summary(publish_log, description)
|
||||
result = self.adapter.run(
|
||||
upload_cmd,
|
||||
label=f"首批上传[{attempt}/{retry_count}]",
|
||||
@ -253,6 +266,29 @@ class BiliupCliPublishProvider:
|
||||
def _wait_seconds(retry_index: int) -> int:
|
||||
return min(300 * (2**retry_index), 3600)
|
||||
|
||||
@staticmethod
|
||||
def _prepare_publish_video_files(work_dir: Path, video_files: list[str]) -> list[str]:
|
||||
publish_dir = work_dir / "publish_video"
|
||||
if publish_dir.exists():
|
||||
shutil.rmtree(publish_dir)
|
||||
publish_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
prepared: list[str] = []
|
||||
for index, video_file in enumerate(video_files, start=1):
|
||||
source = Path(video_file)
|
||||
name = BiliupCliPublishProvider._strip_clip_number_prefix(source.name)
|
||||
target = publish_dir / f"{index:02d}_{name}"
|
||||
try:
|
||||
target.hardlink_to(source)
|
||||
except OSError:
|
||||
shutil.copy2(source, target)
|
||||
prepared.append(str(target))
|
||||
return prepared
|
||||
|
||||
@staticmethod
|
||||
def _strip_clip_number_prefix(filename: str) -> str:
|
||||
return re.sub(r"^\d+[_-]+", "", filename, count=1)
|
||||
|
||||
@staticmethod
|
||||
def _load_upload_config(path: Path) -> dict[str, Any]:
|
||||
if not path.exists():
|
||||
@ -262,6 +298,9 @@ class BiliupCliPublishProvider:
|
||||
@staticmethod
|
||||
def _parse_filename(filename: str, config: dict[str, Any] | None = None) -> dict[str, str]:
|
||||
config = config or {}
|
||||
builtin = BiliupCliPublishProvider._parse_builtin_filename(filename)
|
||||
if builtin:
|
||||
return builtin
|
||||
patterns = config.get("filename_patterns", {}).get("patterns", [])
|
||||
for pattern_config in patterns:
|
||||
regex = pattern_config.get("regex")
|
||||
@ -278,6 +317,48 @@ class BiliupCliPublishProvider:
|
||||
return data
|
||||
return {"streamer": filename, "date": ""}
|
||||
|
||||
@staticmethod
|
||||
def _parse_builtin_filename(filename: str) -> dict[str, str]:
|
||||
patterns = (
|
||||
r"^(?P<streamer>.+?)唱歌录播\s+(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
|
||||
r"^(?P<streamer>.+?)唱歌录播[::]\s*(?P<year>\d{4})年(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
|
||||
)
|
||||
for pattern in patterns:
|
||||
match = re.match(pattern, filename)
|
||||
if not match:
|
||||
continue
|
||||
data = match.groupdict()
|
||||
data["date"] = f"{data['month']}月{data['day']}日 {data['hour']}时{data['minute']}分"
|
||||
return data
|
||||
return {}
|
||||
|
||||
@staticmethod
|
||||
def _drop_empty_link_lines(text: str) -> str:
|
||||
lines = []
|
||||
for line in text.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped in {"直播完整版:", "歌曲纯享版:", "上次直播:", "上次纯享:", "上次完整版:"}:
|
||||
continue
|
||||
lines.append(line.rstrip())
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
@staticmethod
|
||||
def _fit_bilibili_desc(text: str, max_chars: int = DESC_MAX_CHARS) -> str:
|
||||
text = text.strip()
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
suffix = "\n\n完整歌单见置顶评论。"
|
||||
return text[: max(0, max_chars - len(suffix))].rstrip() + suffix
|
||||
|
||||
@staticmethod
|
||||
def _append_description_summary(log_path: Path, description: str) -> None:
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
line = f"description_chars: {len(description)}\n"
|
||||
if log_path.exists():
|
||||
log_path.write_text(log_path.read_text(encoding="utf-8") + line, encoding="utf-8")
|
||||
else:
|
||||
log_path.write_text(line, encoding="utf-8")
|
||||
|
||||
@staticmethod
|
||||
def _get_random_quote(config: dict[str, Any]) -> dict[str, str]:
|
||||
quotes = config.get("quotes", [])
|
||||
|
||||
@ -26,7 +26,9 @@ class PublishService:
|
||||
session_contexts = self._session_contexts(task_id)
|
||||
if len(session_contexts) <= 1:
|
||||
clip_videos = self._clip_videos_for_task(task_id)
|
||||
record = provider.publish(task, clip_videos, settings)
|
||||
provider_settings = dict(settings)
|
||||
provider_settings["__repo"] = self.repo
|
||||
record = provider.publish(task, clip_videos, provider_settings)
|
||||
self._persist_publish_success(task, record)
|
||||
return record
|
||||
|
||||
@ -50,6 +52,7 @@ class PublishService:
|
||||
if anchor_task is None:
|
||||
raise RuntimeError(f"anchor task not found: {anchor_context.task_id}")
|
||||
session_settings = dict(settings)
|
||||
session_settings["__repo"] = self.repo
|
||||
session_settings.update(self._session_publish_metadata(anchor_task, session_contexts, settings))
|
||||
record = provider.publish(anchor_task, clip_videos, session_settings)
|
||||
for context in session_contexts:
|
||||
|
||||
@ -37,13 +37,17 @@ class CodexSongDetector:
|
||||
work_dir=work_dir,
|
||||
prompt=TASK_PROMPT,
|
||||
)
|
||||
self._write_codex_log(work_dir, result)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr[-2000:]
|
||||
stdout = result.stdout[-2000:]
|
||||
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
|
||||
raise ModuleError(
|
||||
code="SONG_DETECT_FAILED",
|
||||
message="codex exec 执行失败",
|
||||
retryable=True,
|
||||
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
|
||||
retryable=retryable,
|
||||
details={"stdout": stdout, "stderr": stderr},
|
||||
)
|
||||
|
||||
ensure_song_outputs(
|
||||
@ -72,3 +76,37 @@ class CodexSongDetector:
|
||||
created_at=utc_now_iso(),
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _write_codex_log(work_dir: Path, result) -> None: # noqa: ANN001
|
||||
log_path = work_dir / "codex.log"
|
||||
log_path.write_text(
|
||||
"\n".join(
|
||||
[
|
||||
"codex song_detect",
|
||||
f"returncode: {result.returncode}",
|
||||
"",
|
||||
"stdout:",
|
||||
result.stdout,
|
||||
"",
|
||||
"stderr:",
|
||||
result.stderr,
|
||||
"",
|
||||
]
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _is_auth_error(text: str) -> bool:
|
||||
lowered = text.lower()
|
||||
return any(
|
||||
needle in lowered
|
||||
for needle in (
|
||||
"401",
|
||||
"invalid access token",
|
||||
"token expired",
|
||||
"unauthorized",
|
||||
"authentication",
|
||||
)
|
||||
)
|
||||
|
||||
@ -43,11 +43,14 @@ class QwenCliSongDetector:
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr[-2000:]
|
||||
stdout = result.stdout[-2000:]
|
||||
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
|
||||
raise ModuleError(
|
||||
code="SONG_DETECT_FAILED",
|
||||
message="qwen -p 执行失败",
|
||||
retryable=True,
|
||||
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
|
||||
retryable=retryable,
|
||||
details={"stdout": stdout, "stderr": stderr},
|
||||
)
|
||||
|
||||
ensure_song_outputs(
|
||||
@ -76,3 +79,17 @@ class QwenCliSongDetector:
|
||||
created_at=utc_now_iso(),
|
||||
),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _is_auth_error(text: str) -> bool:
|
||||
lowered = text.lower()
|
||||
return any(
|
||||
needle in lowered
|
||||
for needle in (
|
||||
"401",
|
||||
"invalid access token",
|
||||
"token expired",
|
||||
"unauthorized",
|
||||
"authentication",
|
||||
)
|
||||
)
|
||||
|
||||
@ -2,9 +2,12 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from contextlib import suppress
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@ -16,6 +19,7 @@ from biliup_next.core.providers import ProviderManifest
|
||||
LANGUAGE = "zh"
|
||||
BITRATE_KBPS = 64
|
||||
MODEL_NAME = "whisper-large-v3-turbo"
|
||||
SEGMENT_SIZE_SAFETY_RATIO = 0.75
|
||||
|
||||
|
||||
class GroqTranscribeProvider:
|
||||
@ -30,11 +34,11 @@ class GroqTranscribeProvider:
|
||||
)
|
||||
|
||||
def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
|
||||
groq_api_key = str(settings.get("groq_api_key", "")).strip()
|
||||
if not groq_api_key:
|
||||
groq_api_keys = self._groq_api_keys(settings)
|
||||
if not groq_api_keys:
|
||||
raise ModuleError(
|
||||
code="GROQ_API_KEY_MISSING",
|
||||
message="未配置 transcribe.groq_api_key",
|
||||
message="未配置 transcribe.groq_api_key 或 transcribe.groq_api_keys",
|
||||
retryable=False,
|
||||
)
|
||||
try:
|
||||
@ -55,18 +59,23 @@ class GroqTranscribeProvider:
|
||||
)
|
||||
|
||||
ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
|
||||
max_file_size_mb = int(settings.get("max_file_size_mb", 23))
|
||||
max_file_size_mb = int(settings.get("max_file_size_mb", 12))
|
||||
work_dir = source_path.parent
|
||||
temp_audio_dir = work_dir / "temp_audio"
|
||||
checkpoint_dir = work_dir / "transcribe_segments"
|
||||
temp_audio_dir.mkdir(parents=True, exist_ok=True)
|
||||
segment_duration = max(1, math.floor((max_file_size_mb * 8 * 1024) / BITRATE_KBPS))
|
||||
checkpoint_dir.mkdir(parents=True, exist_ok=True)
|
||||
max_segment_bytes = max(1, max_file_size_mb) * 1024 * 1024
|
||||
segment_duration = self._initial_segment_duration(max_file_size_mb)
|
||||
output_pattern = temp_audio_dir / "part_%03d.mp3"
|
||||
|
||||
self._extract_audio_segments(
|
||||
segment_duration = self._extract_audio_segments_with_size_guard(
|
||||
ffmpeg_bin=ffmpeg_bin,
|
||||
source_path=source_path,
|
||||
output_pattern=output_pattern,
|
||||
segment_duration=segment_duration,
|
||||
temp_audio_dir=temp_audio_dir,
|
||||
initial_segment_duration=segment_duration,
|
||||
max_segment_bytes=max_segment_bytes,
|
||||
)
|
||||
|
||||
segments = sorted(temp_audio_dir.glob("part_*.mp3"))
|
||||
@ -77,22 +86,47 @@ class GroqTranscribeProvider:
|
||||
retryable=False,
|
||||
)
|
||||
|
||||
client = Groq(api_key=groq_api_key)
|
||||
request_timeout_seconds = max(1, int(settings.get("request_timeout_seconds", 180)))
|
||||
request_max_retries = max(0, int(settings.get("request_max_retries", 1)))
|
||||
request_retry_backoff_seconds = max(0, int(settings.get("request_retry_backoff_seconds", 30)))
|
||||
lock_enabled = bool(settings.get("serialize_groq_requests", True))
|
||||
lock_path = self._groq_lock_path(settings, work_dir)
|
||||
clients = [Groq(api_key=key, timeout=request_timeout_seconds, max_retries=0) for key in groq_api_keys]
|
||||
srt_path = work_dir / f"{task.title}.srt"
|
||||
temp_srt_path = work_dir / f".{task.title}.srt.tmp"
|
||||
global_idx = 1
|
||||
|
||||
try:
|
||||
with srt_path.open("w", encoding="utf-8") as srt_file:
|
||||
with temp_srt_path.open("w", encoding="utf-8") as srt_file:
|
||||
for index, segment in enumerate(segments):
|
||||
offset_seconds = index * segment_duration
|
||||
segment_data = self._transcribe_with_retry(client, segment)
|
||||
segment_checkpoint = checkpoint_dir / f"{segment.stem}.json"
|
||||
segment_data = self._load_segment_checkpoint(segment_checkpoint, segment_duration=segment_duration)
|
||||
if segment_data is None:
|
||||
with self._optional_groq_lock(lock_path, enabled=lock_enabled):
|
||||
segment_data = self._transcribe_with_retry(
|
||||
clients,
|
||||
segment,
|
||||
request_timeout_seconds=request_timeout_seconds,
|
||||
request_max_retries=request_max_retries,
|
||||
request_retry_backoff_seconds=request_retry_backoff_seconds,
|
||||
)
|
||||
self._write_segment_checkpoint(
|
||||
segment_checkpoint,
|
||||
segment_data,
|
||||
segment_duration=segment_duration,
|
||||
audio_file=segment,
|
||||
)
|
||||
for chunk in segment_data:
|
||||
start = self._format_srt_time(float(chunk["start"]) + offset_seconds)
|
||||
end = self._format_srt_time(float(chunk["end"]) + offset_seconds)
|
||||
text = str(chunk["text"]).strip()
|
||||
srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
|
||||
global_idx += 1
|
||||
temp_srt_path.replace(srt_path)
|
||||
finally:
|
||||
with suppress(FileNotFoundError):
|
||||
temp_srt_path.unlink()
|
||||
shutil.rmtree(temp_audio_dir, ignore_errors=True)
|
||||
|
||||
return Artifact(
|
||||
@ -104,12 +138,126 @@ class GroqTranscribeProvider:
|
||||
{
|
||||
"provider": "groq",
|
||||
"model": MODEL_NAME,
|
||||
"api_key_count": len(groq_api_keys),
|
||||
"segment_duration_seconds": segment_duration,
|
||||
"checkpoint_dir": str(checkpoint_dir.resolve()),
|
||||
}
|
||||
),
|
||||
created_at=utc_now_iso(),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _groq_api_keys(settings: dict[str, Any]) -> list[str]:
|
||||
keys: list[str] = []
|
||||
raw_keys = settings.get("groq_api_keys")
|
||||
if isinstance(raw_keys, list):
|
||||
keys.extend(str(key).strip() for key in raw_keys if str(key).strip())
|
||||
legacy_key = str(settings.get("groq_api_key", "")).strip()
|
||||
if legacy_key:
|
||||
keys.append(legacy_key)
|
||||
deduped: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for key in keys:
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
deduped.append(key)
|
||||
return deduped
|
||||
|
||||
@staticmethod
|
||||
def _initial_segment_duration(max_file_size_mb: int) -> int:
|
||||
safe_target_mb = max_file_size_mb * SEGMENT_SIZE_SAFETY_RATIO
|
||||
return max(1, math.floor((safe_target_mb * 8 * 1024) / BITRATE_KBPS))
|
||||
|
||||
def _extract_audio_segments_with_size_guard(
|
||||
self,
|
||||
*,
|
||||
ffmpeg_bin: str,
|
||||
source_path: Path,
|
||||
output_pattern: Path,
|
||||
temp_audio_dir: Path,
|
||||
initial_segment_duration: int,
|
||||
max_segment_bytes: int,
|
||||
) -> int:
|
||||
segment_duration = initial_segment_duration
|
||||
for _attempt in range(4):
|
||||
self._clear_audio_segments(temp_audio_dir)
|
||||
self._extract_audio_segments(
|
||||
ffmpeg_bin=ffmpeg_bin,
|
||||
source_path=source_path,
|
||||
output_pattern=output_pattern,
|
||||
segment_duration=segment_duration,
|
||||
)
|
||||
largest_segment = self._largest_audio_segment(temp_audio_dir)
|
||||
if largest_segment is None or largest_segment.stat().st_size <= max_segment_bytes:
|
||||
return segment_duration
|
||||
next_duration = max(1, math.floor(segment_duration * 0.75))
|
||||
if next_duration == segment_duration:
|
||||
break
|
||||
segment_duration = next_duration
|
||||
largest_segment = self._largest_audio_segment(temp_audio_dir)
|
||||
largest_size = largest_segment.stat().st_size if largest_segment else 0
|
||||
raise ModuleError(
|
||||
code="TRANSCRIBE_AUDIO_SEGMENT_TOO_LARGE",
|
||||
message="音频分片超过 Groq 上传安全阈值",
|
||||
retryable=False,
|
||||
details={
|
||||
"largest_segment": str(largest_segment) if largest_segment else None,
|
||||
"largest_segment_bytes": largest_size,
|
||||
"max_segment_bytes": max_segment_bytes,
|
||||
},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _clear_audio_segments(temp_audio_dir: Path) -> None:
|
||||
for path in temp_audio_dir.glob("part_*.mp3"):
|
||||
path.unlink(missing_ok=True)
|
||||
|
||||
@staticmethod
|
||||
def _largest_audio_segment(temp_audio_dir: Path) -> Path | None:
|
||||
segments = list(temp_audio_dir.glob("part_*.mp3"))
|
||||
if not segments:
|
||||
return None
|
||||
return max(segments, key=lambda path: path.stat().st_size)
|
||||
|
||||
@staticmethod
|
||||
def _load_segment_checkpoint(checkpoint_path: Path, *, segment_duration: int) -> list[dict[str, Any]] | None:
|
||||
if not checkpoint_path.exists():
|
||||
return None
|
||||
try:
|
||||
data = json.loads(checkpoint_path.read_text(encoding="utf-8"))
|
||||
if data.get("model") != MODEL_NAME or data.get("language") != LANGUAGE:
|
||||
return None
|
||||
if data.get("segment_duration_seconds") != segment_duration:
|
||||
return None
|
||||
segments = data.get("segments")
|
||||
if not isinstance(segments, list):
|
||||
return None
|
||||
return [dict(segment) for segment in segments]
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _write_segment_checkpoint(
|
||||
checkpoint_path: Path,
|
||||
segments: list[dict[str, Any]],
|
||||
*,
|
||||
segment_duration: int,
|
||||
audio_file: Path,
|
||||
) -> None:
|
||||
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
temp_path = checkpoint_path.with_suffix(f"{checkpoint_path.suffix}.tmp")
|
||||
payload = {
|
||||
"provider": "groq",
|
||||
"model": MODEL_NAME,
|
||||
"language": LANGUAGE,
|
||||
"audio_file": audio_file.name,
|
||||
"segment_duration_seconds": segment_duration,
|
||||
"segments": segments,
|
||||
}
|
||||
temp_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
temp_path.replace(checkpoint_path)
|
||||
|
||||
def _extract_audio_segments(
|
||||
self,
|
||||
*,
|
||||
@ -156,31 +304,141 @@ class GroqTranscribeProvider:
|
||||
details={"stderr": exc.stderr[-2000:], "stdout": exc.stdout[-2000:]},
|
||||
) from exc
|
||||
|
||||
def _transcribe_with_retry(self, client: Any, audio_file: Path) -> list[dict[str, Any]]:
|
||||
retry_count = 0
|
||||
while True:
|
||||
@staticmethod
|
||||
def _groq_lock_path(settings: dict[str, Any], fallback_work_dir: Path) -> Path:
|
||||
session_dir = settings.get("session_dir")
|
||||
if isinstance(session_dir, str) and session_dir:
|
||||
return Path(session_dir).resolve().parent / "groq_transcribe.lock"
|
||||
return fallback_work_dir / "groq_transcribe.lock"
|
||||
|
||||
@staticmethod
|
||||
@contextmanager
|
||||
def _optional_groq_lock(lock_path: Path, *, enabled: bool):
|
||||
if not enabled:
|
||||
yield
|
||||
return
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with lock_path.open("w", encoding="utf-8") as lock_file:
|
||||
try:
|
||||
with audio_file.open("rb") as file_handle:
|
||||
response = client.audio.transcriptions.create(
|
||||
file=(audio_file.name, file_handle.read()),
|
||||
model=MODEL_NAME,
|
||||
response_format="verbose_json",
|
||||
language=LANGUAGE,
|
||||
temperature=0.0,
|
||||
)
|
||||
return [dict(segment) for segment in response.segments]
|
||||
except Exception as exc: # noqa: BLE001
|
||||
retry_count += 1
|
||||
err_str = str(exc)
|
||||
if "429" in err_str or "rate_limit" in err_str.lower():
|
||||
time.sleep(25)
|
||||
continue
|
||||
raise ModuleError(
|
||||
code="GROQ_TRANSCRIBE_FAILED",
|
||||
message=f"Groq 转录失败: {audio_file.name}",
|
||||
retryable=True,
|
||||
details={"error": err_str, "retry_count": retry_count},
|
||||
) from exc
|
||||
import fcntl
|
||||
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
||||
lock_file.write(f"{os.getpid()}\n")
|
||||
lock_file.flush()
|
||||
yield
|
||||
finally:
|
||||
with suppress(Exception):
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
|
||||
def _transcribe_with_retry(
|
||||
self,
|
||||
clients: list[Any],
|
||||
audio_file: Path,
|
||||
*,
|
||||
request_timeout_seconds: int,
|
||||
request_max_retries: int,
|
||||
request_retry_backoff_seconds: int,
|
||||
) -> list[dict[str, Any]]:
|
||||
attempt = 0
|
||||
key_attempts = 0
|
||||
last_error = ""
|
||||
while True:
|
||||
attempt += 1
|
||||
for key_index, client in enumerate(clients):
|
||||
key_attempts += 1
|
||||
try:
|
||||
with audio_file.open("rb") as file_handle:
|
||||
response = client.audio.transcriptions.create(
|
||||
file=(audio_file.name, file_handle.read()),
|
||||
model=MODEL_NAME,
|
||||
response_format="verbose_json",
|
||||
language=LANGUAGE,
|
||||
temperature=0.0,
|
||||
timeout=request_timeout_seconds,
|
||||
)
|
||||
return [dict(segment) for segment in response.segments]
|
||||
except Exception as exc: # noqa: BLE001
|
||||
err_str = str(exc)
|
||||
last_error = err_str
|
||||
if self._is_rate_limit_error(err_str) and key_index < len(clients) - 1:
|
||||
continue
|
||||
if not self._should_retry_request(err_str):
|
||||
raise self._transcribe_failed(
|
||||
audio_file,
|
||||
err_str,
|
||||
request_attempts=attempt,
|
||||
key_attempts=key_attempts,
|
||||
api_key_count=len(clients),
|
||||
request_timeout_seconds=request_timeout_seconds,
|
||||
) from exc
|
||||
break
|
||||
if attempt <= request_max_retries:
|
||||
if request_retry_backoff_seconds > 0:
|
||||
time.sleep(request_retry_backoff_seconds)
|
||||
continue
|
||||
raise self._transcribe_failed(
|
||||
audio_file,
|
||||
last_error,
|
||||
request_attempts=attempt,
|
||||
key_attempts=key_attempts,
|
||||
api_key_count=len(clients),
|
||||
request_timeout_seconds=request_timeout_seconds,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _transcribe_failed(
|
||||
audio_file: Path,
|
||||
error_text: str,
|
||||
*,
|
||||
request_attempts: int,
|
||||
key_attempts: int,
|
||||
api_key_count: int,
|
||||
request_timeout_seconds: int,
|
||||
) -> ModuleError:
|
||||
return ModuleError(
|
||||
code="GROQ_TRANSCRIBE_FAILED",
|
||||
message=f"Groq 转录失败: {audio_file.name}",
|
||||
retryable=True,
|
||||
details={
|
||||
"error": error_text,
|
||||
"request_attempts": request_attempts,
|
||||
"key_attempts": key_attempts,
|
||||
"api_key_count": api_key_count,
|
||||
"request_timeout_seconds": request_timeout_seconds,
|
||||
},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _is_rate_limit_error(error_text: str) -> bool:
|
||||
lowered = error_text.lower()
|
||||
return any(
|
||||
needle in lowered
|
||||
for needle in (
|
||||
"429",
|
||||
"rate_limit",
|
||||
"rate limit",
|
||||
"too many requests",
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _should_retry_request(error_text: str) -> bool:
|
||||
lowered = error_text.lower()
|
||||
return any(
|
||||
needle in lowered
|
||||
for needle in (
|
||||
"429",
|
||||
"rate_limit",
|
||||
"timed out",
|
||||
"timeout",
|
||||
"connection error",
|
||||
"connect error",
|
||||
"server disconnected",
|
||||
"502",
|
||||
"503",
|
||||
"504",
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _format_srt_time(seconds: float) -> str:
|
||||
|
||||
@ -88,7 +88,7 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
|
||||
self.assertEqual(result["split"]["reason"], "comment_disabled")
|
||||
self.assertEqual(len(api.reply_calls), 1)
|
||||
self.assertIn("P1:\n1. Song A — Artist A", api.reply_calls[0]["content"])
|
||||
self.assertIn("P2:\n1. Song B — Artist B", api.reply_calls[0]["content"])
|
||||
self.assertIn("P2:\n2. Song B — Artist B", api.reply_calls[0]["content"])
|
||||
|
||||
def test_split_comment_skips_on_non_anchor_task(self) -> None:
|
||||
api = _FakeBilibiliApi()
|
||||
@ -212,6 +212,63 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
|
||||
self.assertEqual(result["split"]["reason"], "comment_disabled")
|
||||
self.assertTrue((work_dir / "comment_done.flag").exists())
|
||||
|
||||
def test_comment_format_can_be_configured_from_upload_config(self) -> None:
|
||||
api = _FakeBilibiliApi()
|
||||
provider = BilibiliTopCommentProvider(bilibili_api=api)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
work_dir = root / "task-1"
|
||||
work_dir.mkdir(parents=True, exist_ok=True)
|
||||
task = Task(
|
||||
id="task-1",
|
||||
source_type="local_file",
|
||||
source_path=str(work_dir / "source.mp4"),
|
||||
title="task-1",
|
||||
status="published",
|
||||
created_at=utc_now_iso(),
|
||||
updated_at=utc_now_iso(),
|
||||
)
|
||||
(work_dir / "songs.txt").write_text("00:00:00 Song From Text — Artist T\n", encoding="utf-8")
|
||||
(work_dir / "songs.json").write_text(
|
||||
json.dumps({"songs": [{"title": "Song A", "artist": "Artist A"}]}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
(work_dir / "bvid.txt").write_text("BV1COMMENT123", encoding="utf-8")
|
||||
(work_dir / "full_video_bvid.txt").write_text("BV1FULL12345", encoding="utf-8")
|
||||
cookies_file = root / "cookies.json"
|
||||
cookies_file.write_text("{}", encoding="utf-8")
|
||||
upload_config = root / "upload_config.json"
|
||||
upload_config.write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"comment_template": {
|
||||
"split_header": "这是纯享:{current_full_video_link}\n上一场:{previous_full_video_link}",
|
||||
"split_song_line": "#{song_index} {title} / {artist}",
|
||||
}
|
||||
}
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
result = provider.comment(
|
||||
task,
|
||||
{
|
||||
"session_dir": str(root),
|
||||
"cookies_file": str(cookies_file),
|
||||
"upload_config_file": str(upload_config),
|
||||
"post_split_comment": True,
|
||||
"post_full_video_timeline_comment": False,
|
||||
},
|
||||
)
|
||||
|
||||
self.assertEqual(result["status"], "ok")
|
||||
self.assertEqual(result["split"]["reason"], "comment_disabled")
|
||||
self.assertEqual(len(api.reply_calls), 1)
|
||||
content = str(api.reply_calls[0]["content"])
|
||||
self.assertIn("这是纯享:https://www.bilibili.com/video/BV1FULL12345", content)
|
||||
self.assertNotIn("上一场:", content)
|
||||
self.assertIn("#1 Song A / Artist A", content)
|
||||
|
||||
def test_full_comment_aggregates_session_parts_on_anchor_task(self) -> None:
|
||||
api = _FakeBilibiliApi()
|
||||
provider = BilibiliTopCommentProvider(bilibili_api=api)
|
||||
@ -263,8 +320,8 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
|
||||
self.assertEqual(result["full"]["status"], "skipped")
|
||||
self.assertEqual(result["full"]["reason"], "comment_disabled")
|
||||
self.assertEqual(len(api.reply_calls), 1)
|
||||
self.assertIn("P1:\n00:00:01 Song A\n00:02:00 Song B", api.reply_calls[0]["content"])
|
||||
self.assertIn("P2:\n00:00:03 Song C", api.reply_calls[0]["content"])
|
||||
self.assertIn("P1:\n1. 00:00:01 Song A\n2. 00:02:00 Song B", api.reply_calls[0]["content"])
|
||||
self.assertIn("P2:\n3. 00:00:03 Song C", api.reply_calls[0]["content"])
|
||||
|
||||
def test_full_comment_skips_on_non_anchor_task(self) -> None:
|
||||
api = _FakeBilibiliApi()
|
||||
|
||||
@ -269,6 +269,117 @@ class BiliupCliPublishProviderTests(unittest.TestCase):
|
||||
self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
|
||||
self.assertTrue((work_dir / "upload_done.flag").exists())
|
||||
|
||||
def test_publish_recovers_bvid_from_progress_when_bvid_file_was_removed(self) -> None:
|
||||
adapter = _FakeBiliupAdapter()
|
||||
provider = BiliupCliPublishProvider(adapter=adapter)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
work_dir = root / "task-1"
|
||||
work_dir.mkdir(parents=True, exist_ok=True)
|
||||
task = Task(
|
||||
id="task-1",
|
||||
source_type="local_file",
|
||||
source_path=str(work_dir / "source.mp4"),
|
||||
title="task-1",
|
||||
status="split_done",
|
||||
created_at=utc_now_iso(),
|
||||
updated_at=utc_now_iso(),
|
||||
)
|
||||
(work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
|
||||
(work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
|
||||
(work_dir / "publish_progress.json").write_text(
|
||||
json.dumps({"bvid": "BV1RESUME1234", "completed_append_batches": [2]}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
upload_config = root / "upload_config.json"
|
||||
upload_config.write_text("{}", encoding="utf-8")
|
||||
clips = []
|
||||
for index in range(1, 16):
|
||||
clip_path = work_dir / f"clip-{index}.mp4"
|
||||
clip_path.write_text("fake", encoding="utf-8")
|
||||
clips.append(
|
||||
Artifact(
|
||||
id=None,
|
||||
task_id=task.id,
|
||||
artifact_type="clip_video",
|
||||
path=str(clip_path),
|
||||
metadata_json="{}",
|
||||
created_at=utc_now_iso(),
|
||||
)
|
||||
)
|
||||
|
||||
with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
|
||||
record = provider.publish(
|
||||
task,
|
||||
clips,
|
||||
{
|
||||
"session_dir": str(root),
|
||||
"upload_config_file": str(upload_config),
|
||||
"biliup_path": "runtime/biliup",
|
||||
"cookie_file": "runtime/cookies.json",
|
||||
"retry_count": 2,
|
||||
"command_timeout_seconds": 123,
|
||||
},
|
||||
)
|
||||
|
||||
self.assertEqual(record.bvid, "BV1RESUME1234")
|
||||
self.assertEqual((work_dir / "bvid.txt").read_text(encoding="utf-8"), "BV1RESUME1234")
|
||||
self.assertEqual(len(adapter.run_calls), 1)
|
||||
self.assertIn("append", adapter.run_calls[0]["cmd"])
|
||||
self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
|
||||
|
||||
def test_publish_renumbers_clip_filenames_across_aggregated_sessions(self) -> None:
|
||||
adapter = _FakeBiliupAdapter()
|
||||
provider = BiliupCliPublishProvider(adapter=adapter)
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
work_dir = root / "task-1"
|
||||
second_dir = root / "task-2"
|
||||
work_dir.mkdir(parents=True, exist_ok=True)
|
||||
second_dir.mkdir(parents=True, exist_ok=True)
|
||||
task = Task(
|
||||
id="task-1",
|
||||
source_type="local_file",
|
||||
source_path=str(work_dir / "source.mp4"),
|
||||
title="task-1",
|
||||
status="split_done",
|
||||
created_at=utc_now_iso(),
|
||||
updated_at=utc_now_iso(),
|
||||
)
|
||||
(work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
|
||||
(work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
|
||||
upload_config = root / "upload_config.json"
|
||||
upload_config.write_text("{}", encoding="utf-8")
|
||||
clips = []
|
||||
for index in range(1, 11):
|
||||
clip_path = work_dir / f"{index:02d}_first-{index}.mp4"
|
||||
clip_path.write_text("fake", encoding="utf-8")
|
||||
clips.append(Artifact(None, task.id, "clip_video", str(clip_path), "{}", utc_now_iso()))
|
||||
for index in range(1, 8):
|
||||
clip_path = second_dir / f"{index:02d}_second-{index}.mp4"
|
||||
clip_path.write_text("fake", encoding="utf-8")
|
||||
clips.append(Artifact(None, "task-2", "clip_video", str(clip_path), "{}", utc_now_iso()))
|
||||
|
||||
with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
|
||||
provider.publish(
|
||||
task,
|
||||
clips,
|
||||
{
|
||||
"session_dir": str(root),
|
||||
"upload_config_file": str(upload_config),
|
||||
"biliup_path": "runtime/biliup",
|
||||
"cookie_file": "runtime/cookies.json",
|
||||
"retry_count": 1,
|
||||
"command_timeout_seconds": 123,
|
||||
},
|
||||
)
|
||||
|
||||
all_uploaded = [part for call in adapter.run_calls for part in call["cmd"] if str(part).endswith(".mp4")]
|
||||
self.assertTrue(all_uploaded[0].endswith("01_first-1.mp4"))
|
||||
self.assertTrue(all_uploaded[9].endswith("10_first-10.mp4"))
|
||||
self.assertTrue(all_uploaded[10].endswith("11_second-1.mp4"))
|
||||
self.assertTrue(all_uploaded[16].endswith("17_second-7.mp4"))
|
||||
|
||||
def test_publish_creates_progress_from_existing_bvid_for_append_resume(self) -> None:
|
||||
adapter = _FakeBiliupAdapter()
|
||||
provider = BiliupCliPublishProvider(adapter=adapter)
|
||||
|
||||
85
tests/test_collection_service.py
Normal file
85
tests/test_collection_service.py
Normal file
@ -0,0 +1,85 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from biliup_next.core.models import Task, TaskStep, utc_now_iso
|
||||
from biliup_next.modules.collection.service import CollectionService
|
||||
|
||||
|
||||
class _FakeRegistry:
|
||||
def __init__(self, provider) -> None: # type: ignore[no-untyped-def]
|
||||
self.provider = provider
|
||||
|
||||
def get(self, provider_type: str, provider_id: str): # type: ignore[no-untyped-def]
|
||||
return self.provider
|
||||
|
||||
|
||||
class _FakeProvider:
|
||||
def sync(self, task, target: str, settings: dict[str, object]) -> dict[str, object]: # type: ignore[no-untyped-def]
|
||||
return {"status": "skipped", "target": target}
|
||||
|
||||
|
||||
class _FakeRepo:
|
||||
def __init__(self) -> None:
|
||||
now = utc_now_iso()
|
||||
self.task = Task("task-1", "local_file", "/tmp/source.mp4", "task-1", "running", now, now)
|
||||
self.steps = {
|
||||
"collection_a": TaskStep(None, "task-1", "collection_a", "pending", None, None, 0, None, None),
|
||||
"collection_b": TaskStep(None, "task-1", "collection_b", "pending", None, None, 0, None, None),
|
||||
}
|
||||
self.task_status_updates: list[tuple[str, str]] = []
|
||||
|
||||
def get_task(self, task_id: str): # type: ignore[no-untyped-def]
|
||||
return self.task if task_id == self.task.id else None
|
||||
|
||||
def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None: # type: ignore[no-untyped-def]
|
||||
step = self.steps[step_name]
|
||||
self.steps[step_name] = TaskStep(
|
||||
step.id,
|
||||
step.task_id,
|
||||
step.step_name,
|
||||
status,
|
||||
kwargs.get("error_code", step.error_code),
|
||||
kwargs.get("error_message", step.error_message),
|
||||
kwargs.get("retry_count", step.retry_count),
|
||||
kwargs.get("started_at", step.started_at),
|
||||
kwargs.get("finished_at", step.finished_at),
|
||||
)
|
||||
|
||||
def list_steps(self, task_id: str) -> list[TaskStep]:
|
||||
return list(self.steps.values())
|
||||
|
||||
def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
|
||||
self.task_status_updates.append((task_id, status))
|
||||
|
||||
|
||||
class CollectionServiceTests(unittest.TestCase):
|
||||
def test_collection_a_restores_commented_status_so_collection_b_can_run(self) -> None:
|
||||
repo = _FakeRepo()
|
||||
service = CollectionService(_FakeRegistry(_FakeProvider()), repo) # type: ignore[arg-type]
|
||||
service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {}) # type: ignore[assignment]
|
||||
|
||||
result = service.run("task-1", "a", {"provider": "fake"})
|
||||
|
||||
self.assertEqual(result["status"], "skipped")
|
||||
self.assertEqual(repo.steps["collection_a"].status, "succeeded")
|
||||
self.assertEqual(repo.steps["collection_b"].status, "pending")
|
||||
self.assertEqual(repo.task_status_updates[-1], ("task-1", "commented"))
|
||||
|
||||
def test_collection_b_marks_collection_synced_when_both_steps_succeeded(self) -> None:
|
||||
repo = _FakeRepo()
|
||||
repo.steps["collection_a"] = TaskStep(None, "task-1", "collection_a", "succeeded", None, None, 0, None, utc_now_iso())
|
||||
service = CollectionService(_FakeRegistry(_FakeProvider()), repo) # type: ignore[arg-type]
|
||||
service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {"deleted": []}) # type: ignore[assignment]
|
||||
|
||||
result = service.run("task-1", "b", {"provider": "fake"})
|
||||
|
||||
self.assertEqual(result["status"], "skipped")
|
||||
self.assertEqual(repo.steps["collection_b"].status, "succeeded")
|
||||
self.assertEqual(repo.task_status_updates[-1], ("task-1", "collection_synced"))
|
||||
self.assertEqual(result["cleanup"], {"deleted": []})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
277
tests/test_groq_transcribe_provider.py
Normal file
277
tests/test_groq_transcribe_provider.py
Normal file
@ -0,0 +1,277 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
from biliup_next.core.errors import ModuleError
|
||||
from biliup_next.core.models import Artifact, Task
|
||||
from biliup_next.modules.transcribe.providers.groq import GroqTranscribeProvider
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self, segments):
|
||||
self.segments = segments
|
||||
|
||||
|
||||
class _FakeTranscriptions:
|
||||
def __init__(self, outcomes: list[object]) -> None:
|
||||
self.outcomes = list(outcomes)
|
||||
self.calls: list[dict[str, object]] = []
|
||||
|
||||
def create(self, **kwargs): # noqa: ANN003
|
||||
self.calls.append(kwargs)
|
||||
outcome = self.outcomes.pop(0)
|
||||
if isinstance(outcome, Exception):
|
||||
raise outcome
|
||||
return outcome
|
||||
|
||||
|
||||
class _FakeGroqClient:
|
||||
def __init__(self, outcomes: list[object]) -> None:
|
||||
self.audio = SimpleNamespace(transcriptions=_FakeTranscriptions(outcomes))
|
||||
|
||||
|
||||
class GroqTranscribeProviderTests(unittest.TestCase):
|
||||
def test_transcribe_retries_timeout_and_writes_srt_atomically(self) -> None:
|
||||
provider = GroqTranscribeProvider()
|
||||
task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
work_dir = Path(tmpdir)
|
||||
source_path = work_dir / "input.mp4"
|
||||
source_path.write_bytes(b"video")
|
||||
source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
|
||||
segment = work_dir / "temp_audio" / "part_000.mp3"
|
||||
|
||||
def fake_extract_audio_segments(**kwargs): # noqa: ANN003
|
||||
segment.parent.mkdir(parents=True, exist_ok=True)
|
||||
segment.write_bytes(b"audio")
|
||||
|
||||
client = _FakeGroqClient(
|
||||
[
|
||||
RuntimeError("Request timed out."),
|
||||
_FakeResponse([{"start": 0, "end": 1.2, "text": "hello"}]),
|
||||
]
|
||||
)
|
||||
|
||||
settings = {
|
||||
"groq_api_key": "gsk_test",
|
||||
"ffmpeg_bin": "ffmpeg",
|
||||
"max_file_size_mb": 23,
|
||||
"request_timeout_seconds": 33,
|
||||
"request_max_retries": 1,
|
||||
"request_retry_backoff_seconds": 0,
|
||||
"serialize_groq_requests": False,
|
||||
}
|
||||
|
||||
with patch("groq.Groq", return_value=client) as groq_ctor:
|
||||
with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
|
||||
artifact = provider.transcribe(task, source_video, settings)
|
||||
|
||||
self.assertEqual(Path(artifact.path).read_text(encoding="utf-8"), "1\n00:00:00,000 --> 00:00:01,199\nhello\n\n")
|
||||
self.assertFalse((work_dir / ".demo.srt.tmp").exists())
|
||||
self.assertEqual(len(client.audio.transcriptions.calls), 2)
|
||||
self.assertEqual(client.audio.transcriptions.calls[0]["timeout"], 33)
|
||||
self.assertTrue((work_dir / "transcribe_segments" / "part_000.json").exists())
|
||||
groq_ctor.assert_called_once_with(api_key="gsk_test", timeout=33, max_retries=0)
|
||||
|
||||
def test_transcribe_reuses_completed_segment_checkpoints(self) -> None:
    """Segments with an existing matching checkpoint are not re-transcribed.

    A checkpoint for part_000 is pre-seeded on disk; only part_001 should hit
    the fake API, and the final SRT must merge both, offsetting part_001 by
    the 75-second segment duration recorded in the checkpoint.
    """
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segments = [work_dir / "temp_audio" / "part_000.mp3", work_dir / "temp_audio" / "part_001.mp3"]
        checkpoint_dir = work_dir / "transcribe_segments"
        checkpoint_dir.mkdir()
        # Pre-seed a checkpoint for part_000 so only part_001 needs the API.
        (checkpoint_dir / "part_000.json").write_text(
            json.dumps(
                {
                    "provider": "groq",
                    "model": "whisper-large-v3-turbo",
                    "language": "zh",
                    "audio_file": "part_000.mp3",
                    "segment_duration_seconds": 75,
                    "segments": [{"start": 0, "end": 1, "text": "first"}],
                }
            ),
            encoding="utf-8",
        )

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            for segment in segments:
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")

        # Only one response queued: a second API call would exhaust the fake.
        client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.5, "text": "second"}])])
        settings = {
            "groq_api_key": "gsk_test",
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 33,
            "request_max_retries": 1,
            "request_retry_backoff_seconds": 0,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", return_value=client):
            # Pin the duration to 75 s so it matches the seeded checkpoint.
            with patch.object(provider, "_initial_segment_duration", return_value=75):
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)

        srt = Path(artifact.path).read_text(encoding="utf-8")
        self.assertIn("00:00:00,000 --> 00:00:01,000\nfirst", srt)
        # part_001 entries start at 75 s, i.e. one segment-duration offset.
        self.assertIn("00:01:15,000 --> 00:01:16,500\nsecond", srt)
        self.assertEqual(len(client.audio.transcriptions.calls), 1)
        self.assertEqual(client.audio.transcriptions.calls[0]["file"][0], "part_001.mp3")
        self.assertTrue((checkpoint_dir / "part_001.json").exists())
def test_transcribe_switches_to_next_api_key_on_rate_limit(self) -> None:
    """A 429 on the first API key makes the provider rotate to the next key."""
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segment = work_dir / "temp_audio" / "part_000.mp3"

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            segment.parent.mkdir(parents=True, exist_ok=True)
            segment.write_bytes(b"audio")

        # First key is rate limited, second key succeeds.
        limited_client = _FakeGroqClient([RuntimeError("Error code: 429 rate_limit")])
        fallback_client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.2, "text": "fallback"}])])
        settings = {
            # Empty single-key field forces use of the key list below.
            "groq_api_key": "",
            "groq_api_keys": ["gsk_first", "gsk_second"],
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 20,
            "request_max_retries": 0,
            "request_retry_backoff_seconds": 0,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", side_effect=[limited_client, fallback_client]) as groq_ctor:
            with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                artifact = provider.transcribe(task, source_video, settings)

        self.assertIn("fallback", Path(artifact.path).read_text(encoding="utf-8"))
        self.assertEqual(len(limited_client.audio.transcriptions.calls), 1)
        self.assertEqual(len(fallback_client.audio.transcriptions.calls), 1)
        # One client per key, constructed in the configured order.
        self.assertEqual([call.kwargs["api_key"] for call in groq_ctor.call_args_list], ["gsk_first", "gsk_second"])
def test_transcribe_waits_after_all_api_keys_are_rate_limited(self) -> None:
    """When every key is rate limited the provider backs off, then retries.

    Both fake clients return 429 on their first call; the provider is expected
    to sleep for the configured backoff (7 s) exactly once and then succeed on
    the first key's second queued response.
    """
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segment = work_dir / "temp_audio" / "part_000.mp3"

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            segment.parent.mkdir(parents=True, exist_ok=True)
            segment.write_bytes(b"audio")

        first_client = _FakeGroqClient([RuntimeError("429 rate_limit"), _FakeResponse([{"start": 0, "end": 1, "text": "retry ok"}])])
        second_client = _FakeGroqClient([RuntimeError("429 rate_limit")])
        settings = {
            "groq_api_key": "",
            "groq_api_keys": ["gsk_first", "gsk_second"],
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 20,
            "request_max_retries": 1,
            "request_retry_backoff_seconds": 7,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", side_effect=[first_client, second_client]):
            # Patch time.sleep so the 7 s backoff is observed but not slept.
            with patch("time.sleep") as sleep_mock:
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)

        self.assertIn("retry ok", Path(artifact.path).read_text(encoding="utf-8"))
        sleep_mock.assert_called_once_with(7)
        self.assertEqual(len(first_client.audio.transcriptions.calls), 2)
        self.assertEqual(len(second_client.audio.transcriptions.calls), 1)
def test_transcribe_raises_after_retry_budget_is_exhausted(self) -> None:
    """Persistent connection errors surface as a ModuleError naming the segment."""
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segment = work_dir / "temp_audio" / "part_000.mp3"

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            segment.parent.mkdir(parents=True, exist_ok=True)
            segment.write_bytes(b"audio")

        # Two failures: one more than the single configured retry allows.
        client = _FakeGroqClient([RuntimeError("Connection error."), RuntimeError("Connection error.")])
        settings = {
            "groq_api_key": "gsk_test",
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 20,
            "request_max_retries": 1,
            "request_retry_backoff_seconds": 0,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", return_value=client):
            with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                with self.assertRaises(ModuleError) as exc_info:
                    provider.transcribe(task, source_video, settings)

        self.assertEqual(exc_info.exception.message, "Groq 转录失败: part_000.mp3")
def test_initial_segment_duration_keeps_safety_margin(self) -> None:
    """A 12 MB size budget must map to a duration strictly below 1536 seconds."""
    chosen_duration = GroqTranscribeProvider._initial_segment_duration(12)
    self.assertLess(chosen_duration, 1536)
def test_extract_audio_segments_retries_when_segment_exceeds_size_limit(self) -> None:
    """The size guard shrinks the segment duration when output exceeds the cap.

    First extraction produces a 20-byte segment against a 10-byte cap, so the
    guard retries with a reduced duration (100 -> 75) and returns the duration
    that finally satisfied the limit.
    """
    provider = GroqTranscribeProvider()

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        temp_audio_dir = work_dir / "temp_audio"
        temp_audio_dir.mkdir()
        output_pattern = temp_audio_dir / "part_%03d.mp3"
        # Records each duration the guard attempts, in order.
        durations: list[int] = []

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            durations.append(int(kwargs["segment_duration"]))
            # Oversized on the first attempt, within limits afterwards.
            size = 20 if len(durations) == 1 else 5
            (temp_audio_dir / "part_000.mp3").write_bytes(b"x" * size)

        with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
            result = provider._extract_audio_segments_with_size_guard(
                ffmpeg_bin="ffmpeg",
                source_path=work_dir / "input.mp4",
                output_pattern=output_pattern,
                temp_audio_dir=temp_audio_dir,
                initial_segment_duration=100,
                max_segment_bytes=10,
            )

        self.assertEqual(durations, [100, 75])
        self.assertEqual(result, 75)
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly without a test runner.
    unittest.main()
|
||||
85
tests/test_ingest_scan_stage.py
Normal file
85
tests/test_ingest_scan_stage.py
Normal file
@ -0,0 +1,85 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from biliup_next.core.providers import ProviderManifest
|
||||
from biliup_next.core.registry import Registry
|
||||
from biliup_next.infra.db import Database
|
||||
from biliup_next.infra.task_repository import TaskRepository
|
||||
from biliup_next.modules.ingest.service import IngestService
|
||||
|
||||
|
||||
class _FakeLocalFileProvider:
    """Minimal stand-in for the local-file ingest provider used by IngestService."""

    # Manifest values mirror the real provider so registry lookups behave the same.
    manifest = ProviderManifest(
        id="local_file",
        name="Fake Local File Ingest",
        version="0.1.0",
        provider_type="ingest_provider",
        entrypoint="tests.test_ingest_scan_stage:_FakeLocalFileProvider",
        capabilities=["ingest"],
        enabled_by_default=True,
    )

    def validate_source(self, source_path: Path, settings: dict[str, object]) -> None:
        """Fail loudly if the service hands over a path that is not an existing file."""
        path_is_valid = source_path.exists() and source_path.is_file()
        if not path_is_valid:
            raise AssertionError(f"unexpected source path: {source_path}")
|
||||
|
||||
class IngestScanStageTests(unittest.TestCase):
    """End-to-end scan_stage behaviour against a fake local-file provider."""

    def setUp(self) -> None:
        # Workspace layout: new files land in stage/, accepted files are moved
        # to backup/, and per-session metadata lives in session/.
        self.tempdir = tempfile.TemporaryDirectory()
        root = Path(self.tempdir.name)
        self.stage_dir = root / "stage"
        self.backup_dir = root / "backup"
        self.session_dir = root / "session"
        self.stage_dir.mkdir()
        self.backup_dir.mkdir()
        self.session_dir.mkdir()

        db = Database(root / "test.db")
        db.initialize()
        repo = TaskRepository(db)
        registry = Registry()
        provider = _FakeLocalFileProvider()
        registry.register("ingest_provider", "local_file", provider, provider.manifest)
        self.service = IngestService(registry=registry, repo=repo)

    def tearDown(self) -> None:
        self.tempdir.cleanup()

    def test_scan_stage_uses_moved_file_for_reference_timestamp(self) -> None:
        source_path = self.stage_dir / "王海颖唱歌录播 04月14日 17时49分.mp4"
        source_path.write_bytes(b"fake-video")

        settings = {
            "provider": "local_file",
            "stage_dir": str(self.stage_dir),
            "backup_dir": str(self.backup_dir),
            "session_dir": str(self.session_dir),
            "allowed_extensions": [".mp4"],
            "ffprobe_bin": "ffprobe",
            "min_duration_seconds": 0,
            "stability_wait_seconds": 0,
            "meta_sidecar_enabled": True,
        }

        # Avoid spawning a real ffprobe: report a fixed 2-minute duration.
        self.service._probe_duration_seconds = lambda *_args, **_kwargs: 120.0  # type: ignore[method-assign]

        result = self.service.scan_stage(settings)

        self.assertEqual(len(result["accepted"]), 1)
        accepted = result["accepted"][0]
        moved_path = Path(str(accepted["source_path"]))
        # The staged file must have been moved out of stage/ ...
        self.assertTrue(moved_path.exists())
        self.assertFalse(source_path.exists())
        # ... with a task and context recorded under the moved file's stem.
        task = self.service.repo.get_task(moved_path.stem)
        self.assertIsNotNone(task)
        context = self.service.repo.get_task_context(moved_path.stem)
        self.assertIsNotNone(context)
        self.assertIsNotNone(context.segment_started_at)
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly without a test runner.
    unittest.main()
|
||||
@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
from pathlib import Path
|
||||
|
||||
from biliup_next.core.config import SettingsService
|
||||
@ -78,6 +79,146 @@ class SettingsServiceTests(unittest.TestCase):
|
||||
self.assertTrue((config_dir / "settings.staged.json").exists())
|
||||
self.assertEqual(bundle.settings["paths"]["cookies_file"], str((root / "runtime" / "cookies.json").resolve()))
|
||||
|
||||
def test_load_applies_environment_overrides_before_path_normalization(self) -> None:
    """Environment variables override file settings and are then path-normalized.

    Exercises both the plain alias form (GROQ_API_KEY, COLLECTION_SEASON_ID_A)
    and the namespaced BILIUP_NEXT__<GROUP>__<KEY> form, including integer
    coercion and resolution of relative paths against the project root.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        config_dir = root / "config"
        config_dir.mkdir(parents=True, exist_ok=True)
        # Minimal schema covering every group the loader touches.
        (config_dir / "settings.schema.json").write_text(
            """
            {
                "groups": {
                    "runtime": {
                        "database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}
                    },
                    "paths": {
                        "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                        "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                        "session_dir": {"type": "string", "default": "data/workspace/session"},
                        "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                        "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {
                        "ffprobe_bin": {"type": "string", "default": "ffprobe"},
                        "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"},
                        "yt_dlp_format": {"type": "string", "default": ""}
                    },
                    "transcribe": {
                        "groq_api_key": {"type": "string", "default": "", "sensitive": true},
                        "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "split": {
                        "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "song_detect": {
                        "codex_cmd": {"type": "string", "default": "codex"},
                        "qwen_cmd": {"type": "string", "default": "qwen"}
                    },
                    "publish": {
                        "biliup_path": {"type": "string", "default": "runtime/biliup"},
                        "cookie_file": {"type": "string", "default": "runtime/cookies.json"}
                    },
                    "collection": {
                        "season_id_a": {"type": "integer", "default": 0},
                        "season_id_b": {"type": "integer", "default": 0}
                    }
                }
            }
            """,
            encoding="utf-8",
        )
        (config_dir / "settings.standalone.example.json").write_text(
            """
            {
                "runtime": {"database_path": "data/workspace/biliup_next.db"},
                "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                },
                "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp", "yt_dlp_format": ""},
                "transcribe": {"groq_api_key": "", "ffmpeg_bin": "ffmpeg"},
                "split": {"ffmpeg_bin": "ffmpeg"},
                "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"},
                "collection": {"season_id_a": 0, "season_id_b": 0}
            }
            """,
            encoding="utf-8",
        )

        # clear=True isolates the test from whatever env the host happens to have.
        with patch.dict(
            "os.environ",
            {
                "GROQ_API_KEY": "gsk_test",
                "COLLECTION_SEASON_ID_A": "7196643",
                "BILIUP_NEXT__COLLECTION__SEASON_ID_B": "7196624",
                "BILIUP_NEXT__PATHS__STAGE_DIR": "data/custom-stage",
            },
            clear=True,
        ):
            bundle = SettingsService(root).load()

        self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "gsk_test")
        # Integer-typed keys are coerced from their string env values.
        self.assertEqual(bundle.settings["collection"]["season_id_a"], 7196643)
        self.assertEqual(bundle.settings["collection"]["season_id_b"], 7196624)
        # The override is applied before normalization, so it resolves under root.
        self.assertEqual(bundle.settings["paths"]["stage_dir"], str((root / "data" / "custom-stage").resolve()))
|
||||
def test_empty_environment_values_do_not_override_settings(self) -> None:
    """An environment variable set to an empty string must not clobber file values."""
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        config_dir = root / "config"
        config_dir.mkdir(parents=True, exist_ok=True)
        (config_dir / "settings.schema.json").write_text(
            """
            {
                "groups": {
                    "runtime": {"database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}},
                    "paths": {
                        "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                        "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                        "session_dir": {"type": "string", "default": "data/workspace/session"},
                        "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                        "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {"ffprobe_bin": {"type": "string", "default": "ffprobe"}, "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"}},
                    "transcribe": {"groq_api_key": {"type": "string", "default": ""}, "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "split": {"ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "song_detect": {"codex_cmd": {"type": "string", "default": "codex"}, "qwen_cmd": {"type": "string", "default": "qwen"}},
                    "publish": {"biliup_path": {"type": "string", "default": "runtime/biliup"}, "cookie_file": {"type": "string", "default": "runtime/cookies.json"}}
                }
            }
            """,
            encoding="utf-8",
        )
        (config_dir / "settings.standalone.example.json").write_text(
            """
            {
                "runtime": {"database_path": "data/workspace/biliup_next.db"},
                "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                },
                "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp"},
                "transcribe": {"groq_api_key": "from-file", "ffmpeg_bin": "ffmpeg"},
                "split": {"ffmpeg_bin": "ffmpeg"},
                "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"}
            }
            """,
            encoding="utf-8",
        )

        # GROQ_API_KEY is present but empty: the file value must win.
        with patch.dict("os.environ", {"GROQ_API_KEY": ""}, clear=True):
            bundle = SettingsService(root).load()

        self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "from-file")
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly without a test runner.
    unittest.main()
|
||||
|
||||
@ -1,11 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from biliup_next.core.models import Artifact, Task, utc_now_iso
|
||||
from biliup_next.infra.adapters.codex_cli import CodexCliAdapter
|
||||
from biliup_next.modules.song_detect.providers.codex import CodexSongDetector
|
||||
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
|
||||
|
||||
|
||||
@ -38,6 +42,33 @@ class FakeQwenCliAdapter:
|
||||
return type("Result", (), {"returncode": self.returncode, "stdout": "ok", "stderr": ""})()
|
||||
|
||||
|
||||
class FakeCodexCliAdapter:
    """Fake codex CLI adapter: writes a canned songs.json and reports a fixed returncode."""

    def __init__(self, returncode: int = 0) -> None:
        # Returncode reported by every simulated run.
        self.returncode = returncode

    def run_song_detect(self, *, codex_cmd: str, work_dir: Path, prompt: str):  # noqa: ANN001
        """Simulate a codex run by dropping one detected song into work_dir/songs.json."""
        detected = {
            "songs": [
                {
                    "start": "00:01:23,000",
                    "end": "00:03:45,000",
                    "title": "测试歌曲",
                    "artist": "测试歌手",
                    "confidence": 0.93,
                    "evidence": "歌词命中",
                }
            ]
        }
        target = work_dir / "songs.json"
        target.write_text(json.dumps(detected, ensure_ascii=False), encoding="utf-8")
        result_fields = {"returncode": self.returncode, "stdout": "codex stdout", "stderr": "codex stderr"}
        return type("Result", (), result_fields)()
||||
|
||||
class SongDetectProviderTests(unittest.TestCase):
|
||||
def test_qwen_cli_provider_generates_json_and_txt_artifacts(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
@ -72,6 +103,67 @@ class SongDetectProviderTests(unittest.TestCase):
|
||||
self.assertTrue(Path(songs_txt.path).exists())
|
||||
self.assertIn("测试歌曲", Path(songs_txt.path).read_text(encoding="utf-8"))
|
||||
|
||||
def test_codex_provider_writes_execution_output_to_session_log(self) -> None:
    """Codex stdout/stderr go to codex.log, not into artifact metadata."""
    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        subtitle_path = work_dir / "subtitle.srt"
        subtitle_path.write_text("1\n00:00:00,000 --> 00:00:03,000\n测试字幕\n", encoding="utf-8")
        provider = CodexSongDetector(adapter=FakeCodexCliAdapter())

        task = Task(
            id="task-1",
            source_type="local_file",
            source_path=str(work_dir / "video.mp4"),
            title="task-1",
            status="transcribed",
            created_at=utc_now_iso(),
            updated_at=utc_now_iso(),
        )
        subtitle = Artifact(
            id=None,
            task_id=task.id,
            artifact_type="subtitle_srt",
            path=str(subtitle_path),
            metadata_json=None,
            created_at=utc_now_iso(),
        )

        songs_json, songs_txt = provider.detect(task, subtitle, {"codex_cmd": "codex"})

        json_metadata = json.loads(songs_json.metadata_json)
        txt_metadata = json.loads(songs_txt.metadata_json)
        self.assertEqual(json_metadata["provider"], "codex")
        self.assertEqual(txt_metadata["provider"], "codex")
        # Execution details must not leak into artifact metadata ...
        self.assertNotIn("execution", json_metadata)
        # ... they belong in the per-session codex.log instead.
        codex_log = work_dir / "codex.log"
        self.assertTrue(codex_log.exists())
        log_text = codex_log.read_text(encoding="utf-8")
        self.assertIn("returncode: 0", log_text)
        self.assertIn("codex stdout", log_text)
        self.assertIn("codex stderr", log_text)
|
||||
def test_codex_cli_adapter_disables_inner_sandbox_and_normalizes_proxy_env(self) -> None:
    """The adapter bypasses codex's own sandbox and scheme-normalizes proxy URLs."""
    with tempfile.TemporaryDirectory() as tmpdir:
        calls = []

        def fake_run(cmd, **kwargs):  # noqa: ANN001
            # Capture the exact command and env the adapter would execute.
            calls.append((cmd, kwargs))
            return type("Result", (), {"returncode": 0, "stdout": "", "stderr": ""})()

        # Proxy value deliberately lacks a scheme; the adapter should add http://.
        with patch.dict(os.environ, {"HTTPS_PROXY": "192.168.1.100:7897"}, clear=True):
            with patch("subprocess.run", side_effect=fake_run):
                CodexCliAdapter().run_song_detect(
                    codex_cmd="codex",
                    work_dir=Path(tmpdir),
                    prompt="detect songs",
                )

        cmd, kwargs = calls[0]
        self.assertIn("--dangerously-bypass-approvals-and-sandbox", cmd)
        self.assertNotIn("--full-auto", cmd)
        self.assertNotIn("workspace-write", cmd)
        self.assertEqual(kwargs["env"]["HTTPS_PROXY"], "http://192.168.1.100:7897")
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly without a test runner.
    unittest.main()
|
||||
|
||||
103
tests/test_song_detect_retry_policy.py
Normal file
103
tests/test_song_detect_retry_policy.py
Normal file
@ -0,0 +1,103 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from biliup_next.app.retry_meta import retry_meta_for_step
|
||||
from biliup_next.app.task_engine import next_runnable_step
|
||||
from biliup_next.app.task_policies import resolve_failure
|
||||
from biliup_next.core.errors import ModuleError
|
||||
from biliup_next.core.models import TaskStep
|
||||
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
|
||||
|
||||
|
||||
class _Repo:
    """In-memory repository double holding a single running song_detect step.

    Records every status update so tests can assert on what the policy wrote.
    """

    def __init__(self) -> None:
        self.steps = [TaskStep(None, "task-1", "song_detect", "running", None, None, 0, None, None)]
        self.step_updates: list[tuple] = []
        self.task_updates: list[tuple] = []

    def list_steps(self, task_id: str):  # noqa: ANN001
        return list(self.steps)

    def get_task(self, task_id: str):  # noqa: ANN001
        return SimpleNamespace(id=task_id, status="running")

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # noqa: ANN001
        self.step_updates.append((task_id, step_name, status, kwargs))
        # Replace the stored step with one reflecting the reported update.
        replacement = TaskStep(
            None,
            task_id,
            step_name,
            status,
            kwargs.get("error_code"),
            kwargs.get("error_message"),
            kwargs.get("retry_count", 0),
            kwargs.get("started_at"),
            kwargs.get("finished_at"),
        )
        self.steps = [replacement]

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        self.task_updates.append((task_id, status, updated_at))
|
||||
|
||||
class SongDetectRetryPolicyTests(unittest.TestCase):
    """Retry-policy behaviour for the song_detect step: wait windows and delays."""

    def test_retry_meta_reports_wait_window_for_song_detect(self) -> None:
        # finished_at in the far future => the 10-minute retry window cannot
        # have elapsed, so the retry is reported as not yet due.
        step = TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")

        payload = retry_meta_for_step(step, {"song_detect": {"retry_schedule_minutes": [10]}})

        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_song_detect(self) -> None:
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "song_detect": TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }

        step_name, waiting_payload = next_runnable_step(task, steps, state)

        # Nothing runnable yet: the engine reports the waiting step instead.
        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "song_detect")

    def test_resolve_failure_adds_song_detect_retry_delay(self) -> None:
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }

        result = resolve_failure(task, repo, state, ModuleError(code="SONG_DETECT_FAILED", message="boom", retryable=True))

        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        # First schedule entry: 5 minutes => 300 seconds.
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)

    def test_qwen_auth_errors_are_not_retryable(self) -> None:
        self.assertTrue(QwenCliSongDetector._is_auth_error("[API Error: 401 invalid access token or token expired]"))
        self.assertFalse(QwenCliSongDetector._is_auth_error("temporary network failure"))
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly without a test runner.
    unittest.main()
|
||||
@ -51,6 +51,7 @@ class TaskEngineTests(unittest.TestCase):
|
||||
}
|
||||
state = {
|
||||
"settings": {
|
||||
"transcribe": {},
|
||||
"comment": {"enabled": True},
|
||||
"collection": {"enabled": True},
|
||||
"paths": {},
|
||||
|
||||
84
tests/test_transcribe_retry_policy.py
Normal file
84
tests/test_transcribe_retry_policy.py
Normal file
@ -0,0 +1,84 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from biliup_next.app.retry_meta import retry_meta_for_step
|
||||
from biliup_next.app.task_engine import next_runnable_step
|
||||
from biliup_next.app.task_policies import resolve_failure
|
||||
from biliup_next.core.errors import ModuleError
|
||||
from biliup_next.core.models import TaskStep
|
||||
|
||||
|
||||
class _Repo:
    """In-memory repository double holding a single running transcribe step.

    Records every status update so tests can assert on what the policy wrote.
    """

    def __init__(self) -> None:
        self.steps = [TaskStep(None, "task-1", "transcribe", "running", None, None, 0, None, None)]
        self.step_updates: list[tuple] = []
        self.task_updates: list[tuple] = []

    def list_steps(self, task_id: str):  # noqa: ANN001
        return list(self.steps)

    def get_task(self, task_id: str):  # noqa: ANN001
        return SimpleNamespace(id=task_id, status="running")

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # noqa: ANN001
        self.step_updates.append((task_id, step_name, status, kwargs))
        # Replace the stored step with one reflecting the reported update.
        replacement = TaskStep(
            None,
            task_id,
            step_name,
            status,
            kwargs.get("error_code"),
            kwargs.get("error_message"),
            kwargs.get("retry_count", 0),
            kwargs.get("started_at"),
            kwargs.get("finished_at"),
        )
        self.steps = [replacement]

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        self.task_updates.append((task_id, status, updated_at))
|
||||
|
||||
class TranscribeRetryPolicyTests(unittest.TestCase):
    """Retry-policy behaviour for the transcribe step: wait windows and delays."""

    def test_retry_meta_reports_wait_window_for_transcribe(self) -> None:
        # finished_at in the far future => the 10-minute retry window cannot
        # have elapsed, so the retry is reported as not yet due.
        step = TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")

        payload = retry_meta_for_step(step, {"transcribe": {"retry_schedule_minutes": [10]}})

        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_transcribe(self) -> None:
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "transcribe": TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }

        step_name, waiting_payload = next_runnable_step(task, steps, state)

        # Nothing runnable yet: the engine reports the waiting step instead.
        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "transcribe")

    def test_resolve_failure_adds_transcribe_retry_delay(self) -> None:
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }

        result = resolve_failure(task, repo, state, ModuleError(code="GROQ_TRANSCRIBE_FAILED", message="boom", retryable=True))

        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        # First schedule entry: 5 minutes => 300 seconds.
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly without a test runner.
    unittest.main()
|
||||
170
tests/test_video_links.py
Normal file
170
tests/test_video_links.py
Normal file
@ -0,0 +1,170 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import subprocess
|
||||
|
||||
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list
|
||||
from biliup_next.infra.video_links import link_context_for_task
|
||||
|
||||
|
||||
class VideoLinksTests(unittest.TestCase):
    """Tests for resolving previous/current full and pure-version video links.

    Covers the `biliup list` output parser and `link_context_for_task`, which
    combines locally persisted bvids with the remote list as a fallback.
    """

    def test_fetch_biliup_list_keeps_pubing_videos(self) -> None:
        """Parser keeps reviewing (审核中) and open (开放浏览) rows, drops private ones."""
        # Simulated `biliup list` stdout: a log line plus tab-separated video rows.
        output = (
            "2026-04-22 15:56:43 INFO biliup_cli::uploader: user: test\n"
            "BVREVIEW\t王海颖唱歌录播 04月22日 15时56分\t审核中\n"
            "BVPUB\t王海颖唱歌录播 04月20日 22时08分\t开放浏览\n"
            "BVPRIVATE\t私密视频\t仅自己可见\n"
        )
        with patch(
            "biliup_next.infra.adapters.full_video_locator.subprocess.run",
            return_value=subprocess.CompletedProcess(["biliup"], 0, stdout=output, stderr=""),
        ):
            videos = fetch_biliup_list({"biliup_path": "biliup", "cookie_file": "cookies.json"}, max_pages=1)

        # Private videos are excluded; order of the remaining rows is preserved.
        self.assertEqual(
            videos,
            [
                {"bvid": "BVREVIEW", "title": "王海颖唱歌录播 04月22日 15时56分"},
                {"bvid": "BVPUB", "title": "王海颖唱歌录播 04月20日 22时08分"},
            ],
        )

    def test_previous_live_falls_back_to_biliup_list(self) -> None:
        """Without repo context, the previous live is matched from the remote list by title date."""
        with tempfile.TemporaryDirectory() as tmpdir:
            source_path = Path(tmpdir) / "source.mp4"
            source_path.write_bytes(b"")
            task = SimpleNamespace(
                id="task-current",
                title="王海颖唱歌录播 04月19日 22时10分",
                source_path=str(source_path),
            )
            # Repo has no context for any task, forcing the biliup-list fallback.
            repo = SimpleNamespace(get_task_context=lambda task_id: None)
            settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

            with patch(
                "biliup_next.infra.video_links.fetch_biliup_list",
                return_value=[
                    {"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月18日 22时06分 共10首歌"},
                    {"bvid": "BVNEWER", "title": "王海颖唱歌录播 04月20日 22时00分"},
                    {"bvid": "BVPREV", "title": "王海颖唱歌录播 04月18日 22时06分"},
                    {"bvid": "BVOLDER", "title": "王海颖唱歌录播 04月17日 22时00分"},
                ],
            ):
                context = link_context_for_task(task, repo, settings)

        # The closest earlier live (04-18) is chosen over newer or older entries,
        # and the matching pure-version video from the same date is linked too.
        self.assertEqual(context["previous_full_video_bvid"], "BVPREV")
        self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVPREV")
        self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
        self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")

    def test_previous_live_merges_repo_and_biliup_list_links(self) -> None:
        """Locally recorded full-video bvid wins; the pure link still comes from the remote list."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            current_path = root / "current" / "source.mp4"
            previous_path = root / "previous" / "source.mp4"
            current_path.parent.mkdir()
            previous_path.parent.mkdir()
            current_path.write_bytes(b"")
            previous_path.write_bytes(b"")
            # The previous task persisted its published bvid next to its source file.
            (previous_path.parent / "full_video_bvid.txt").write_text("BVLOCALFULL", encoding="utf-8")

            task = SimpleNamespace(
                id="task-current",
                title="王海颖唱歌录播 04月19日 22时10分",
                source_path=str(current_path),
            )
            previous_task = SimpleNamespace(
                id="task-previous",
                title="王海颖唱歌录播 04月18日 22时06分",
                source_path=str(previous_path),
            )
            current_context = SimpleNamespace(
                task_id=task.id,
                streamer="王海颖",
                session_key="王海颖-0419",
                segment_started_at="2026-04-19T22:10:00",
            )
            previous_context = SimpleNamespace(
                task_id=previous_task.id,
                streamer="王海颖",
                session_key="王海颖-0418",
                segment_started_at="2026-04-18T22:06:00",
                full_video_bvid="BVLOCALFULL",
            )
            tasks = {task.id: task, previous_task.id: previous_task}
            contexts = {task.id: current_context, previous_task.id: previous_context}
            repo = SimpleNamespace(
                get_task_context=lambda task_id: contexts.get(task_id),
                get_task=lambda task_id: tasks.get(task_id),
                find_recent_task_contexts=lambda streamer, limit=50: [current_context, previous_context],
            )
            settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

            with patch(
                "biliup_next.infra.video_links.fetch_biliup_list",
                return_value=[
                    {"bvid": "BVPURE", "title": "【王海颖(歌曲纯享版)】04月18日 22时06分 共18首歌"},
                ],
            ):
                context = link_context_for_task(task, repo, settings)

        self.assertEqual(context["previous_full_video_bvid"], "BVLOCALFULL")
        self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVLOCALFULL")
        self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
        self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")

    def test_previous_live_biliup_list_handles_year_boundary(self) -> None:
        """A 12-31 live counts as 'previous' for a 01-01 live despite the month/day wrap."""
        with tempfile.TemporaryDirectory() as tmpdir:
            source_path = Path(tmpdir) / "source.mp4"
            source_path.write_bytes(b"")
            task = SimpleNamespace(
                id="task-current",
                title="王海颖唱歌录播 01月01日 22时10分",
                source_path=str(source_path),
            )
            repo = SimpleNamespace(get_task_context=lambda task_id: None)
            settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

            with patch(
                "biliup_next.infra.video_links.fetch_biliup_list",
                return_value=[
                    {"bvid": "BVPREV", "title": "王海颖唱歌录播 12月31日 22时06分"},
                ],
            ):
                context = link_context_for_task(task, repo, settings)

        self.assertEqual(context["previous_full_video_bvid"], "BVPREV")

    def test_current_full_video_falls_back_to_biliup_list(self) -> None:
        """The current task's own full video is located remotely and the bvid is cached on disk."""
        with tempfile.TemporaryDirectory() as tmpdir:
            source_path = Path(tmpdir) / "source.mp4"
            source_path.write_bytes(b"")
            task = SimpleNamespace(
                id="task-current",
                title="王海颖唱歌录播 04月22日 15时56分",
                source_path=str(source_path),
            )
            repo = SimpleNamespace(get_task_context=lambda task_id: None)
            settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

            # Patched at the locator module, where the lookup actually happens.
            with patch(
                "biliup_next.infra.adapters.full_video_locator.fetch_biliup_list",
                return_value=[
                    {"bvid": "BVFULL", "title": "王海颖唱歌录播 04月22日 15时56分"},
                    {"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月22日 15时56分 共20首歌"},
                ],
            ):
                context = link_context_for_task(task, repo, settings)

            self.assertEqual(context["current_full_video_bvid"], "BVFULL")
            self.assertEqual(context["current_full_video_link"], "https://www.bilibili.com/video/BVFULL")
            # The resolved bvid is persisted beside the source for future runs.
            self.assertEqual((source_path.parent / "full_video_bvid.txt").read_text(encoding="utf-8"), "BVFULL")
|
||||
|
||||
# Allow running this test module directly (outside a test runner).
if __name__ == "__main__":
    unittest.main()
|
||||
116
tests/test_workspace_cleanup.py
Normal file
116
tests/test_workspace_cleanup.py
Normal file
@ -0,0 +1,116 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
from biliup_next.core.models import Task, utc_now_iso
|
||||
from biliup_next.infra.workspace_cleanup import WorkspaceCleanupService
|
||||
|
||||
|
||||
class _FakeRepo:
|
||||
def __init__(self, tasks: list[Task], session_key: str | None = None) -> None:
|
||||
self.tasks = {task.id: task for task in tasks}
|
||||
self.session_key = session_key
|
||||
self.deleted_artifacts: list[tuple[str, str]] = []
|
||||
self.deleted_artifact_paths: list[tuple[str, str]] = []
|
||||
|
||||
def get_task(self, task_id: str) -> Task | None:
|
||||
return self.tasks.get(task_id)
|
||||
|
||||
def get_task_context(self, task_id: str): # noqa: ANN201
|
||||
if self.session_key is None or task_id not in self.tasks:
|
||||
return None
|
||||
return SimpleNamespace(task_id=task_id, session_key=self.session_key)
|
||||
|
||||
def list_task_contexts_by_session_key(self, session_key: str): # noqa: ANN201
|
||||
if session_key != self.session_key:
|
||||
return []
|
||||
return [SimpleNamespace(task_id=task_id, session_key=session_key) for task_id in self.tasks]
|
||||
|
||||
def delete_artifacts(self, task_id: str, artifact_type: str) -> None:
|
||||
self.deleted_artifacts.append((task_id, artifact_type))
|
||||
|
||||
def delete_artifact_by_path(self, task_id: str, path: str) -> None:
|
||||
self.deleted_artifact_paths.append((task_id, path))
|
||||
|
||||
|
||||
def _make_task(task_id: str, root: Path) -> Task:
    """Create a synced task on disk: a source file plus one clip in each video dir."""
    timestamp = utc_now_iso()
    work_dir = root / task_id
    work_dir.mkdir(parents=True)
    source_file = work_dir / "source.mp4"
    source_file.write_bytes(b"source")
    # Both the raw-split and publish-ready folders receive a single clip.
    for folder_name in ("split_video", "publish_video"):
        clip_dir = work_dir / folder_name
        clip_dir.mkdir()
        (clip_dir / "01_song.mp4").write_bytes(b"clip")
    return Task(task_id, "local_file", str(source_file), task_id, "collection_synced", timestamp, timestamp)
|
||||
|
||||
class WorkspaceCleanupServiceTests(unittest.TestCase):
    """Tests for WorkspaceCleanupService.cleanup_task_outputs."""

    def test_cleanup_removes_source_split_and_publish_video_for_single_task(self) -> None:
        """With both deletion flags on, source, split, and publish videos are removed."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            task = _make_task("task-1", root)
            repo = _FakeRepo([task])
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(
                task.id,
                {
                    "delete_source_video_after_collection_synced": True,
                    "delete_split_videos_after_collection_synced": True,
                },
            )

            work_dir = root / "task-1"
            self.assertFalse((work_dir / "source.mp4").exists())
            self.assertFalse((work_dir / "split_video").exists())
            self.assertFalse((work_dir / "publish_video").exists())
            self.assertEqual(result["task_ids"], ["task-1"])
            # Clip deletions are recorded by type; the source by resolved path.
            self.assertEqual(repo.deleted_artifacts, [("task-1", "clip_video")])
            self.assertEqual(repo.deleted_artifact_paths, [("task-1", str((work_dir / "source.mp4").resolve()))])

    def test_cleanup_removes_all_tasks_in_same_session(self) -> None:
        """Cleanup fans out to every task sharing the session key of the given task."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            task_1 = _make_task("task-1", root)
            task_2 = _make_task("task-2", root)
            # Both tasks belong to "session-1", so cleaning task-1 also cleans task-2.
            repo = _FakeRepo([task_1, task_2], session_key="session-1")
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(
                task_1.id,
                {
                    "delete_source_video_after_collection_synced": True,
                    "delete_split_videos_after_collection_synced": True,
                },
            )

            for task_id in ("task-1", "task-2"):
                work_dir = root / task_id
                self.assertFalse((work_dir / "source.mp4").exists())
                self.assertFalse((work_dir / "split_video").exists())
                self.assertFalse((work_dir / "publish_video").exists())
            self.assertEqual(result["task_ids"], ["task-1", "task-2"])
            self.assertEqual(repo.deleted_artifacts, [("task-1", "clip_video"), ("task-2", "clip_video")])

    def test_cleanup_skips_missing_source_video(self) -> None:
        """An already-deleted source file is reported as skipped, not treated as an error."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            task = _make_task("task-1", root)
            source = Path(task.source_path)
            # Remove the source up-front to simulate a prior (or external) deletion.
            source.unlink()
            repo = _FakeRepo([task])
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(
                task.id,
                {
                    "delete_source_video_after_collection_synced": True,
                    "delete_split_videos_after_collection_synced": False,
                },
            )

            self.assertIn(str(source.resolve()), result["skipped"])
            self.assertEqual(repo.deleted_artifact_paths, [])
|
||||
|
||||
# Allow running this test module directly (outside a test runner).
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user