feat: package docker deployment and publish flow
This commit is contained in:
19
.dockerignore
Normal file
19
.dockerignore
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
.git
|
||||||
|
.venv
|
||||||
|
.pytest_cache
|
||||||
|
__pycache__
|
||||||
|
*.pyc
|
||||||
|
|
||||||
|
data/
|
||||||
|
runtime/cookies.json
|
||||||
|
runtime/upload_config.json
|
||||||
|
runtime/biliup
|
||||||
|
runtime/codex/
|
||||||
|
runtime/logs/
|
||||||
|
|
||||||
|
frontend/node_modules/
|
||||||
|
frontend/dist/
|
||||||
|
|
||||||
|
.env
|
||||||
|
config/settings.json
|
||||||
|
config/settings.staged.json
|
||||||
49
.env.example
Normal file
49
.env.example
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# Web/API port exposed on the host.
|
||||||
|
BILIUP_NEXT_PORT=8000
|
||||||
|
|
||||||
|
# Image used by both api and worker. Override this when using a versioned tag
|
||||||
|
# or a private registry image, for example 192.168.1.100:25490/biliup-next:20260420.
|
||||||
|
BILIUP_NEXT_IMAGE=biliup-next:local
|
||||||
|
|
||||||
|
# Worker polling interval in seconds.
|
||||||
|
WORKER_INTERVAL=5
|
||||||
|
|
||||||
|
# Container timezone.
|
||||||
|
TZ=Asia/Shanghai
|
||||||
|
|
||||||
|
# Optional container outbound proxy. In Docker Desktop/WSL, host.docker.internal
|
||||||
|
# points to the Windows host; set this to your local proxy port.
|
||||||
|
# These values are also passed as Docker build args for apt/pip/npm.
|
||||||
|
# HTTP_PROXY=http://host.docker.internal:7897
|
||||||
|
# HTTPS_PROXY=http://host.docker.internal:7897
|
||||||
|
# ALL_PROXY=http://host.docker.internal:7897
|
||||||
|
# NO_PROXY=localhost,127.0.0.1,api,worker
|
||||||
|
#
|
||||||
|
# Docker build-time proxy. Separate names avoid being overridden by host
|
||||||
|
# HTTP_PROXY/HTTPS_PROXY when Compose interpolates build args.
|
||||||
|
# DOCKER_BUILD_HTTP_PROXY=http://host.docker.internal:7897
|
||||||
|
# DOCKER_BUILD_HTTPS_PROXY=http://host.docker.internal:7897
|
||||||
|
# DOCKER_BUILD_ALL_PROXY=http://host.docker.internal:7897
|
||||||
|
# DOCKER_BUILD_NO_PROXY=localhost,127.0.0.1,api,worker
|
||||||
|
|
||||||
|
# Required for Groq transcription. Prefer this env var over writing the key
|
||||||
|
# directly into config/settings.json.
|
||||||
|
GROQ_API_KEY=
|
||||||
|
# Optional key pool. Use a JSON array; keys here are tried before GROQ_API_KEY.
|
||||||
|
# GROQ_API_KEYS=["gsk_xxx","gsk_yyy"]
|
||||||
|
|
||||||
|
# Optional for the Codex song detector when you do not mount an existing
|
||||||
|
# Codex login state into runtime/codex.
|
||||||
|
OPENAI_API_KEY=
|
||||||
|
|
||||||
|
# Bilibili collection IDs.
|
||||||
|
# A: live full-video collection
|
||||||
|
# B: live split/pure-song collection
|
||||||
|
COLLECTION_SEASON_ID_A=7196643
|
||||||
|
COLLECTION_SEASON_ID_B=7196624
|
||||||
|
|
||||||
|
# Optional explicit config overrides. The generic format is:
|
||||||
|
# BILIUP_NEXT__GROUP__FIELD=value
|
||||||
|
#
|
||||||
|
# BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
|
||||||
|
# BILIUP_NEXT__PUBLISH__RATE_LIMIT_RETRY_SCHEDULE_MINUTES=[15,30,60]
|
||||||
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,4 +1,8 @@
|
|||||||
.venv/
|
.venv/
|
||||||
|
.codex
|
||||||
|
.codex/
|
||||||
|
.env
|
||||||
|
.tmp-tests/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
@ -12,6 +16,7 @@ systemd/rendered/
|
|||||||
runtime/cookies.json
|
runtime/cookies.json
|
||||||
runtime/upload_config.json
|
runtime/upload_config.json
|
||||||
runtime/biliup
|
runtime/biliup
|
||||||
|
runtime/codex/
|
||||||
runtime/logs/
|
runtime/logs/
|
||||||
|
|
||||||
frontend/node_modules/
|
frontend/node_modules/
|
||||||
|
|||||||
61
Dockerfile
Normal file
61
Dockerfile
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
FROM node:24-bookworm-slim AS frontend-builder
|
||||||
|
|
||||||
|
ARG HTTP_PROXY
|
||||||
|
ARG HTTPS_PROXY
|
||||||
|
ARG ALL_PROXY
|
||||||
|
ARG NO_PROXY
|
||||||
|
ARG http_proxy
|
||||||
|
ARG https_proxy
|
||||||
|
ARG all_proxy
|
||||||
|
ARG no_proxy
|
||||||
|
|
||||||
|
WORKDIR /build/frontend
|
||||||
|
COPY frontend/package*.json ./
|
||||||
|
RUN npm ci
|
||||||
|
COPY frontend/ ./
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
FROM python:3.12-slim AS app
|
||||||
|
|
||||||
|
ARG HTTP_PROXY
|
||||||
|
ARG HTTPS_PROXY
|
||||||
|
ARG ALL_PROXY
|
||||||
|
ARG NO_PROXY
|
||||||
|
ARG http_proxy
|
||||||
|
ARG https_proxy
|
||||||
|
ARG all_proxy
|
||||||
|
ARG no_proxy
|
||||||
|
|
||||||
|
ENV PYTHONUNBUFFERED=1 \
|
||||||
|
PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PIP_NO_CACHE_DIR=1 \
|
||||||
|
BILIUP_NEXT_CONTAINER=1
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends \
|
||||||
|
ca-certificates \
|
||||||
|
curl \
|
||||||
|
ffmpeg \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY pyproject.toml README.md ./
|
||||||
|
COPY src ./src
|
||||||
|
COPY config ./config
|
||||||
|
COPY runtime/README.md runtime/cookies.example.json runtime/upload_config.example.json ./runtime/
|
||||||
|
COPY --from=frontend-builder /build/frontend/dist ./frontend/dist
|
||||||
|
COPY --from=frontend-builder /usr/local/bin/node /usr/local/bin/node
|
||||||
|
COPY --from=frontend-builder /usr/local/lib/node_modules /usr/local/lib/node_modules
|
||||||
|
|
||||||
|
RUN pip install --editable . \
|
||||||
|
&& pip install yt-dlp \
|
||||||
|
&& ln -sf ../lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm \
|
||||||
|
&& ln -sf ../lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx \
|
||||||
|
&& npm install -g @openai/codex
|
||||||
|
|
||||||
|
RUN mkdir -p /app/data/workspace/stage /app/data/workspace/session /app/data/workspace/backup /app/runtime/logs /root/.codex
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
CMD ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
43
README.md
43
README.md
@ -59,6 +59,10 @@ bash setup.sh
|
|||||||
|
|
||||||
- `docs/cold-start-checklist.md`
|
- `docs/cold-start-checklist.md`
|
||||||
|
|
||||||
|
发布流程、输出文案和评论示例见:
|
||||||
|
|
||||||
|
- `docs/publish-output-examples.md`
|
||||||
|
|
||||||
浏览器访问:
|
浏览器访问:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
@ -192,6 +196,29 @@ cd /home/theshy/biliup/biliup-next
|
|||||||
- 内容按 `P1/P2/P3` 分组
|
- 内容按 `P1/P2/P3` 分组
|
||||||
- 依赖 `full_video_bvid.txt` 或通过标题匹配解析到完整版 BV
|
- 依赖 `full_video_bvid.txt` 或通过标题匹配解析到完整版 BV
|
||||||
|
|
||||||
|
评论格式和投稿文案一样,优先从 `runtime/upload_config.json` 读取。可编辑字段:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"comment_template": {
|
||||||
|
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||||
|
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||||
|
"split_part_header": "P{part_index}:",
|
||||||
|
"full_part_header": "P{part_index}:",
|
||||||
|
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||||
|
"split_text_song_line": "{song_index}. {song_text}",
|
||||||
|
"full_timeline_line": "{song_index}. {line_text}"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
常用变量:
|
||||||
|
|
||||||
|
- 链接:`{current_full_video_link}`、`{current_pure_video_link}`、`{previous_full_video_link}`、`{previous_pure_video_link}`
|
||||||
|
- 分段与序号:`{part_index}`、`{song_index}`
|
||||||
|
- 纯享歌单:`{title}`、`{artist}`、`{artist_suffix}`、`{song_text}`
|
||||||
|
- 完整版时间轴:`{line_text}`
|
||||||
|
|
||||||
|
如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过。
|
||||||
|
|
||||||
清理默认关闭:
|
清理默认关闭:
|
||||||
|
|
||||||
- `cleanup.delete_source_video_after_collection_synced = false`
|
- `cleanup.delete_source_video_after_collection_synced = false`
|
||||||
@ -201,11 +228,14 @@ cd /home/theshy/biliup/biliup-next
|
|||||||
|
|
||||||
## Full Video BV Input
|
## Full Video BV Input
|
||||||
|
|
||||||
完整版 `BV` 目前支持 3 种来源:
|
完整版 `BV` 目前支持 4 种来源:
|
||||||
|
|
||||||
- `stage/*.meta.json` 中的 `full_video_bvid`
|
- `stage/*.meta.json` 中的 `full_video_bvid`
|
||||||
- 前端 / API 手工绑定
|
- 前端 / API 手工绑定
|
||||||
- webhook:`POST /webhooks/full-video-uploaded`
|
- webhook:`POST /webhooks/full-video-uploaded`
|
||||||
|
- `biliup list` 标题匹配,包含 `开放浏览` 和 `审核中` 状态
|
||||||
|
|
||||||
|
只要完整版上传后已经生成 BV,即使仍在审核中,也可以被用于纯享版简介、动态和评论互链。
|
||||||
|
|
||||||
推荐 webhook 负载:
|
推荐 webhook 负载:
|
||||||
|
|
||||||
@ -320,3 +350,14 @@ curl -X POST http://127.0.0.1:8787/tasks \
|
|||||||
|
|
||||||
- `ingest.provider = bilibili_url`
|
- `ingest.provider = bilibili_url`
|
||||||
- `ingest.yt_dlp_cmd = yt-dlp`
|
- `ingest.yt_dlp_cmd = yt-dlp`
|
||||||
|
|
||||||
|
## Docker Compose Deployment
|
||||||
|
|
||||||
|
如果希望用容器方式一键运行 API 和 worker,请参考 [README_DEPLOY.md](README_DEPLOY.md)。
|
||||||
|
|
||||||
|
快速入口:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/init-docker-config.sh
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|||||||
176
README_DEPLOY.md
Normal file
176
README_DEPLOY.md
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
# Docker Compose Deployment
|
||||||
|
|
||||||
|
This deployment runs the API and worker as two services from the same image.
|
||||||
|
Runtime state, credentials, staged videos, generated sessions, and the SQLite
|
||||||
|
database stay on the host through bind mounts.
|
||||||
|
|
||||||
|
## 1. Initialize Local Files
|
||||||
|
|
||||||
|
```bash
|
||||||
|
chmod +x scripts/init-docker-config.sh
|
||||||
|
./scripts/init-docker-config.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates these files if they do not already exist:
|
||||||
|
|
||||||
|
```text
|
||||||
|
.env
|
||||||
|
config/settings.json
|
||||||
|
runtime/cookies.json
|
||||||
|
runtime/upload_config.json
|
||||||
|
data/workspace/
|
||||||
|
```
|
||||||
|
|
||||||
|
## 2. Edit Required Secrets And IDs
|
||||||
|
|
||||||
|
Edit `.env`:
|
||||||
|
|
||||||
|
```env
|
||||||
|
GROQ_API_KEY=your_groq_key
|
||||||
|
OPENAI_API_KEY=your_openai_key_if_using_codex
|
||||||
|
COLLECTION_SEASON_ID_A=7196643
|
||||||
|
COLLECTION_SEASON_ID_B=7196624
|
||||||
|
```
|
||||||
|
|
||||||
|
Edit `runtime/cookies.json` and `runtime/upload_config.json` with real Bilibili
|
||||||
|
credentials and upload metadata.
|
||||||
|
|
||||||
|
`runtime/upload_config.json` also controls pure-video title, description,
|
||||||
|
dynamic text, and top-comment formatting. Existing deployments mount
|
||||||
|
`./runtime` from the host, so updating the image does not overwrite this file.
|
||||||
|
When you want to change output text, edit the host file directly.
|
||||||
|
|
||||||
|
Common output templates:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template": {
|
||||||
|
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
|
||||||
|
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}\n\n本视频为歌曲纯享切片,适合只听歌曲。",
|
||||||
|
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}"
|
||||||
|
},
|
||||||
|
"comment_template": {
|
||||||
|
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次直播:{previous_full_video_link} (上一场完整录播)",
|
||||||
|
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次直播:{previous_full_video_link} (上一场完整录播)",
|
||||||
|
"split_part_header": "P{part_index}:",
|
||||||
|
"full_part_header": "P{part_index}:",
|
||||||
|
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||||
|
"split_text_song_line": "{song_index}. {song_text}",
|
||||||
|
"full_timeline_line": "{song_index}. {line_text}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Supported comment variables:
|
||||||
|
|
||||||
|
- `{current_full_video_link}` / `{current_pure_video_link}`
|
||||||
|
- `{previous_full_video_link}` / `{previous_pure_video_link}`
|
||||||
|
- `{part_index}` / `{song_index}`
|
||||||
|
- `{title}` / `{artist}` / `{artist_suffix}` / `{song_text}` / `{line_text}`
|
||||||
|
|
||||||
|
If a comment header line contains an empty link variable, that whole line is
|
||||||
|
omitted. This prevents comments from showing blank `上次直播:` lines when the
|
||||||
|
previous live video cannot be found.
|
||||||
|
|
||||||
|
Provide the `biliup` binary at:
|
||||||
|
|
||||||
|
```text
|
||||||
|
runtime/biliup
|
||||||
|
```
|
||||||
|
|
||||||
|
It must be executable inside the container:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
chmod +x runtime/biliup
|
||||||
|
```
|
||||||
|
|
||||||
|
The image installs the `codex` CLI for `song_detect.provider=codex`. Provide
|
||||||
|
Codex auth in one of these ways:
|
||||||
|
|
||||||
|
```text
|
||||||
|
OPENAI_API_KEY in .env
|
||||||
|
runtime/codex mounted to /root/.codex
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3. Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
Open:
|
||||||
|
|
||||||
|
```text
|
||||||
|
http://127.0.0.1:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
Drop videos into:
|
||||||
|
|
||||||
|
```text
|
||||||
|
data/workspace/stage/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose logs -f api
|
||||||
|
docker compose logs -f worker
|
||||||
|
docker compose restart worker
|
||||||
|
docker compose down
|
||||||
|
```
|
||||||
|
|
||||||
|
Run one scheduler cycle manually:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose run --rm worker biliup-next run-once
|
||||||
|
```
|
||||||
|
|
||||||
|
Run doctor:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose run --rm api biliup-next doctor
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment Overrides
|
||||||
|
|
||||||
|
`config/settings.json` is still the base configuration. Environment variables
|
||||||
|
override selected values at runtime.
|
||||||
|
|
||||||
|
The Compose file already forces container-safe paths such as
|
||||||
|
`/app/data/workspace` and `/app/runtime/cookies.json`, so an existing local
|
||||||
|
`config/settings.json` with host paths can still be mounted safely.
|
||||||
|
|
||||||
|
Generic format:
|
||||||
|
|
||||||
|
```text
|
||||||
|
BILIUP_NEXT__GROUP__FIELD=value
|
||||||
|
```
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
```env
|
||||||
|
BILIUP_NEXT__PATHS__STAGE_DIR=/app/data/workspace/stage
|
||||||
|
BILIUP_NEXT__PUBLISH__BILIUP_PATH=/app/runtime/biliup
|
||||||
|
BILIUP_NEXT__PUBLISH__RETRY_SCHEDULE_MINUTES=[15,5,5,5,5]
|
||||||
|
```
|
||||||
|
|
||||||
|
Convenience aliases:
|
||||||
|
|
||||||
|
```env
|
||||||
|
GROQ_API_KEY=...
|
||||||
|
COLLECTION_SEASON_ID_A=7196643
|
||||||
|
COLLECTION_SEASON_ID_B=7196624
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Persistence
|
||||||
|
|
||||||
|
These host paths are mounted into the containers:
|
||||||
|
|
||||||
|
```text
|
||||||
|
./config -> /app/config
|
||||||
|
./runtime -> /app/runtime
|
||||||
|
./data/workspace -> /app/data/workspace
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not store `cookies.json`, Groq keys, or generated workspace data in the image.
|
||||||
|
They should stay in the mounted host directories.
|
||||||
127
config/settings.docker.example.json
Normal file
127
config/settings.docker.example.json
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
{
|
||||||
|
"runtime": {
|
||||||
|
"database_path": "/app/data/workspace/biliup_next.db",
|
||||||
|
"control_token": "",
|
||||||
|
"log_level": "INFO"
|
||||||
|
},
|
||||||
|
"paths": {
|
||||||
|
"stage_dir": "/app/data/workspace/stage",
|
||||||
|
"backup_dir": "/app/data/workspace/backup",
|
||||||
|
"session_dir": "/app/data/workspace/session",
|
||||||
|
"cookies_file": "/app/runtime/cookies.json",
|
||||||
|
"upload_config_file": "/app/runtime/upload_config.json"
|
||||||
|
},
|
||||||
|
"scheduler": {
|
||||||
|
"candidate_scan_limit": 500,
|
||||||
|
"max_tasks_per_cycle": 50,
|
||||||
|
"prioritize_retry_due": true,
|
||||||
|
"oldest_first": true,
|
||||||
|
"status_priority": [
|
||||||
|
"failed_retryable",
|
||||||
|
"created",
|
||||||
|
"transcribed",
|
||||||
|
"songs_detected",
|
||||||
|
"split_done",
|
||||||
|
"published",
|
||||||
|
"commented",
|
||||||
|
"collection_synced"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"ingest": {
|
||||||
|
"provider": "local_file",
|
||||||
|
"min_duration_seconds": 900,
|
||||||
|
"ffprobe_bin": "ffprobe",
|
||||||
|
"yt_dlp_cmd": "yt-dlp",
|
||||||
|
"yt_dlp_format": "",
|
||||||
|
"allowed_extensions": [
|
||||||
|
".mp4",
|
||||||
|
".flv",
|
||||||
|
".mkv",
|
||||||
|
".mov"
|
||||||
|
],
|
||||||
|
"stage_min_free_space_mb": 1024,
|
||||||
|
"stability_wait_seconds": 30,
|
||||||
|
"session_gap_minutes": 60,
|
||||||
|
"meta_sidecar_enabled": true,
|
||||||
|
"meta_sidecar_suffix": ".meta.json"
|
||||||
|
},
|
||||||
|
"transcribe": {
|
||||||
|
"provider": "groq",
|
||||||
|
"groq_api_key": "",
|
||||||
|
"groq_api_keys": [],
|
||||||
|
"ffmpeg_bin": "ffmpeg",
|
||||||
|
"max_file_size_mb": 12,
|
||||||
|
"request_timeout_seconds": 180,
|
||||||
|
"request_max_retries": 1,
|
||||||
|
"request_retry_backoff_seconds": 30,
|
||||||
|
"serialize_groq_requests": true,
|
||||||
|
"retry_count": 3,
|
||||||
|
"retry_backoff_seconds": 300,
|
||||||
|
"retry_schedule_minutes": [
|
||||||
|
5,
|
||||||
|
10,
|
||||||
|
15
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"song_detect": {
|
||||||
|
"provider": "codex",
|
||||||
|
"codex_cmd": "codex",
|
||||||
|
"qwen_cmd": "qwen",
|
||||||
|
"poll_interval_seconds": 2,
|
||||||
|
"retry_count": 3,
|
||||||
|
"retry_backoff_seconds": 300,
|
||||||
|
"retry_schedule_minutes": [
|
||||||
|
5,
|
||||||
|
10,
|
||||||
|
15
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"split": {
|
||||||
|
"provider": "ffmpeg_copy",
|
||||||
|
"ffmpeg_bin": "ffmpeg",
|
||||||
|
"poll_interval_seconds": 2,
|
||||||
|
"min_free_space_mb": 2048
|
||||||
|
},
|
||||||
|
"publish": {
|
||||||
|
"provider": "biliup_cli",
|
||||||
|
"biliup_path": "/app/runtime/biliup",
|
||||||
|
"cookie_file": "/app/runtime/cookies.json",
|
||||||
|
"retry_count": 5,
|
||||||
|
"retry_schedule_minutes": [
|
||||||
|
15,
|
||||||
|
5,
|
||||||
|
5,
|
||||||
|
5,
|
||||||
|
5
|
||||||
|
],
|
||||||
|
"retry_backoff_seconds": 300,
|
||||||
|
"command_timeout_seconds": 1800,
|
||||||
|
"rate_limit_retry_schedule_minutes": [
|
||||||
|
15,
|
||||||
|
30,
|
||||||
|
60
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"comment": {
|
||||||
|
"provider": "bilibili_top_comment",
|
||||||
|
"enabled": true,
|
||||||
|
"max_retries": 5,
|
||||||
|
"base_delay_seconds": 180,
|
||||||
|
"poll_interval_seconds": 10,
|
||||||
|
"post_split_comment": true,
|
||||||
|
"post_full_video_timeline_comment": true
|
||||||
|
},
|
||||||
|
"collection": {
|
||||||
|
"provider": "bilibili_collection",
|
||||||
|
"enabled": true,
|
||||||
|
"season_id_a": 7196643,
|
||||||
|
"season_id_b": 7196624,
|
||||||
|
"allow_fuzzy_full_video_match": false,
|
||||||
|
"append_collection_a_new_to_end": true,
|
||||||
|
"append_collection_b_new_to_end": true
|
||||||
|
},
|
||||||
|
"cleanup": {
|
||||||
|
"delete_source_video_after_collection_synced": false,
|
||||||
|
"delete_split_videos_after_collection_synced": false
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,15 +1,15 @@
|
|||||||
{
|
{
|
||||||
"runtime": {
|
"runtime": {
|
||||||
"database_path": "data/workspace/biliup_next.db",
|
"database_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/biliup_next.db",
|
||||||
"control_token": "",
|
"control_token": "",
|
||||||
"log_level": "INFO"
|
"log_level": "INFO"
|
||||||
},
|
},
|
||||||
"paths": {
|
"paths": {
|
||||||
"stage_dir": "data/workspace/stage",
|
"stage_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/stage",
|
||||||
"backup_dir": "data/workspace/backup",
|
"backup_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/backup",
|
||||||
"session_dir": "data/workspace/session",
|
"session_dir": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/data/workspace/session",
|
||||||
"cookies_file": "runtime/cookies.json",
|
"cookies_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
|
||||||
"upload_config_file": "runtime/upload_config.json"
|
"upload_config_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/upload_config.json"
|
||||||
},
|
},
|
||||||
"scheduler": {
|
"scheduler": {
|
||||||
"candidate_scan_limit": 500,
|
"candidate_scan_limit": 500,
|
||||||
@ -31,7 +31,7 @@
|
|||||||
"provider": "local_file",
|
"provider": "local_file",
|
||||||
"min_duration_seconds": 900,
|
"min_duration_seconds": 900,
|
||||||
"ffprobe_bin": "ffprobe",
|
"ffprobe_bin": "ffprobe",
|
||||||
"yt_dlp_cmd": "yt-dlp",
|
"yt_dlp_cmd": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/.venv/bin/yt-dlp",
|
||||||
"yt_dlp_format": "",
|
"yt_dlp_format": "",
|
||||||
"allowed_extensions": [
|
"allowed_extensions": [
|
||||||
".mp4",
|
".mp4",
|
||||||
@ -47,15 +47,34 @@
|
|||||||
},
|
},
|
||||||
"transcribe": {
|
"transcribe": {
|
||||||
"provider": "groq",
|
"provider": "groq",
|
||||||
"groq_api_key": "",
|
"groq_api_key": "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",
|
||||||
|
"groq_api_keys": [],
|
||||||
"ffmpeg_bin": "ffmpeg",
|
"ffmpeg_bin": "ffmpeg",
|
||||||
"max_file_size_mb": 23
|
"max_file_size_mb": 12,
|
||||||
|
"request_timeout_seconds": 180,
|
||||||
|
"request_max_retries": 1,
|
||||||
|
"request_retry_backoff_seconds": 30,
|
||||||
|
"serialize_groq_requests": true,
|
||||||
|
"retry_count": 3,
|
||||||
|
"retry_backoff_seconds": 300,
|
||||||
|
"retry_schedule_minutes": [
|
||||||
|
5,
|
||||||
|
10,
|
||||||
|
15
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"song_detect": {
|
"song_detect": {
|
||||||
"provider": "qwen_cli",
|
"provider": "codex",
|
||||||
"codex_cmd": "codex",
|
"codex_cmd": "codex",
|
||||||
"qwen_cmd": "qwen",
|
"qwen_cmd": "qwen",
|
||||||
"poll_interval_seconds": 2
|
"poll_interval_seconds": 2,
|
||||||
|
"retry_count": 3,
|
||||||
|
"retry_backoff_seconds": 300,
|
||||||
|
"retry_schedule_minutes": [
|
||||||
|
5,
|
||||||
|
10,
|
||||||
|
15
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"split": {
|
"split": {
|
||||||
"provider": "ffmpeg_copy",
|
"provider": "ffmpeg_copy",
|
||||||
@ -65,8 +84,8 @@
|
|||||||
},
|
},
|
||||||
"publish": {
|
"publish": {
|
||||||
"provider": "biliup_cli",
|
"provider": "biliup_cli",
|
||||||
"biliup_path": "runtime/biliup",
|
"biliup_path": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/biliup",
|
||||||
"cookie_file": "runtime/cookies.json",
|
"cookie_file": "/mnt/f/Codecases/2026-04-14_biliup-next/biliup-next/runtime/cookies.json",
|
||||||
"retry_count": 5,
|
"retry_count": 5,
|
||||||
"retry_schedule_minutes": [
|
"retry_schedule_minutes": [
|
||||||
15,
|
15,
|
||||||
@ -78,9 +97,9 @@
|
|||||||
"retry_backoff_seconds": 300,
|
"retry_backoff_seconds": 300,
|
||||||
"command_timeout_seconds": 1800,
|
"command_timeout_seconds": 1800,
|
||||||
"rate_limit_retry_schedule_minutes": [
|
"rate_limit_retry_schedule_minutes": [
|
||||||
|
15,
|
||||||
30,
|
30,
|
||||||
60,
|
60
|
||||||
120
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"comment": {
|
"comment": {
|
||||||
@ -95,8 +114,8 @@
|
|||||||
"collection": {
|
"collection": {
|
||||||
"provider": "bilibili_collection",
|
"provider": "bilibili_collection",
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"season_id_a": 0,
|
"season_id_a": 7196643,
|
||||||
"season_id_b": 0,
|
"season_id_b": 7196624,
|
||||||
"allow_fuzzy_full_video_match": false,
|
"allow_fuzzy_full_video_match": false,
|
||||||
"append_collection_a_new_to_end": true,
|
"append_collection_a_new_to_end": true,
|
||||||
"append_collection_b_new_to_end": true
|
"append_collection_b_new_to_end": true
|
||||||
|
|||||||
@ -229,6 +229,16 @@
|
|||||||
"description": "用于调用 Groq 转录 API。",
|
"description": "用于调用 Groq 转录 API。",
|
||||||
"sensitive": true
|
"sensitive": true
|
||||||
},
|
},
|
||||||
|
"groq_api_keys": {
|
||||||
|
"type": "array",
|
||||||
|
"default": [],
|
||||||
|
"title": "Groq API Keys",
|
||||||
|
"ui_order": 12,
|
||||||
|
"ui_widget": "secret_list",
|
||||||
|
"items": { "type": "string" },
|
||||||
|
"description": "可选 Groq API Key 池。遇到单个 key 限流时会自动切换下一个 key;为空时使用 groq_api_key。",
|
||||||
|
"sensitive": true
|
||||||
|
},
|
||||||
"ffmpeg_bin": {
|
"ffmpeg_bin": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "ffmpeg",
|
"default": "ffmpeg",
|
||||||
@ -238,10 +248,66 @@
|
|||||||
},
|
},
|
||||||
"max_file_size_mb": {
|
"max_file_size_mb": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"default": 23,
|
"default": 12,
|
||||||
"title": "Max File Size MB",
|
"title": "Max File Size MB",
|
||||||
"ui_order": 40,
|
"ui_order": 40,
|
||||||
"minimum": 1
|
"minimum": 1,
|
||||||
|
"description": "Groq 音频分片目标上限。实际切分会额外保留安全余量,避免贴近上传限制。"
|
||||||
|
},
|
||||||
|
"request_timeout_seconds": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 180,
|
||||||
|
"title": "Request Timeout Seconds",
|
||||||
|
"ui_order": 50,
|
||||||
|
"minimum": 1,
|
||||||
|
"description": "单个 Groq 转录请求的超时时间。"
|
||||||
|
},
|
||||||
|
"request_max_retries": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 1,
|
||||||
|
"title": "Request Max Retries",
|
||||||
|
"ui_order": 60,
|
||||||
|
"minimum": 0,
|
||||||
|
"description": "单个音频分片在超时、限流或连接错误时的请求级重试次数。"
|
||||||
|
},
|
||||||
|
"request_retry_backoff_seconds": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 30,
|
||||||
|
"title": "Request Retry Backoff Seconds",
|
||||||
|
"ui_order": 70,
|
||||||
|
"minimum": 0,
|
||||||
|
"description": "Groq 请求级重试之间的等待时间。"
|
||||||
|
},
|
||||||
|
"serialize_groq_requests": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": true,
|
||||||
|
"title": "Serialize Groq Requests",
|
||||||
|
"ui_order": 75,
|
||||||
|
"description": "是否串行化 Groq 分片上传请求,避免多个 worker 或多个任务同时上传导致超时。"
|
||||||
|
},
|
||||||
|
"retry_count": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 3,
|
||||||
|
"title": "Task Retry Count",
|
||||||
|
"ui_order": 80,
|
||||||
|
"minimum": 0,
|
||||||
|
"description": "transcribe 步骤允许的任务级失败重试次数。"
|
||||||
|
},
|
||||||
|
"retry_backoff_seconds": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 300,
|
||||||
|
"title": "Task Retry Backoff Seconds",
|
||||||
|
"ui_order": 90,
|
||||||
|
"minimum": 0,
|
||||||
|
"description": "未配置 retry_schedule_minutes 时,transcribe 任务级重试的等待时间。"
|
||||||
|
},
|
||||||
|
"retry_schedule_minutes": {
|
||||||
|
"type": "array",
|
||||||
|
"default": [5, 10, 15],
|
||||||
|
"title": "Task Retry Schedule Minutes",
|
||||||
|
"ui_order": 100,
|
||||||
|
"items": { "type": "integer", "minimum": 0 },
|
||||||
|
"description": "transcribe 任务级失败后的自动重试等待时间。"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"song_detect": {
|
"song_detect": {
|
||||||
@ -275,6 +341,30 @@
|
|||||||
"title": "Poll Interval Seconds",
|
"title": "Poll Interval Seconds",
|
||||||
"ui_order": 30,
|
"ui_order": 30,
|
||||||
"minimum": 1
|
"minimum": 1
|
||||||
|
},
|
||||||
|
"retry_count": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 3,
|
||||||
|
"title": "Task Retry Count",
|
||||||
|
"ui_order": 40,
|
||||||
|
"minimum": 0,
|
||||||
|
"description": "song_detect 步骤允许的任务级失败重试次数。认证失败会直接进入人工失败,不会重试。"
|
||||||
|
},
|
||||||
|
"retry_backoff_seconds": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 300,
|
||||||
|
"title": "Task Retry Backoff Seconds",
|
||||||
|
"ui_order": 50,
|
||||||
|
"minimum": 0,
|
||||||
|
"description": "未配置 retry_schedule_minutes 时,song_detect 任务级重试的等待时间。"
|
||||||
|
},
|
||||||
|
"retry_schedule_minutes": {
|
||||||
|
"type": "array",
|
||||||
|
"default": [5, 10, 15],
|
||||||
|
"title": "Task Retry Schedule Minutes",
|
||||||
|
"ui_order": 60,
|
||||||
|
"items": { "type": "integer", "minimum": 0 },
|
||||||
|
"description": "song_detect 任务级失败后的自动重试等待时间。"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"split": {
|
"split": {
|
||||||
@ -375,9 +465,9 @@
|
|||||||
"rate_limit_retry_schedule_minutes": {
|
"rate_limit_retry_schedule_minutes": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"default": [
|
"default": [
|
||||||
|
15,
|
||||||
30,
|
30,
|
||||||
60,
|
60
|
||||||
120
|
|
||||||
],
|
],
|
||||||
"title": "Rate Limit Retry Schedule Minutes",
|
"title": "Rate Limit Retry Schedule Minutes",
|
||||||
"ui_order": 70,
|
"ui_order": 70,
|
||||||
|
|||||||
@ -27,6 +27,7 @@
|
|||||||
"transcribe": {
|
"transcribe": {
|
||||||
"provider": "groq",
|
"provider": "groq",
|
||||||
"groq_api_key": "",
|
"groq_api_key": "",
|
||||||
|
"groq_api_keys": [],
|
||||||
"ffmpeg_bin": "ffmpeg",
|
"ffmpeg_bin": "ffmpeg",
|
||||||
"max_file_size_mb": 23
|
"max_file_size_mb": 23
|
||||||
},
|
},
|
||||||
|
|||||||
74
docker-compose.yml
Normal file
74
docker-compose.yml
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
services:
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
args:
|
||||||
|
HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||||
|
HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||||
|
ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||||
|
NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
|
||||||
|
http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||||
|
https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||||
|
all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||||
|
no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
|
||||||
|
image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
|
||||||
|
command: ["biliup-next", "serve", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
env_file:
|
||||||
|
- path: .env
|
||||||
|
required: false
|
||||||
|
environment:
|
||||||
|
TZ: ${TZ:-Asia/Shanghai}
|
||||||
|
BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
|
||||||
|
BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
|
||||||
|
BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
|
||||||
|
BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
|
||||||
|
BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
|
||||||
|
BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
|
||||||
|
BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
|
||||||
|
BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
|
||||||
|
BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
|
||||||
|
ports:
|
||||||
|
- "${BILIUP_NEXT_PORT:-8000}:8000"
|
||||||
|
volumes:
|
||||||
|
- ./config:/app/config
|
||||||
|
- ./runtime:/app/runtime
|
||||||
|
- ./data/workspace:/app/data/workspace
|
||||||
|
- ./runtime/codex:/root/.codex
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
worker:
|
||||||
|
image: ${BILIUP_NEXT_IMAGE:-biliup-next:local}
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
args:
|
||||||
|
HTTP_PROXY: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||||
|
HTTPS_PROXY: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||||
|
ALL_PROXY: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||||
|
NO_PROXY: ${DOCKER_BUILD_NO_PROXY:-}
|
||||||
|
http_proxy: ${DOCKER_BUILD_HTTP_PROXY:-}
|
||||||
|
https_proxy: ${DOCKER_BUILD_HTTPS_PROXY:-}
|
||||||
|
all_proxy: ${DOCKER_BUILD_ALL_PROXY:-}
|
||||||
|
no_proxy: ${DOCKER_BUILD_NO_PROXY:-}
|
||||||
|
command: ["sh", "-c", "biliup-next worker --interval ${WORKER_INTERVAL:-5}"]
|
||||||
|
env_file:
|
||||||
|
- path: .env
|
||||||
|
required: false
|
||||||
|
environment:
|
||||||
|
TZ: ${TZ:-Asia/Shanghai}
|
||||||
|
BILIUP_NEXT__RUNTIME__DATABASE_PATH: /app/data/workspace/biliup_next.db
|
||||||
|
BILIUP_NEXT__PATHS__STAGE_DIR: /app/data/workspace/stage
|
||||||
|
BILIUP_NEXT__PATHS__BACKUP_DIR: /app/data/workspace/backup
|
||||||
|
BILIUP_NEXT__PATHS__SESSION_DIR: /app/data/workspace/session
|
||||||
|
BILIUP_NEXT__PATHS__COOKIES_FILE: /app/runtime/cookies.json
|
||||||
|
BILIUP_NEXT__PATHS__UPLOAD_CONFIG_FILE: /app/runtime/upload_config.json
|
||||||
|
BILIUP_NEXT__INGEST__YT_DLP_CMD: yt-dlp
|
||||||
|
BILIUP_NEXT__PUBLISH__BILIUP_PATH: /app/runtime/biliup
|
||||||
|
BILIUP_NEXT__PUBLISH__COOKIE_FILE: /app/runtime/cookies.json
|
||||||
|
volumes:
|
||||||
|
- ./config:/app/config
|
||||||
|
- ./runtime:/app/runtime
|
||||||
|
- ./data/workspace:/app/data/workspace
|
||||||
|
- ./runtime/codex:/root/.codex
|
||||||
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
@ -155,6 +155,60 @@ User edits config
|
|||||||
- `base_delay_seconds`
|
- `base_delay_seconds`
|
||||||
- `poll_interval_seconds`
|
- `poll_interval_seconds`
|
||||||
|
|
||||||
|
## Upload And Comment Templates
|
||||||
|
|
||||||
|
`paths.upload_config_file` 指向 `runtime/upload_config.json`。这个文件不只控制 `biliup upload` 的标题、简介、动态和标签,也控制 B 站置顶评论格式。
|
||||||
|
|
||||||
|
投稿字段在 `template` 中:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template": {
|
||||||
|
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
|
||||||
|
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}",
|
||||||
|
"tag": "可爱,王海颖,唱歌,音乐",
|
||||||
|
"dynamic": "{streamer} {date} 歌曲纯享版已发布。\n直播完整版:{current_full_video_link}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
评论字段在 `comment_template` 中:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"comment_template": {
|
||||||
|
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||||
|
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||||
|
"split_part_header": "P{part_index}:",
|
||||||
|
"full_part_header": "P{part_index}:",
|
||||||
|
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||||
|
"split_text_song_line": "{song_index}. {song_text}",
|
||||||
|
"full_timeline_line": "{song_index}. {line_text}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
可用变量:
|
||||||
|
|
||||||
|
- `streamer`:主播名。
|
||||||
|
- `date`:从文件名解析出来的日期和时间。
|
||||||
|
- `song_count`:识别到的歌曲数量。
|
||||||
|
- `songs_list`:`songs.txt` 原始歌单内容。
|
||||||
|
- `daily_quote` / `quote_author`:随机引用文本。
|
||||||
|
- `current_full_video_bvid` / `current_full_video_link`:本场直播完整版 BV 和链接。
|
||||||
|
- `current_pure_video_bvid` / `current_pure_video_link`:本场歌曲纯享版 BV 和链接。
|
||||||
|
- `previous_full_video_bvid` / `previous_full_video_link`:上一场直播完整版 BV 和链接。
|
||||||
|
- `previous_pure_video_bvid` / `previous_pure_video_link`:上一场歌曲纯享版 BV 和链接。
|
||||||
|
- `part_index`:评论中的 `P1/P2/P3` 分段序号。
|
||||||
|
- `song_index`:全局歌曲序号。
|
||||||
|
- `title` / `artist` / `artist_suffix`:从 `songs.json` 生成纯享歌单时使用。
|
||||||
|
- `song_text`:从 `songs.txt` 兜底生成纯享歌单时使用,通常不含时间戳。
|
||||||
|
- `line_text`:完整版时间轴的原始行,通常包含时间戳。
|
||||||
|
|
||||||
|
评论头部模板有一条额外规则:如果某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动跳过,避免发出空链接提示。
|
||||||
|
|
||||||
|
Docker 部署时 `./runtime` 是宿主机挂载目录。镜像更新不会覆盖已有 `runtime/upload_config.json`,因此调整文案或评论格式时应修改宿主机上的这个文件,然后重启容器。
|
||||||
|
|
||||||
### collection
|
### collection
|
||||||
|
|
||||||
- `enabled`
|
- `enabled`
|
||||||
|
|||||||
@ -75,7 +75,7 @@
|
|||||||
"platform": "bilibili",
|
"platform": "bilibili",
|
||||||
"aid": 123456,
|
"aid": 123456,
|
||||||
"bvid": "BV1xxxx",
|
"bvid": "BV1xxxx",
|
||||||
"title": "【王海颖 (歌曲纯享版)】_03月29日 22时02分 共18首歌",
|
"title": "【王海颖 (歌曲纯享版)】 03月29日 22时02分 共18首歌",
|
||||||
"published_at": "2026-03-30T07:56:13+08:00"
|
"published_at": "2026-03-30T07:56:13+08:00"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|||||||
321
docs/publish-output-examples.md
Normal file
321
docs/publish-output-examples.md
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
# 发布输出示例与流程说明
|
||||||
|
|
||||||
|
本文档面向使用者说明 `biliup-next` 的主流程、输入输出、当前已实现功能,以及一次多段同场直播发布后的示例文案。
|
||||||
|
|
||||||
|
## 项目功能
|
||||||
|
|
||||||
|
`biliup-next` 将一场直播录播拆成两个最终发布目标:
|
||||||
|
|
||||||
|
- 直播完整版:由外部流程或人工上传到 B 站,本项目负责记录/绑定它的 BV 号,并给它补充置顶时间轴评论、加入完整版合集。
|
||||||
|
- 歌曲纯享版:由本项目从直播录播中识别歌曲、切出歌曲片段、合并发布为一个分 P 视频,并给它补充置顶歌单评论、加入纯享版合集。
|
||||||
|
|
||||||
|
当前主链路:
|
||||||
|
|
||||||
|
```text
|
||||||
|
stage 输入视频
|
||||||
|
-> ingest 导入并归并 session
|
||||||
|
-> transcribe 语音转字幕
|
||||||
|
-> song_detect 识别歌曲
|
||||||
|
-> split 切出歌曲片段
|
||||||
|
-> publish 发布歌曲纯享版
|
||||||
|
-> comment 发布/置顶评论
|
||||||
|
-> collection 加入合集
|
||||||
|
```
|
||||||
|
|
||||||
|
## 输入
|
||||||
|
|
||||||
|
最常见输入是把录播视频放入 `data/workspace/stage/`。
|
||||||
|
|
||||||
|
支持的形式:
|
||||||
|
|
||||||
|
- 单个视频文件:一场直播只有一个录播文件。
|
||||||
|
- 多个视频文件:同一场直播被分成多段录播文件。
|
||||||
|
- 浏览器上传:通过控制台上传到 stage。
|
||||||
|
- 本机复制:通过控制台把服务器上的文件复制到 stage。
|
||||||
|
|
||||||
|
输入文件名会用于推测主播和直播开始时间,例如:
|
||||||
|
|
||||||
|
```text
|
||||||
|
王海颖唱歌录播 04月19日 22时10分.mp4
|
||||||
|
王海颖唱歌录播 04月19日 23时05分.mp4
|
||||||
|
王海颖唱歌录播 04月20日 00时01分.mp4
|
||||||
|
```
|
||||||
|
|
||||||
|
## Session 归并
|
||||||
|
|
||||||
|
同一主播、时间接近的多个录播片段会归入同一个 session。
|
||||||
|
|
||||||
|
同一 session 的行为:
|
||||||
|
|
||||||
|
- 只发布一个歌曲纯享版 BV。
|
||||||
|
- 多段录播的歌曲会按时间顺序聚合。
|
||||||
|
- 评论按 `P1`、`P2`、`P3` 分段展示。
|
||||||
|
- 歌曲序号全局递增,不在每个 P 内重新从 1 开始。
|
||||||
|
|
||||||
|
示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
P1:
|
||||||
|
1. 程艾影 — 赵雷
|
||||||
|
2. 钟无艳 — 谢安琪
|
||||||
|
|
||||||
|
P2:
|
||||||
|
3. 慢慢喜欢你 — 莫文蔚
|
||||||
|
|
||||||
|
P3:
|
||||||
|
4. 空白格 — 蔡健雅
|
||||||
|
```
|
||||||
|
|
||||||
|
## BV 获取
|
||||||
|
|
||||||
|
### 歌曲纯享版 BV
|
||||||
|
|
||||||
|
歌曲纯享版由本项目调用 `biliup upload` 发布。
|
||||||
|
|
||||||
|
发布成功后,项目会从 `biliup` 输出中提取 BV 号,并写入当前 session 目录:
|
||||||
|
|
||||||
|
```text
|
||||||
|
bvid.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
这个 BV 会用于:
|
||||||
|
|
||||||
|
- 纯享版评论发布。
|
||||||
|
- 完整版评论顶部反向链接。
|
||||||
|
- 纯享版合集同步。
|
||||||
|
|
||||||
|
### 直播完整版 BV
|
||||||
|
|
||||||
|
完整版 BV 可以来自三种方式:
|
||||||
|
|
||||||
|
- 控制台手动绑定。
|
||||||
|
- API/webhook 传入。
|
||||||
|
- `biliup list` 标题匹配。
|
||||||
|
|
||||||
|
`biliup list` 会同时接受 `开放浏览` 和 `审核中` 状态。完整版视频只要上传后生成了 BV,即使仍在审核中,也可以被写入纯享版简介、动态和评论互链。
|
||||||
|
|
||||||
|
成功解析后会写入:
|
||||||
|
|
||||||
|
```text
|
||||||
|
full_video_bvid.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
默认标题匹配是保守的精确匹配:会先去掉空格、标点、括号、冒号等,只保留中文、英文、数字,再比较标题是否相等。
|
||||||
|
|
||||||
|
如果 `allow_fuzzy_full_video_match=false`,不会做包含式模糊匹配。为了避免误匹配,推荐在完整版上传完成后手动绑定 BV。
|
||||||
|
|
||||||
|
## 示例场景
|
||||||
|
|
||||||
|
假设本次直播由三段录播组成:
|
||||||
|
|
||||||
|
```text
|
||||||
|
王海颖唱歌录播 04月19日 22时10分
|
||||||
|
王海颖唱歌录播 04月19日 23时05分
|
||||||
|
王海颖唱歌录播 04月20日 00时01分
|
||||||
|
```
|
||||||
|
|
||||||
|
假设 BV 绑定结果如下:
|
||||||
|
|
||||||
|
```text
|
||||||
|
本次直播完整版:BVFULLCURR
|
||||||
|
本次歌曲纯享版:BVPURECURR
|
||||||
|
上次直播完整版:BVFULLPREV
|
||||||
|
```
|
||||||
|
|
||||||
|
假设识别出的歌曲如下:
|
||||||
|
|
||||||
|
```text
|
||||||
|
P1:
|
||||||
|
00:06:32 程艾影 — 赵雷
|
||||||
|
00:14:45 钟无艳 — 谢安琪
|
||||||
|
|
||||||
|
P2:
|
||||||
|
00:20:57 慢慢喜欢你 — 莫文蔚
|
||||||
|
|
||||||
|
P3:
|
||||||
|
00:27:16 空白格 — 蔡健雅
|
||||||
|
```
|
||||||
|
|
||||||
|
## 歌曲纯享版标题
|
||||||
|
|
||||||
|
当前模板:
|
||||||
|
|
||||||
|
```text
|
||||||
|
【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌
|
||||||
|
```
|
||||||
|
|
||||||
|
示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
【王海颖 (歌曲纯享版)】 04月19日 22时10分 共4首歌
|
||||||
|
```
|
||||||
|
|
||||||
|
## 歌曲纯享版简介
|
||||||
|
|
||||||
|
当前模板会保持简介较短,完整歌单放到置顶评论中,避免 B 站简介截断。
|
||||||
|
|
||||||
|
示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
王海颖 04月19日 22时10分 歌曲纯享版。
|
||||||
|
|
||||||
|
完整歌单与时间轴见置顶评论。
|
||||||
|
直播完整版:https://www.bilibili.com/video/BVFULLCURR
|
||||||
|
上次直播:https://www.bilibili.com/video/BVFULLPREV
|
||||||
|
|
||||||
|
本视频为歌曲纯享切片,适合只听歌曲。
|
||||||
|
```
|
||||||
|
|
||||||
|
如果某个链接暂时没有 BV,项目会自动移除对应的空链接行。
|
||||||
|
|
||||||
|
## 歌曲纯享版动态
|
||||||
|
|
||||||
|
示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
王海颖 04月19日 22时10分 歌曲纯享版已发布。完整歌单见置顶评论。
|
||||||
|
直播完整版:https://www.bilibili.com/video/BVFULLCURR
|
||||||
|
上次直播:https://www.bilibili.com/video/BVFULLPREV
|
||||||
|
```
|
||||||
|
|
||||||
|
## 歌曲纯享版置顶评论
|
||||||
|
|
||||||
|
纯享版评论主要给听歌用户看,不带歌曲时间轴,只展示歌名、歌手和互链。
|
||||||
|
|
||||||
|
默认由 `runtime/upload_config.json` 的 `comment_template.split_header`、`comment_template.split_part_header`、`comment_template.split_song_line` 生成。
|
||||||
|
|
||||||
|
示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。
|
||||||
|
直播完整版:https://www.bilibili.com/video/BVFULLCURR (完整录播,含聊天/互动/完整流程)
|
||||||
|
上次纯享:https://www.bilibili.com/video/BVPUREPREV (上一场歌曲纯享版)
|
||||||
|
|
||||||
|
P1:
|
||||||
|
1. 程艾影 — 赵雷
|
||||||
|
2. 钟无艳 — 谢安琪
|
||||||
|
|
||||||
|
P2:
|
||||||
|
3. 慢慢喜欢你 — 莫文蔚
|
||||||
|
|
||||||
|
P3:
|
||||||
|
4. 空白格 — 蔡健雅
|
||||||
|
```
|
||||||
|
|
||||||
|
## 直播完整版置顶评论
|
||||||
|
|
||||||
|
完整版评论主要给看完整录播的用户跳转歌曲纯享版,并提供完整时间轴。
|
||||||
|
|
||||||
|
默认由 `runtime/upload_config.json` 的 `comment_template.full_header`、`comment_template.full_part_header`、`comment_template.full_timeline_line` 生成。
|
||||||
|
|
||||||
|
示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。
|
||||||
|
歌曲纯享版:https://www.bilibili.com/video/BVPURECURR (只听歌曲看这里)
|
||||||
|
上次完整版:https://www.bilibili.com/video/BVFULLPREV (上一场完整录播)
|
||||||
|
|
||||||
|
P1:
|
||||||
|
1. 00:06:32 程艾影 — 赵雷
|
||||||
|
2. 00:14:45 钟无艳 — 谢安琪
|
||||||
|
|
||||||
|
P2:
|
||||||
|
3. 00:20:57 慢慢喜欢你 — 莫文蔚
|
||||||
|
|
||||||
|
P3:
|
||||||
|
4. 00:27:16 空白格 — 蔡健雅
|
||||||
|
```
|
||||||
|
|
||||||
|
## 评论格式配置
|
||||||
|
|
||||||
|
评论格式可以像标题、简介、动态一样通过 `runtime/upload_config.json` 修改:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"comment_template": {
|
||||||
|
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||||
|
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||||
|
"split_part_header": "P{part_index}:",
|
||||||
|
"full_part_header": "P{part_index}:",
|
||||||
|
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||||
|
"split_text_song_line": "{song_index}. {song_text}",
|
||||||
|
"full_timeline_line": "{song_index}. {line_text}"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
字段含义:
|
||||||
|
|
||||||
|
- `split_header`:纯享版评论顶部说明。
|
||||||
|
- `full_header`:完整版评论顶部说明。
|
||||||
|
- `split_part_header` / `full_part_header`:多片段 session 的分段标题,例如 `P1:`。
|
||||||
|
- `split_song_line`:从 `songs.json` 生成纯享歌单时的单行格式。
|
||||||
|
- `split_text_song_line`:`songs.json` 不可用时,从 `songs.txt` 兜底生成纯享歌单的单行格式。
|
||||||
|
- `full_timeline_line`:完整版时间轴评论的单行格式。
|
||||||
|
|
||||||
|
常用变量:
|
||||||
|
|
||||||
|
- `{current_full_video_link}`:本场直播完整版链接。
|
||||||
|
- `{current_pure_video_link}`:本场歌曲纯享版链接。
|
||||||
|
- `{previous_full_video_link}`:上一场直播完整版链接。
|
||||||
|
- `{previous_pure_video_link}`:上一场歌曲纯享版链接。
|
||||||
|
- `{part_index}`:P 分段序号。
|
||||||
|
- `{song_index}`:歌曲全局序号。
|
||||||
|
- `{title}` / `{artist}` / `{artist_suffix}`:歌曲标题、歌手、带分隔符的歌手后缀。
|
||||||
|
- `{song_text}`:不带时间戳的歌曲文本。
|
||||||
|
- `{line_text}`:原始时间轴行,通常包含时间戳。
|
||||||
|
|
||||||
|
如果评论头部某一行包含空链接变量,例如 `{previous_full_video_link}` 为空,这一整行会自动省略。
|
||||||
|
|
||||||
|
## 合集同步
|
||||||
|
|
||||||
|
项目维护两个合集目标:
|
||||||
|
|
||||||
|
- 合集 A:直播完整版。
|
||||||
|
- 合集 B:歌曲纯享版。
|
||||||
|
|
||||||
|
当前配置中的示例 ID:
|
||||||
|
|
||||||
|
```text
|
||||||
|
直播完整版合集:7196643
|
||||||
|
歌曲纯享版合集:7196624
|
||||||
|
```
|
||||||
|
|
||||||
|
合集同步完成后,如果启用了清理策略,项目可以删除本地原视频或切片视频以节省空间。当前默认不删除。
|
||||||
|
|
||||||
|
## 幂等与重试
|
||||||
|
|
||||||
|
项目会在 session 目录写入标记文件,避免重复上传和重复评论。
|
||||||
|
|
||||||
|
常见标记:
|
||||||
|
|
||||||
|
```text
|
||||||
|
bvid.txt
|
||||||
|
full_video_bvid.txt
|
||||||
|
upload_done.flag
|
||||||
|
comment_split_done.flag
|
||||||
|
comment_full_done.flag
|
||||||
|
collection_a_done.flag
|
||||||
|
collection_b_done.flag
|
||||||
|
```
|
||||||
|
|
||||||
|
发布阶段的关键行为:
|
||||||
|
|
||||||
|
- 首批最多上传 5 个分 P。
|
||||||
|
- 超过 5 个分 P 时,后续通过 append 追加。
|
||||||
|
- 已经写入 `bvid.txt` 后,重试会优先 append 到已有视频,而不是重新发布。
|
||||||
|
- `publish_progress.json` 记录 append 进度,避免重试时重复追加已完成批次。
|
||||||
|
|
||||||
|
评论阶段的关键行为:
|
||||||
|
|
||||||
|
- 同一 session 只由最早片段负责聚合评论。
|
||||||
|
- 非 anchor 片段进入评论步骤时会跳过实际发评。
|
||||||
|
- 这样可以避免同一场直播的多个片段重复发布相同评论。
|
||||||
|
|
||||||
|
## 使用建议
|
||||||
|
|
||||||
|
发布前建议确认:
|
||||||
|
|
||||||
|
- stage 中的视频文件名能解析出主播和时间。
|
||||||
|
- `runtime/upload_config.json` 中标题、简介、动态符合预期。
|
||||||
|
- 完整版上传完成后,尽量手动绑定 `full_video_bvid`。
|
||||||
|
- worker 重启前确认已有 `bvid.txt` 和 `publish_progress.json` 是否符合当前发布进度。
|
||||||
|
- 如需自动匹配完整版 BV,确认 `biliup list` 中完整视频标题与任务标题标准化后相等。
|
||||||
@ -25,3 +25,11 @@ cd /home/theshy/biliup/biliup-next
|
|||||||
- `upload_config.json` <- `upload_config.example.json`
|
- `upload_config.json` <- `upload_config.example.json`
|
||||||
|
|
||||||
它们只用于占位,能保证项目进入“可配置、可 doctor”的状态,但不代表上传链路已经可用。
|
它们只用于占位,能保证项目进入“可配置、可 doctor”的状态,但不代表上传链路已经可用。
|
||||||
|
|
||||||
|
`upload_config.json` 同时控制:
|
||||||
|
|
||||||
|
- 纯享版投稿标题、简介、动态、标签:`template`
|
||||||
|
- 纯享版和完整版置顶评论格式:`comment_template`
|
||||||
|
- 文件名解析规则:`filename_patterns`
|
||||||
|
|
||||||
|
Docker 部署时这个目录通常会作为 `./runtime:/app/runtime` 挂载到容器内。镜像更新不会覆盖已有 `upload_config.json`,所以修改评论、动态、简介格式时,应直接改宿主机上的 `runtime/upload_config.json`。
|
||||||
|
|||||||
@ -1,5 +1,95 @@
|
|||||||
{
|
{
|
||||||
"line": "AUTO",
|
"comment": "B站投稿配置文件 - 根据您的需要修改模板内容",
|
||||||
"limit": 3,
|
"upload_settings": {
|
||||||
"threads": 3
|
"tid": 31,
|
||||||
|
"copyright": 1,
|
||||||
|
"source": "王海颖好听的歌声分享",
|
||||||
|
"cover": ""
|
||||||
|
},
|
||||||
|
"template": {
|
||||||
|
"title": "【{streamer} (歌曲纯享版)】 {date} 共{song_count}首歌",
|
||||||
|
"description": "{streamer} {date} 歌曲纯享版。\n\n完整歌单与时间轴见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}\n\n本视频为歌曲纯享切片,适合只听歌曲。",
|
||||||
|
"tag": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐",
|
||||||
|
"dynamic": "{streamer} {date} 歌曲纯享版已发布。完整歌单见置顶评论。\n直播完整版:{current_full_video_link}\n上次直播:{previous_full_video_link}"
|
||||||
|
},
|
||||||
|
"comment_template": {
|
||||||
|
"split_header": "当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)",
|
||||||
|
"full_header": "当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n上次完整版:{previous_full_video_link} (上一场完整录播)",
|
||||||
|
"split_part_header": "P{part_index}:",
|
||||||
|
"full_part_header": "P{part_index}:",
|
||||||
|
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||||
|
"split_text_song_line": "{song_index}. {song_text}",
|
||||||
|
"full_timeline_line": "{song_index}. {line_text}"
|
||||||
|
},
|
||||||
|
"streamers": {
|
||||||
|
"王海颖": {
|
||||||
|
"display_name": "王海颖",
|
||||||
|
"tags": "可爱,聒噪的王海颖,王海颖,宸哥ovo,好听的歌声,吉他弹唱,纯享版,唱歌,音乐"
|
||||||
|
},
|
||||||
|
"示例主播": {
|
||||||
|
"display_name": "示例主播",
|
||||||
|
"tags": "示例,标签1,标签2,唱歌,音乐"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"quotes": [
|
||||||
|
{
|
||||||
|
"text": "此心安处是吾乡。",
|
||||||
|
"author": "苏轼《定风波·南海归赠王定国侍人寓娘》"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "山重水复疑无路,柳暗花明又一村。",
|
||||||
|
"author": "陆游《游山西村》"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "长风破浪会有时,直挂云帆济沧海。",
|
||||||
|
"author": "李白《行路难·其一》"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"filename_patterns": {
|
||||||
|
"comment": "从文件名提取信息的正则表达式模式 - 按优先级从高到低排列",
|
||||||
|
"patterns": [
|
||||||
|
{
|
||||||
|
"name": "主播名唱歌录播 日期 时间",
|
||||||
|
"regex": "^(?P<streamer>.+?)唱歌录播 (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分",
|
||||||
|
"date_format": "{month}月{day}日 {hour}时{minute}分",
|
||||||
|
"example": "王海颖唱歌录播 01月28日 22时06分"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "日期 时间 主播名 唱歌录播",
|
||||||
|
"regex": "^(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 (?P<streamer>.+?)唱歌录播",
|
||||||
|
"date_format": "{month}月{day}日 {hour}时{minute}分",
|
||||||
|
"example": "01月25日 09时20分 王海颖唱歌录播"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "主播名唱歌录播: 年月日 时分 [BV号]",
|
||||||
|
"regex": "^(?P<streamer>.+?)唱歌录播[::] (?P<year>\\d{4})年(?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})时(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
|
||||||
|
"date_format": "{month}月{day}日 {hour}时{minute}分",
|
||||||
|
"example": "王海颖唱歌录播: 2026年01月22日 22时09分 [BV1wEzcBqEhW]"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "主播名 日期 时分 [BV号]",
|
||||||
|
"regex": "^(?P<streamer>.+?) (?P<month>\\d{2})月(?P<day>\\d{2})日 (?P<hour>\\d{2})点(?P<minute>\\d{2})分 \\[(?P<video_id>BV[A-Za-z0-9]+)\\]",
|
||||||
|
"date_format": "{month}月{day}日 {hour}点{minute}分",
|
||||||
|
"example": "王海颖 01月25日 02点24分 [BV1KCzQBpEXC]"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "主播名_日期",
|
||||||
|
"regex": "^(?P<streamer>.+?)_(?P<date>\\d{1,2}月\\d{1,2}日)",
|
||||||
|
"date_format": "{date}",
|
||||||
|
"example": "王海颖_1月20日"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "主播名_完整日期",
|
||||||
|
"regex": "^(?P<streamer>.+?)_(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})",
|
||||||
|
"date_format": "{month}月{day}日",
|
||||||
|
"example": "王海颖_2026-01-20"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "主播名_描述",
|
||||||
|
"regex": "^(?P<streamer>.+?)_(?P<desc>.+)",
|
||||||
|
"date_format": "{desc}",
|
||||||
|
"example": "测试搬运_前15分钟"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
28
scripts/init-docker-config.sh
Normal file
28
scripts/init-docker-config.sh
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env sh
# Bootstrap the local config/runtime layout for a Docker deployment.
# Idempotent: existing files are never overwritten, only created when missing.
set -eu

mkdir -p config runtime/codex data/workspace/stage data/workspace/session data/workspace/backup

# copy_if_missing DEST SRC MESSAGE
# Copy SRC to DEST and print MESSAGE, but only when DEST does not exist yet.
copy_if_missing() {
    if [ ! -f "$1" ]; then
        cp "$2" "$1"
        echo "$3"
    fi
}

copy_if_missing .env .env.example "created .env from .env.example"
copy_if_missing config/settings.json config/settings.docker.example.json "created config/settings.json from config/settings.docker.example.json"
copy_if_missing runtime/cookies.json runtime/cookies.example.json "created runtime/cookies.json placeholder"
copy_if_missing runtime/upload_config.json runtime/upload_config.example.json "created runtime/upload_config.json placeholder"

# The biliup binary is provided by the operator; warn if it is not usable yet.
if [ ! -x runtime/biliup ]; then
    echo "warning: runtime/biliup is missing or not executable; publish will fail until you provide it" >&2
fi
|
||||||
@ -4,3 +4,4 @@ Version: 0.1.0
|
|||||||
Summary: Next-generation control-plane-first biliup pipeline
|
Summary: Next-generation control-plane-first biliup pipeline
|
||||||
Requires-Python: >=3.11
|
Requires-Python: >=3.11
|
||||||
Requires-Dist: requests>=2.32.0
|
Requires-Dist: requests>=2.32.0
|
||||||
|
Requires-Dist: groq>=0.18.0
|
||||||
|
|||||||
@ -10,7 +10,19 @@ src/biliup_next.egg-info/top_level.txt
|
|||||||
src/biliup_next/app/api_server.py
|
src/biliup_next/app/api_server.py
|
||||||
src/biliup_next/app/bootstrap.py
|
src/biliup_next/app/bootstrap.py
|
||||||
src/biliup_next/app/cli.py
|
src/biliup_next/app/cli.py
|
||||||
|
src/biliup_next/app/control_plane_get_dispatcher.py
|
||||||
|
src/biliup_next/app/control_plane_post_dispatcher.py
|
||||||
src/biliup_next/app/dashboard.py
|
src/biliup_next/app/dashboard.py
|
||||||
|
src/biliup_next/app/retry_meta.py
|
||||||
|
src/biliup_next/app/scheduler.py
|
||||||
|
src/biliup_next/app/serializers.py
|
||||||
|
src/biliup_next/app/session_delivery_service.py
|
||||||
|
src/biliup_next/app/task_actions.py
|
||||||
|
src/biliup_next/app/task_audit.py
|
||||||
|
src/biliup_next/app/task_control_service.py
|
||||||
|
src/biliup_next/app/task_engine.py
|
||||||
|
src/biliup_next/app/task_policies.py
|
||||||
|
src/biliup_next/app/task_runner.py
|
||||||
src/biliup_next/app/worker.py
|
src/biliup_next/app/worker.py
|
||||||
src/biliup_next/core/config.py
|
src/biliup_next/core/config.py
|
||||||
src/biliup_next/core/errors.py
|
src/biliup_next/core/errors.py
|
||||||
@ -18,25 +30,56 @@ src/biliup_next/core/models.py
|
|||||||
src/biliup_next/core/providers.py
|
src/biliup_next/core/providers.py
|
||||||
src/biliup_next/core/registry.py
|
src/biliup_next/core/registry.py
|
||||||
src/biliup_next/infra/db.py
|
src/biliup_next/infra/db.py
|
||||||
|
src/biliup_next/infra/legacy_asset_sync.py
|
||||||
src/biliup_next/infra/log_reader.py
|
src/biliup_next/infra/log_reader.py
|
||||||
src/biliup_next/infra/plugin_loader.py
|
src/biliup_next/infra/plugin_loader.py
|
||||||
src/biliup_next/infra/runtime_doctor.py
|
src/biliup_next/infra/runtime_doctor.py
|
||||||
src/biliup_next/infra/stage_importer.py
|
src/biliup_next/infra/stage_importer.py
|
||||||
|
src/biliup_next/infra/storage_guard.py
|
||||||
src/biliup_next/infra/systemd_runtime.py
|
src/biliup_next/infra/systemd_runtime.py
|
||||||
src/biliup_next/infra/task_repository.py
|
src/biliup_next/infra/task_repository.py
|
||||||
src/biliup_next/infra/task_reset.py
|
src/biliup_next/infra/task_reset.py
|
||||||
|
src/biliup_next/infra/workspace_cleanup.py
|
||||||
|
src/biliup_next/infra/workspace_paths.py
|
||||||
|
src/biliup_next/infra/adapters/bilibili_api.py
|
||||||
|
src/biliup_next/infra/adapters/biliup_cli.py
|
||||||
|
src/biliup_next/infra/adapters/codex_cli.py
|
||||||
src/biliup_next/infra/adapters/full_video_locator.py
|
src/biliup_next/infra/adapters/full_video_locator.py
|
||||||
|
src/biliup_next/infra/adapters/qwen_cli.py
|
||||||
|
src/biliup_next/infra/adapters/yt_dlp.py
|
||||||
src/biliup_next/modules/collection/service.py
|
src/biliup_next/modules/collection/service.py
|
||||||
src/biliup_next/modules/collection/providers/bilibili_collection.py
|
src/biliup_next/modules/collection/providers/bilibili_collection.py
|
||||||
src/biliup_next/modules/comment/service.py
|
src/biliup_next/modules/comment/service.py
|
||||||
src/biliup_next/modules/comment/providers/bilibili_top_comment.py
|
src/biliup_next/modules/comment/providers/bilibili_top_comment.py
|
||||||
src/biliup_next/modules/ingest/service.py
|
src/biliup_next/modules/ingest/service.py
|
||||||
|
src/biliup_next/modules/ingest/providers/bilibili_url.py
|
||||||
src/biliup_next/modules/ingest/providers/local_file.py
|
src/biliup_next/modules/ingest/providers/local_file.py
|
||||||
src/biliup_next/modules/publish/service.py
|
src/biliup_next/modules/publish/service.py
|
||||||
src/biliup_next/modules/publish/providers/biliup_cli.py
|
src/biliup_next/modules/publish/providers/biliup_cli.py
|
||||||
src/biliup_next/modules/song_detect/service.py
|
src/biliup_next/modules/song_detect/service.py
|
||||||
src/biliup_next/modules/song_detect/providers/codex.py
|
src/biliup_next/modules/song_detect/providers/codex.py
|
||||||
|
src/biliup_next/modules/song_detect/providers/common.py
|
||||||
|
src/biliup_next/modules/song_detect/providers/qwen_cli.py
|
||||||
src/biliup_next/modules/split/service.py
|
src/biliup_next/modules/split/service.py
|
||||||
src/biliup_next/modules/split/providers/ffmpeg_copy.py
|
src/biliup_next/modules/split/providers/ffmpeg_copy.py
|
||||||
src/biliup_next/modules/transcribe/service.py
|
src/biliup_next/modules/transcribe/service.py
|
||||||
src/biliup_next/modules/transcribe/providers/groq.py
|
src/biliup_next/modules/transcribe/providers/groq.py
|
||||||
|
tests/test_api_server.py
|
||||||
|
tests/test_bilibili_top_comment_provider.py
|
||||||
|
tests/test_biliup_cli_publish_provider.py
|
||||||
|
tests/test_control_plane_get_dispatcher.py
|
||||||
|
tests/test_control_plane_post_dispatcher.py
|
||||||
|
tests/test_ingest_bilibili_url.py
|
||||||
|
tests/test_ingest_session_grouping.py
|
||||||
|
tests/test_publish_service.py
|
||||||
|
tests/test_retry_meta.py
|
||||||
|
tests/test_serializers.py
|
||||||
|
tests/test_session_delivery_service.py
|
||||||
|
tests/test_settings_service.py
|
||||||
|
tests/test_song_detect_providers.py
|
||||||
|
tests/test_task_actions.py
|
||||||
|
tests/test_task_control_service.py
|
||||||
|
tests/test_task_engine.py
|
||||||
|
tests/test_task_policies.py
|
||||||
|
tests/test_task_repository_sqlite.py
|
||||||
|
tests/test_task_runner.py
|
||||||
@ -1 +1,2 @@
|
|||||||
requests>=2.32.0
|
requests>=2.32.0
|
||||||
|
groq>=0.18.0
|
||||||
|
|||||||
@ -3,6 +3,8 @@ from __future__ import annotations
|
|||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
STEP_SETTINGS_GROUP = {
|
STEP_SETTINGS_GROUP = {
|
||||||
|
"transcribe": "transcribe",
|
||||||
|
"song_detect": "song_detect",
|
||||||
"publish": "publish",
|
"publish": "publish",
|
||||||
"comment": "comment",
|
"comment": "comment",
|
||||||
}
|
}
|
||||||
@ -54,6 +56,26 @@ def publish_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def transcribe_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
||||||
|
return retry_schedule_seconds(
|
||||||
|
settings,
|
||||||
|
count_key="retry_count",
|
||||||
|
backoff_key="retry_backoff_seconds",
|
||||||
|
default_count=3,
|
||||||
|
default_backoff=300,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def song_detect_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
||||||
|
return retry_schedule_seconds(
|
||||||
|
settings,
|
||||||
|
count_key="retry_count",
|
||||||
|
backoff_key="retry_backoff_seconds",
|
||||||
|
default_count=3,
|
||||||
|
default_backoff=300,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def comment_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
def comment_retry_schedule_seconds(settings: dict[str, object]) -> list[int]:
|
||||||
return retry_schedule_seconds(
|
return retry_schedule_seconds(
|
||||||
settings,
|
settings,
|
||||||
@ -77,7 +99,11 @@ def retry_meta_for_step(step, settings_by_group: dict[str, object]) -> dict[str,
|
|||||||
if not isinstance(group_settings, dict):
|
if not isinstance(group_settings, dict):
|
||||||
group_settings = {}
|
group_settings = {}
|
||||||
|
|
||||||
if step_name == "publish":
|
if step_name == "transcribe":
|
||||||
|
schedule = transcribe_retry_schedule_seconds(group_settings)
|
||||||
|
elif step_name == "song_detect":
|
||||||
|
schedule = song_detect_retry_schedule_seconds(group_settings)
|
||||||
|
elif step_name == "publish":
|
||||||
schedule = publish_retry_schedule_seconds(group_settings)
|
schedule = publish_retry_schedule_seconds(group_settings)
|
||||||
elif step_name == "comment":
|
elif step_name == "comment":
|
||||||
schedule = comment_retry_schedule_seconds(group_settings)
|
schedule = comment_retry_schedule_seconds(group_settings)
|
||||||
|
|||||||
@ -52,7 +52,16 @@ def infer_error_step_name(task, steps: dict[str, object]) -> str: # type: ignor
|
|||||||
def retry_wait_payload(task_id: str, step, state: dict[str, object]) -> dict[str, object] | None: # type: ignore[no-untyped-def]
|
def retry_wait_payload(task_id: str, step, state: dict[str, object]) -> dict[str, object] | None: # type: ignore[no-untyped-def]
|
||||||
if step.status != "failed_retryable":
|
if step.status != "failed_retryable":
|
||||||
return None
|
return None
|
||||||
meta = retry_meta_for_step(step, {"publish": settings_for(state, "publish")})
|
step_settings_group = {
|
||||||
|
"transcribe": "transcribe",
|
||||||
|
"song_detect": "song_detect",
|
||||||
|
"publish": "publish",
|
||||||
|
"comment": "comment",
|
||||||
|
}.get(step.step_name)
|
||||||
|
settings_by_group = {}
|
||||||
|
if step_settings_group is not None and step_settings_group in state["settings"]:
|
||||||
|
settings_by_group[step_settings_group] = settings_for(state, step_settings_group)
|
||||||
|
meta = retry_meta_for_step(step, settings_by_group)
|
||||||
if meta is None or meta["retry_due"]:
|
if meta is None or meta["retry_due"]:
|
||||||
return None
|
return None
|
||||||
return {
|
return {
|
||||||
|
|||||||
@ -2,6 +2,8 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from biliup_next.app.retry_meta import comment_retry_schedule_seconds
|
from biliup_next.app.retry_meta import comment_retry_schedule_seconds
|
||||||
from biliup_next.app.retry_meta import publish_retry_schedule_seconds
|
from biliup_next.app.retry_meta import publish_retry_schedule_seconds
|
||||||
|
from biliup_next.app.retry_meta import song_detect_retry_schedule_seconds
|
||||||
|
from biliup_next.app.retry_meta import transcribe_retry_schedule_seconds
|
||||||
from biliup_next.app.task_engine import infer_error_step_name, settings_for as task_engine_settings_for
|
from biliup_next.app.task_engine import infer_error_step_name, settings_for as task_engine_settings_for
|
||||||
from biliup_next.core.models import utc_now_iso
|
from biliup_next.core.models import utc_now_iso
|
||||||
|
|
||||||
@ -35,6 +37,18 @@ def resolve_failure(task, repo, state: dict[str, object], exc) -> dict[str, obje
|
|||||||
next_retry_count = current_retry + 1
|
next_retry_count = current_retry + 1
|
||||||
next_status = "failed_retryable" if exc.retryable else "failed_manual"
|
next_status = "failed_retryable" if exc.retryable else "failed_manual"
|
||||||
next_retry_delay_seconds: int | None = None
|
next_retry_delay_seconds: int | None = None
|
||||||
|
if exc.retryable and step_name == "transcribe":
|
||||||
|
schedule = transcribe_retry_schedule_seconds(settings_for(state, "transcribe"))
|
||||||
|
if next_retry_count > len(schedule):
|
||||||
|
next_status = "failed_manual"
|
||||||
|
else:
|
||||||
|
next_retry_delay_seconds = schedule[next_retry_count - 1]
|
||||||
|
if exc.retryable and step_name == "song_detect":
|
||||||
|
schedule = song_detect_retry_schedule_seconds(settings_for(state, "song_detect"))
|
||||||
|
if next_retry_count > len(schedule):
|
||||||
|
next_status = "failed_manual"
|
||||||
|
else:
|
||||||
|
next_retry_delay_seconds = schedule[next_retry_count - 1]
|
||||||
if exc.retryable and step_name == "publish":
|
if exc.retryable and step_name == "publish":
|
||||||
publish_settings = settings_for(state, "publish")
|
publish_settings = settings_for(state, "publish")
|
||||||
if exc.code == "PUBLISH_RATE_LIMITED":
|
if exc.code == "PUBLISH_RATE_LIMITED":
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@ -32,6 +33,7 @@ class SettingsService:
|
|||||||
schema = self._read_json(self.schema_path)
|
schema = self._read_json(self.schema_path)
|
||||||
settings = self._read_json(self.settings_path)
|
settings = self._read_json(self.settings_path)
|
||||||
settings = self._apply_schema_defaults(settings, schema)
|
settings = self._apply_schema_defaults(settings, schema)
|
||||||
|
settings = self._apply_env_overrides(settings, schema)
|
||||||
settings = self._normalize_paths(settings)
|
settings = self._normalize_paths(settings)
|
||||||
self.validate(settings, schema)
|
self.validate(settings, schema)
|
||||||
return SettingsBundle(schema=schema, settings=settings)
|
return SettingsBundle(schema=schema, settings=settings)
|
||||||
@ -125,6 +127,57 @@ class SettingsService:
|
|||||||
group_value[field_name] = self._clone_default(field_schema["default"])
|
group_value[field_name] = self._clone_default(field_schema["default"])
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
|
def _apply_env_overrides(self, settings: dict[str, Any], schema: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
merged = json.loads(json.dumps(settings))
|
||||||
|
aliases = {
|
||||||
|
("transcribe", "groq_api_key"): ["GROQ_API_KEY"],
|
||||||
|
("transcribe", "groq_api_keys"): ["GROQ_API_KEYS"],
|
||||||
|
("collection", "season_id_a"): ["COLLECTION_SEASON_ID_A"],
|
||||||
|
("collection", "season_id_b"): ["COLLECTION_SEASON_ID_B"],
|
||||||
|
}
|
||||||
|
for group_name, fields in schema.get("groups", {}).items():
|
||||||
|
group_value = merged.setdefault(group_name, {})
|
||||||
|
if not isinstance(group_value, dict):
|
||||||
|
continue
|
||||||
|
for field_name, field_schema in fields.items():
|
||||||
|
env_names = [
|
||||||
|
f"BILIUP_NEXT__{group_name}__{field_name}".upper(),
|
||||||
|
f"BILIUP_NEXT_{group_name}_{field_name}".upper(),
|
||||||
|
*aliases.get((group_name, field_name), []),
|
||||||
|
]
|
||||||
|
raw_value = self._first_env_value(env_names)
|
||||||
|
if raw_value is None:
|
||||||
|
continue
|
||||||
|
group_value[field_name] = self._parse_env_value(raw_value, field_schema)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _first_env_value(names: list[str]) -> str | None:
|
||||||
|
for name in names:
|
||||||
|
value = os.environ.get(name)
|
||||||
|
if value:
|
||||||
|
return value
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_env_value(value: str, field_schema: dict[str, Any]) -> Any:
|
||||||
|
expected = field_schema.get("type")
|
||||||
|
if expected == "integer":
|
||||||
|
return int(value)
|
||||||
|
if expected == "boolean":
|
||||||
|
normalized = value.strip().lower()
|
||||||
|
if normalized in {"1", "true", "yes", "on"}:
|
||||||
|
return True
|
||||||
|
if normalized in {"0", "false", "no", "off"}:
|
||||||
|
return False
|
||||||
|
raise ConfigError(f"无法解析布尔环境变量值: {value}")
|
||||||
|
if expected == "array":
|
||||||
|
stripped = value.strip()
|
||||||
|
if stripped.startswith("["):
|
||||||
|
return json.loads(stripped)
|
||||||
|
return [item.strip() for item in value.split(",") if item.strip()]
|
||||||
|
return value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _clone_default(value: Any) -> Any:
|
def _clone_default(value: Any) -> Any:
|
||||||
return json.loads(json.dumps(value))
|
return json.loads(json.dumps(value))
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@ -18,9 +19,7 @@ class CodexCliAdapter:
|
|||||||
codex_cmd,
|
codex_cmd,
|
||||||
"exec",
|
"exec",
|
||||||
prompt.replace("\n", " "),
|
prompt.replace("\n", " "),
|
||||||
"--full-auto",
|
"--dangerously-bypass-approvals-and-sandbox",
|
||||||
"--sandbox",
|
|
||||||
"workspace-write",
|
|
||||||
"--output-schema",
|
"--output-schema",
|
||||||
"./song_schema.json",
|
"./song_schema.json",
|
||||||
"-o",
|
"-o",
|
||||||
@ -35,6 +34,7 @@ class CodexCliAdapter:
|
|||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
check=False,
|
check=False,
|
||||||
|
env=self._subprocess_env(),
|
||||||
)
|
)
|
||||||
except FileNotFoundError as exc:
|
except FileNotFoundError as exc:
|
||||||
raise ModuleError(
|
raise ModuleError(
|
||||||
@ -42,3 +42,12 @@ class CodexCliAdapter:
|
|||||||
message=f"找不到 codex 命令: {codex_cmd}",
|
message=f"找不到 codex 命令: {codex_cmd}",
|
||||||
retryable=False,
|
retryable=False,
|
||||||
) from exc
|
) from exc
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _subprocess_env() -> dict[str, str]:
|
||||||
|
env = os.environ.copy()
|
||||||
|
for key in ("HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"):
|
||||||
|
value = env.get(key)
|
||||||
|
if value and "://" not in value:
|
||||||
|
env[key] = f"http://{value}"
|
||||||
|
return env
|
||||||
|
|||||||
@ -8,6 +8,9 @@ from typing import Any
|
|||||||
from biliup_next.core.errors import ModuleError
|
from biliup_next.core.errors import ModuleError
|
||||||
|
|
||||||
|
|
||||||
|
VISIBLE_BILIUP_LIST_STATES = {"开放浏览", "审核中"}
|
||||||
|
|
||||||
|
|
||||||
def normalize_title(text: str) -> str:
|
def normalize_title(text: str) -> str:
|
||||||
return re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9]", "", text).lower()
|
return re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9]", "", text).lower()
|
||||||
|
|
||||||
@ -38,7 +41,7 @@ def fetch_biliup_list(settings: dict[str, Any], *, max_pages: int = 5) -> list[d
|
|||||||
if not line.startswith("BV"):
|
if not line.startswith("BV"):
|
||||||
continue
|
continue
|
||||||
parts = line.split("\t")
|
parts = line.split("\t")
|
||||||
if len(parts) >= 3 and "开放浏览" not in parts[2]:
|
if len(parts) >= 3 and not any(state in parts[2] for state in VISIBLE_BILIUP_LIST_STATES):
|
||||||
continue
|
continue
|
||||||
if len(parts) >= 2:
|
if len(parts) >= 2:
|
||||||
videos.append({"bvid": parts[0].strip(), "title": parts[1].strip()})
|
videos.append({"bvid": parts[0].strip(), "title": parts[1].strip()})
|
||||||
|
|||||||
@ -115,7 +115,6 @@ class TaskResetService:
|
|||||||
work_dir / "comment_full_done.flag",
|
work_dir / "comment_full_done.flag",
|
||||||
work_dir / "collection_a_done.flag",
|
work_dir / "collection_a_done.flag",
|
||||||
work_dir / "collection_b_done.flag",
|
work_dir / "collection_b_done.flag",
|
||||||
work_dir / "bvid.txt",
|
|
||||||
],
|
],
|
||||||
"comment": [
|
"comment": [
|
||||||
work_dir / "comment_done.flag",
|
work_dir / "comment_done.flag",
|
||||||
|
|||||||
199
src/biliup_next/infra/video_links.py
Normal file
199
src/biliup_next/infra/video_links.py
Normal file
@ -0,0 +1,199 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list, resolve_full_video_bvid
|
||||||
|
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||||
|
|
||||||
|
|
||||||
|
def bilibili_video_url(bvid: str | None) -> str:
|
||||||
|
bvid = (bvid or "").strip()
|
||||||
|
return f"https://www.bilibili.com/video/{bvid}" if bvid.startswith("BV") else ""
|
||||||
|
|
||||||
|
|
||||||
|
def read_task_split_bvid(task: Any) -> str:
|
||||||
|
path = resolve_task_work_dir(task) / "bvid.txt"
|
||||||
|
return _read_bvid(path)
|
||||||
|
|
||||||
|
|
||||||
|
def read_task_full_bvid(task: Any, context: Any | None = None) -> str:
|
||||||
|
if context is not None and getattr(context, "full_video_bvid", None):
|
||||||
|
return str(context.full_video_bvid).strip()
|
||||||
|
path = resolve_task_work_dir(task) / "full_video_bvid.txt"
|
||||||
|
return _read_bvid(path)
|
||||||
|
|
||||||
|
|
||||||
|
def link_context_for_task(task: Any, repo: Any | None, settings: dict[str, Any] | None = None) -> dict[str, str]:
|
||||||
|
context = _get_context(repo, task.id)
|
||||||
|
full_bvid = read_task_full_bvid(task, context)
|
||||||
|
if not full_bvid:
|
||||||
|
full_bvid = resolve_current_full_video_bvid(task, settings)
|
||||||
|
split_bvid = read_task_split_bvid(task)
|
||||||
|
previous = previous_live_links(task, repo, context, settings)
|
||||||
|
return {
|
||||||
|
"current_full_video_bvid": full_bvid,
|
||||||
|
"current_full_video_link": bilibili_video_url(full_bvid),
|
||||||
|
"current_pure_video_bvid": split_bvid,
|
||||||
|
"current_pure_video_link": bilibili_video_url(split_bvid),
|
||||||
|
"previous_full_video_bvid": previous.get("previous_full_video_bvid", ""),
|
||||||
|
"previous_full_video_link": previous.get("previous_full_video_link", ""),
|
||||||
|
"previous_pure_video_bvid": previous.get("previous_pure_video_bvid", ""),
|
||||||
|
"previous_pure_video_link": previous.get("previous_pure_video_link", ""),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_current_full_video_bvid(task: Any, settings: dict[str, Any] | None = None) -> str:
|
||||||
|
if not settings or not settings.get("biliup_path") or not settings.get("cookie_file"):
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
return resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings) or ""
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def previous_live_links(
|
||||||
|
task: Any,
|
||||||
|
repo: Any | None,
|
||||||
|
context: Any | None = None,
|
||||||
|
settings: dict[str, Any] | None = None,
|
||||||
|
) -> dict[str, str]:
|
||||||
|
context = context or _get_context(repo, task.id)
|
||||||
|
streamer = _context_streamer(context) or _parse_streamer_from_title(task.title)
|
||||||
|
if not streamer:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
current_started = _parse_datetime(getattr(context, "segment_started_at", None)) if context is not None else None
|
||||||
|
if current_started is None:
|
||||||
|
current_started = _parse_title_datetime(task.title)
|
||||||
|
current_session_key = getattr(context, "session_key", None) if context is not None else None
|
||||||
|
|
||||||
|
previous: dict[str, str] = {}
|
||||||
|
if repo is not None and hasattr(repo, "find_recent_task_contexts") and hasattr(repo, "get_task"):
|
||||||
|
for candidate in repo.find_recent_task_contexts(streamer, limit=50):
|
||||||
|
if candidate.task_id == task.id:
|
||||||
|
continue
|
||||||
|
if current_session_key and getattr(candidate, "session_key", None) == current_session_key:
|
||||||
|
continue
|
||||||
|
candidate_started = _parse_datetime(getattr(candidate, "segment_started_at", None))
|
||||||
|
if current_started is not None and candidate_started is not None and candidate_started >= current_started:
|
||||||
|
continue
|
||||||
|
candidate_task = repo.get_task(candidate.task_id)
|
||||||
|
if candidate_task is None:
|
||||||
|
continue
|
||||||
|
full_bvid = read_task_full_bvid(candidate_task, candidate)
|
||||||
|
split_bvid = read_task_split_bvid(candidate_task)
|
||||||
|
if full_bvid or split_bvid:
|
||||||
|
previous = {
|
||||||
|
"previous_full_video_bvid": full_bvid,
|
||||||
|
"previous_full_video_link": bilibili_video_url(full_bvid),
|
||||||
|
"previous_pure_video_bvid": split_bvid,
|
||||||
|
"previous_pure_video_link": bilibili_video_url(split_bvid),
|
||||||
|
}
|
||||||
|
break
|
||||||
|
if not previous.get("previous_full_video_bvid") or not previous.get("previous_pure_video_bvid"):
|
||||||
|
listed_previous = _previous_live_from_biliup_list(streamer, current_started, settings)
|
||||||
|
for key, value in listed_previous.items():
|
||||||
|
if value and not previous.get(key):
|
||||||
|
previous[key] = value
|
||||||
|
return previous
|
||||||
|
|
||||||
|
|
||||||
|
def _get_context(repo: Any | None, task_id: str) -> Any | None:
|
||||||
|
if repo is None or not hasattr(repo, "get_task_context"):
|
||||||
|
return None
|
||||||
|
return repo.get_task_context(task_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _context_streamer(context: Any | None) -> str:
|
||||||
|
if context is None:
|
||||||
|
return ""
|
||||||
|
return str(getattr(context, "streamer", "") or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _read_bvid(path: Path) -> str:
|
||||||
|
if not path.exists():
|
||||||
|
return ""
|
||||||
|
bvid = path.read_text(encoding="utf-8").strip()
|
||||||
|
return bvid if bvid.startswith("BV") else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_datetime(value: str | None) -> datetime | None:
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return datetime.fromisoformat(value)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_title_datetime(title: str) -> datetime | None:
|
||||||
|
patterns = (
|
||||||
|
r"(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
|
||||||
|
r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日\s+(?P<hour>\d{1,2})[时点](?P<minute>\d{1,2})分",
|
||||||
|
)
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, title)
|
||||||
|
if not match:
|
||||||
|
continue
|
||||||
|
data = match.groupdict()
|
||||||
|
year = int(data.get("year") or datetime.now().year)
|
||||||
|
try:
|
||||||
|
return datetime(year, int(data["month"]), int(data["day"]), int(data["hour"]), int(data["minute"]))
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_streamer_from_title(title: str) -> str:
|
||||||
|
marker = "唱歌录播"
|
||||||
|
if marker in title:
|
||||||
|
return title.split(marker, 1)[0].strip()
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _previous_live_from_biliup_list(
|
||||||
|
streamer: str,
|
||||||
|
current_started: datetime | None,
|
||||||
|
settings: dict[str, Any] | None,
|
||||||
|
) -> dict[str, str]:
|
||||||
|
if current_started is None or not settings or not settings.get("biliup_path") or not settings.get("cookie_file"):
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
videos = fetch_biliup_list(settings)
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
current_compare = current_started.replace(tzinfo=None)
|
||||||
|
full_candidates: list[tuple[datetime, str]] = []
|
||||||
|
pure_candidates: list[tuple[datetime, str]] = []
|
||||||
|
for video in videos:
|
||||||
|
title = video.get("title", "")
|
||||||
|
bvid = video.get("bvid", "")
|
||||||
|
if not bvid.startswith("BV"):
|
||||||
|
continue
|
||||||
|
if streamer not in title:
|
||||||
|
continue
|
||||||
|
started = _parse_title_datetime(title)
|
||||||
|
if started is not None and started > current_compare and "年" not in title:
|
||||||
|
started = started.replace(year=started.year - 1)
|
||||||
|
if started is None or started >= current_compare:
|
||||||
|
continue
|
||||||
|
if "纯享" in title:
|
||||||
|
pure_candidates.append((started, bvid))
|
||||||
|
elif "唱歌录播" in title:
|
||||||
|
full_candidates.append((started, bvid))
|
||||||
|
|
||||||
|
if not full_candidates and not pure_candidates:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
full_bvid = max(full_candidates, key=lambda item: item[0])[1] if full_candidates else ""
|
||||||
|
pure_bvid = max(pure_candidates, key=lambda item: item[0])[1] if pure_candidates else ""
|
||||||
|
return {
|
||||||
|
"previous_full_video_bvid": full_bvid,
|
||||||
|
"previous_full_video_link": bilibili_video_url(full_bvid),
|
||||||
|
"previous_pure_video_bvid": pure_bvid,
|
||||||
|
"previous_pure_video_link": bilibili_video_url(pure_bvid),
|
||||||
|
}
|
||||||
@ -1,6 +1,8 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from biliup_next.infra.task_repository import TaskRepository
|
from biliup_next.infra.task_repository import TaskRepository
|
||||||
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||||
@ -11,35 +13,59 @@ class WorkspaceCleanupService:
|
|||||||
self.repo = repo
|
self.repo = repo
|
||||||
|
|
||||||
def cleanup_task_outputs(self, task_id: str, settings: dict[str, object]) -> dict[str, object]:
|
def cleanup_task_outputs(self, task_id: str, settings: dict[str, object]) -> dict[str, object]:
|
||||||
task = self.repo.get_task(task_id)
|
cleanup_tasks = self._cleanup_tasks(task_id)
|
||||||
if task is None:
|
if not cleanup_tasks:
|
||||||
raise RuntimeError(f"task not found: {task_id}")
|
raise RuntimeError(f"task not found: {task_id}")
|
||||||
|
|
||||||
session_dir = resolve_task_work_dir(task)
|
|
||||||
removed: list[str] = []
|
removed: list[str] = []
|
||||||
skipped: list[str] = []
|
skipped: list[str] = []
|
||||||
|
cleaned_task_ids: list[str] = []
|
||||||
|
|
||||||
if settings.get("delete_source_video_after_collection_synced", False):
|
for task in cleanup_tasks:
|
||||||
source_path = Path(task.source_path).resolve()
|
session_dir = resolve_task_work_dir(task)
|
||||||
try:
|
cleaned_task_ids.append(task.id)
|
||||||
source_path.relative_to(session_dir)
|
|
||||||
source_managed = True
|
|
||||||
except ValueError:
|
|
||||||
source_managed = False
|
|
||||||
if source_path.exists() and source_managed:
|
|
||||||
source_path.unlink()
|
|
||||||
self.repo.delete_artifact_by_path(task_id, str(source_path.resolve()))
|
|
||||||
removed.append(str(source_path))
|
|
||||||
else:
|
|
||||||
skipped.append(str(source_path))
|
|
||||||
|
|
||||||
if settings.get("delete_split_videos_after_collection_synced", False):
|
if settings.get("delete_source_video_after_collection_synced", False):
|
||||||
split_dir = session_dir / "split_video"
|
source_path = Path(task.source_path).resolve()
|
||||||
if split_dir.exists():
|
try:
|
||||||
shutil.rmtree(split_dir, ignore_errors=True)
|
source_path.relative_to(session_dir)
|
||||||
self.repo.delete_artifacts(task_id, "clip_video")
|
source_managed = True
|
||||||
removed.append(str(split_dir))
|
except ValueError:
|
||||||
else:
|
source_managed = False
|
||||||
skipped.append(str(split_dir))
|
if source_path.exists() and source_managed:
|
||||||
|
source_path.unlink()
|
||||||
|
self.repo.delete_artifact_by_path(task.id, str(source_path.resolve()))
|
||||||
|
removed.append(str(source_path))
|
||||||
|
else:
|
||||||
|
skipped.append(str(source_path))
|
||||||
|
|
||||||
return {"removed": removed, "skipped": skipped}
|
if settings.get("delete_split_videos_after_collection_synced", False):
|
||||||
|
for video_dir_name in ("split_video", "publish_video"):
|
||||||
|
video_dir = session_dir / video_dir_name
|
||||||
|
if video_dir.exists():
|
||||||
|
shutil.rmtree(video_dir, ignore_errors=True)
|
||||||
|
removed.append(str(video_dir))
|
||||||
|
else:
|
||||||
|
skipped.append(str(video_dir))
|
||||||
|
self.repo.delete_artifacts(task.id, "clip_video")
|
||||||
|
|
||||||
|
return {"removed": removed, "skipped": skipped, "task_ids": cleaned_task_ids}
|
||||||
|
|
||||||
|
def _cleanup_tasks(self, task_id: str) -> list[Any]:
|
||||||
|
task = self.repo.get_task(task_id)
|
||||||
|
if task is None:
|
||||||
|
return []
|
||||||
|
|
||||||
|
if not hasattr(self.repo, "get_task_context") or not hasattr(self.repo, "list_task_contexts_by_session_key"):
|
||||||
|
return [task]
|
||||||
|
|
||||||
|
context = self.repo.get_task_context(task_id)
|
||||||
|
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
||||||
|
return [task]
|
||||||
|
|
||||||
|
tasks = []
|
||||||
|
for session_context in self.repo.list_task_contexts_by_session_key(context.session_key):
|
||||||
|
session_task = self.repo.get_task(session_context.task_id)
|
||||||
|
if session_task is not None:
|
||||||
|
tasks.append(session_task)
|
||||||
|
return tasks or [task]
|
||||||
|
|||||||
@ -31,4 +31,5 @@ class CollectionService:
|
|||||||
self.repo.update_task_status(task_id, "collection_synced", finished_at)
|
self.repo.update_task_status(task_id, "collection_synced", finished_at)
|
||||||
cleanup_result = self.cleanup.cleanup_task_outputs(task_id, settings)
|
cleanup_result = self.cleanup.cleanup_task_outputs(task_id, settings)
|
||||||
return {**result, "cleanup": cleanup_result}
|
return {**result, "cleanup": cleanup_result}
|
||||||
|
self.repo.update_task_status(task_id, "commented", finished_at)
|
||||||
return result
|
return result
|
||||||
|
|||||||
@ -11,9 +11,34 @@ from biliup_next.core.models import Task
|
|||||||
from biliup_next.core.providers import ProviderManifest
|
from biliup_next.core.providers import ProviderManifest
|
||||||
from biliup_next.infra.adapters.bilibili_api import BilibiliApiAdapter
|
from biliup_next.infra.adapters.bilibili_api import BilibiliApiAdapter
|
||||||
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
|
from biliup_next.infra.adapters.full_video_locator import resolve_full_video_bvid
|
||||||
|
from biliup_next.infra.video_links import bilibili_video_url, link_context_for_task
|
||||||
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_COMMENT_TEMPLATE = {
|
||||||
|
"split_header": (
|
||||||
|
"当前视频:歌曲纯享版:只保留本场直播中的歌曲片段,歌单见下方。\n"
|
||||||
|
"直播完整版:{current_full_video_link} (完整录播,含聊天/互动/完整流程)\n"
|
||||||
|
"上次纯享:{previous_pure_video_link} (上一场歌曲纯享版)"
|
||||||
|
),
|
||||||
|
"full_header": (
|
||||||
|
"当前视频:直播完整版:保留本场完整录播内容,歌曲时间轴见下方。\n"
|
||||||
|
"歌曲纯享版:{current_pure_video_link} (只听歌曲看这里)\n"
|
||||||
|
"上次完整版:{previous_full_video_link} (上一场完整录播)"
|
||||||
|
),
|
||||||
|
"split_part_header": "P{part_index}:",
|
||||||
|
"full_part_header": "P{part_index}:",
|
||||||
|
"split_song_line": "{song_index}. {title}{artist_suffix}",
|
||||||
|
"split_text_song_line": "{song_index}. {song_text}",
|
||||||
|
"full_timeline_line": "{song_index}. {line_text}",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class _SafeFormatDict(dict):
|
||||||
|
def __missing__(self, key: str) -> str:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
class BilibiliTopCommentProvider:
|
class BilibiliTopCommentProvider:
|
||||||
def __init__(self, bilibili_api: BilibiliApiAdapter | None = None) -> None:
|
def __init__(self, bilibili_api: BilibiliApiAdapter | None = None) -> None:
|
||||||
self.bilibili_api = bilibili_api or BilibiliApiAdapter()
|
self.bilibili_api = bilibili_api or BilibiliApiAdapter()
|
||||||
@ -41,7 +66,8 @@ class BilibiliTopCommentProvider:
|
|||||||
)
|
)
|
||||||
|
|
||||||
timeline_content = songs_path.read_text(encoding="utf-8").strip()
|
timeline_content = songs_path.read_text(encoding="utf-8").strip()
|
||||||
split_content, split_reason = self._build_split_comment(task, settings)
|
comment_template = self._load_comment_template(settings)
|
||||||
|
split_content, split_reason = self._build_split_comment(task, settings, comment_template)
|
||||||
if not timeline_content and not split_content:
|
if not timeline_content and not split_content:
|
||||||
self._touch_comment_flags(session_dir, split_done=True, full_done=True)
|
self._touch_comment_flags(session_dir, split_done=True, full_done=True)
|
||||||
return {"status": "skipped", "reason": "comment_content_empty"}
|
return {"status": "skipped", "reason": "comment_content_empty"}
|
||||||
@ -78,7 +104,7 @@ class BilibiliTopCommentProvider:
|
|||||||
|
|
||||||
if settings.get("post_full_video_timeline_comment", True) and not full_done:
|
if settings.get("post_full_video_timeline_comment", True) and not full_done:
|
||||||
full_bvid = resolve_full_video_bvid(task.title, session_dir, settings)
|
full_bvid = resolve_full_video_bvid(task.title, session_dir, settings)
|
||||||
full_content, full_reason = self._build_full_comment_content(task, settings)
|
full_content, full_reason = self._build_full_comment_content(task, settings, comment_template)
|
||||||
if full_reason is not None:
|
if full_reason is not None:
|
||||||
full_result = {"status": "skipped", "reason": full_reason}
|
full_result = {"status": "skipped", "reason": full_reason}
|
||||||
elif full_bvid and full_content:
|
elif full_bvid and full_content:
|
||||||
@ -135,44 +161,116 @@ class BilibiliTopCommentProvider:
|
|||||||
return {"status": "ok", "bvid": bvid, "aid": aid, "rpid": rpid}
|
return {"status": "ok", "bvid": bvid, "aid": aid, "rpid": rpid}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_split_comment_content(songs_json_path: Path, songs_txt_path: Path) -> str:
|
def _build_split_comment_content(
|
||||||
|
songs_json_path: Path,
|
||||||
|
songs_txt_path: Path,
|
||||||
|
*,
|
||||||
|
start_index: int = 1,
|
||||||
|
comment_template: dict[str, str] | None = None,
|
||||||
|
) -> tuple[str, int]:
|
||||||
|
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
|
||||||
|
next_index = start_index
|
||||||
if songs_json_path.exists():
|
if songs_json_path.exists():
|
||||||
try:
|
try:
|
||||||
data = json.loads(songs_json_path.read_text(encoding="utf-8"))
|
data = json.loads(songs_json_path.read_text(encoding="utf-8"))
|
||||||
lines = []
|
lines = []
|
||||||
for index, song in enumerate(data.get("songs", []), 1):
|
for song in data.get("songs", []):
|
||||||
title = str(song.get("title", "")).strip()
|
title = str(song.get("title", "")).strip()
|
||||||
artist = str(song.get("artist", "")).strip()
|
artist = str(song.get("artist", "")).strip()
|
||||||
if not title:
|
if not title:
|
||||||
continue
|
continue
|
||||||
suffix = f" — {artist}" if artist else ""
|
suffix = f" — {artist}" if artist else ""
|
||||||
lines.append(f"{index}. {title}{suffix}")
|
lines.append(
|
||||||
|
BilibiliTopCommentProvider._format_template(
|
||||||
|
comment_template.get("split_song_line", DEFAULT_COMMENT_TEMPLATE["split_song_line"]),
|
||||||
|
{
|
||||||
|
"song_index": str(next_index),
|
||||||
|
"title": title,
|
||||||
|
"artist": artist,
|
||||||
|
"artist_suffix": suffix,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
next_index += 1
|
||||||
if lines:
|
if lines:
|
||||||
return "\n".join(lines)
|
return "\n".join(lines), next_index
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
if songs_txt_path.exists():
|
if songs_txt_path.exists():
|
||||||
lines = []
|
lines = []
|
||||||
for index, raw in enumerate(songs_txt_path.read_text(encoding="utf-8").splitlines(), 1):
|
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
|
||||||
text = raw.strip()
|
text = raw.strip()
|
||||||
if not text:
|
if not text:
|
||||||
continue
|
continue
|
||||||
parts = text.split(" ", 1)
|
parts = text.split(" ", 1)
|
||||||
song_text = parts[1] if len(parts) == 2 and ":" in parts[0] else text
|
song_text = parts[1] if len(parts) == 2 and ":" in parts[0] else text
|
||||||
lines.append(f"{index}. {song_text}")
|
lines.append(
|
||||||
return "\n".join(lines)
|
BilibiliTopCommentProvider._format_template(
|
||||||
return ""
|
comment_template.get("split_text_song_line", DEFAULT_COMMENT_TEMPLATE["split_text_song_line"]),
|
||||||
|
{
|
||||||
|
"song_index": str(next_index),
|
||||||
|
"song_text": song_text,
|
||||||
|
"line_text": text,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
next_index += 1
|
||||||
|
return "\n".join(lines), next_index
|
||||||
|
return "", next_index
|
||||||
|
|
||||||
def _build_split_comment(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
|
@staticmethod
|
||||||
|
def _build_full_timeline_content(
|
||||||
|
songs_txt_path: Path,
|
||||||
|
*,
|
||||||
|
start_index: int = 1,
|
||||||
|
comment_template: dict[str, str] | None = None,
|
||||||
|
) -> tuple[str, int]:
|
||||||
|
if not songs_txt_path.exists():
|
||||||
|
return "", start_index
|
||||||
|
comment_template = comment_template or DEFAULT_COMMENT_TEMPLATE
|
||||||
|
next_index = start_index
|
||||||
|
lines = []
|
||||||
|
for raw in songs_txt_path.read_text(encoding="utf-8").splitlines():
|
||||||
|
text = raw.strip()
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
lines.append(
|
||||||
|
BilibiliTopCommentProvider._format_template(
|
||||||
|
comment_template.get("full_timeline_line", DEFAULT_COMMENT_TEMPLATE["full_timeline_line"]),
|
||||||
|
{
|
||||||
|
"song_index": str(next_index),
|
||||||
|
"line_text": text,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
next_index += 1
|
||||||
|
return "\n".join(lines), next_index
|
||||||
|
|
||||||
|
def _build_split_comment(
|
||||||
|
self,
|
||||||
|
task: Task,
|
||||||
|
settings: dict[str, Any],
|
||||||
|
comment_template: dict[str, str],
|
||||||
|
) -> tuple[str, str | None]:
|
||||||
repo = settings.get("__repo")
|
repo = settings.get("__repo")
|
||||||
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
|
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
|
||||||
session_dir = resolve_task_work_dir(task)
|
session_dir = resolve_task_work_dir(task)
|
||||||
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
|
content, _ = self._build_split_comment_content(
|
||||||
|
session_dir / "songs.json",
|
||||||
|
session_dir / "songs.txt",
|
||||||
|
comment_template=comment_template,
|
||||||
|
)
|
||||||
|
return self._with_split_footer(content, task, settings, comment_template), None
|
||||||
|
|
||||||
context = repo.get_task_context(task.id)
|
context = repo.get_task_context(task.id)
|
||||||
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
||||||
session_dir = resolve_task_work_dir(task)
|
session_dir = resolve_task_work_dir(task)
|
||||||
return self._build_split_comment_content(session_dir / "songs.json", session_dir / "songs.txt"), None
|
content, _ = self._build_split_comment_content(
|
||||||
|
session_dir / "songs.json",
|
||||||
|
session_dir / "songs.txt",
|
||||||
|
comment_template=comment_template,
|
||||||
|
)
|
||||||
|
return self._with_split_footer(content, task, settings, comment_template), None
|
||||||
|
|
||||||
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
|
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
|
||||||
if not ordered_contexts:
|
if not ordered_contexts:
|
||||||
@ -182,31 +280,42 @@ class BilibiliTopCommentProvider:
|
|||||||
return "", "session_split_comment_owned_by_anchor"
|
return "", "session_split_comment_owned_by_anchor"
|
||||||
|
|
||||||
blocks: list[str] = []
|
blocks: list[str] = []
|
||||||
|
next_song_index = 1
|
||||||
for index, session_context in enumerate(ordered_contexts, start=1):
|
for index, session_context in enumerate(ordered_contexts, start=1):
|
||||||
session_task = repo.get_task(session_context.task_id)
|
session_task = repo.get_task(session_context.task_id)
|
||||||
if session_task is None:
|
if session_task is None:
|
||||||
continue
|
continue
|
||||||
task_dir = resolve_task_work_dir(session_task)
|
task_dir = resolve_task_work_dir(session_task)
|
||||||
content = self._build_split_comment_content(task_dir / "songs.json", task_dir / "songs.txt")
|
content, next_song_index = self._build_split_comment_content(
|
||||||
|
task_dir / "songs.json",
|
||||||
|
task_dir / "songs.txt",
|
||||||
|
start_index=next_song_index,
|
||||||
|
comment_template=comment_template,
|
||||||
|
)
|
||||||
if not content:
|
if not content:
|
||||||
continue
|
continue
|
||||||
blocks.append(f"P{index}:\n{content}")
|
blocks.append(f"{self._part_header(comment_template, 'split_part_header', index)}\n{content}")
|
||||||
if not blocks:
|
if not blocks:
|
||||||
return "", "split_comment_empty"
|
return "", "split_comment_empty"
|
||||||
return "\n\n".join(blocks), None
|
return self._with_split_footer("\n\n".join(blocks), task, settings, comment_template), None
|
||||||
|
|
||||||
def _build_full_comment_content(self, task: Task, settings: dict[str, Any]) -> tuple[str, str | None]:
|
def _build_full_comment_content(
|
||||||
|
self,
|
||||||
|
task: Task,
|
||||||
|
settings: dict[str, Any],
|
||||||
|
comment_template: dict[str, str],
|
||||||
|
) -> tuple[str, str | None]:
|
||||||
repo = settings.get("__repo")
|
repo = settings.get("__repo")
|
||||||
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
|
if repo is None or not hasattr(repo, "get_task_context") or not hasattr(repo, "list_task_contexts_by_session_key"):
|
||||||
session_dir = resolve_task_work_dir(task)
|
session_dir = resolve_task_work_dir(task)
|
||||||
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
|
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
|
||||||
return content, None if content else "timeline_comment_empty"
|
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
|
||||||
|
|
||||||
context = repo.get_task_context(task.id)
|
context = repo.get_task_context(task.id)
|
||||||
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
if context is None or not context.session_key or context.session_key.startswith("task:"):
|
||||||
session_dir = resolve_task_work_dir(task)
|
session_dir = resolve_task_work_dir(task)
|
||||||
content = session_dir.joinpath("songs.txt").read_text(encoding="utf-8").strip()
|
content, _ = self._build_full_timeline_content(session_dir / "songs.txt", comment_template=comment_template)
|
||||||
return content, None if content else "timeline_comment_empty"
|
return self._with_full_footer(content, task, settings, comment_template), None if content else "timeline_comment_empty"
|
||||||
|
|
||||||
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
|
ordered_contexts = self._ordered_session_contexts(repo, context.session_key)
|
||||||
if not ordered_contexts:
|
if not ordered_contexts:
|
||||||
@ -216,21 +325,109 @@ class BilibiliTopCommentProvider:
|
|||||||
return "", "session_full_comment_owned_by_anchor"
|
return "", "session_full_comment_owned_by_anchor"
|
||||||
|
|
||||||
blocks: list[str] = []
|
blocks: list[str] = []
|
||||||
|
next_song_index = 1
|
||||||
for index, session_context in enumerate(ordered_contexts, start=1):
|
for index, session_context in enumerate(ordered_contexts, start=1):
|
||||||
session_task = repo.get_task(session_context.task_id)
|
session_task = repo.get_task(session_context.task_id)
|
||||||
if session_task is None:
|
if session_task is None:
|
||||||
continue
|
continue
|
||||||
task_dir = resolve_task_work_dir(session_task)
|
task_dir = resolve_task_work_dir(session_task)
|
||||||
songs_path = task_dir / "songs.txt"
|
songs_path = task_dir / "songs.txt"
|
||||||
if not songs_path.exists():
|
content, next_song_index = self._build_full_timeline_content(
|
||||||
continue
|
songs_path,
|
||||||
content = songs_path.read_text(encoding="utf-8").strip()
|
start_index=next_song_index,
|
||||||
|
comment_template=comment_template,
|
||||||
|
)
|
||||||
if not content:
|
if not content:
|
||||||
continue
|
continue
|
||||||
blocks.append(f"P{index}:\n{content}")
|
blocks.append(f"{self._part_header(comment_template, 'full_part_header', index)}\n{content}")
|
||||||
if not blocks:
|
if not blocks:
|
||||||
return "", "timeline_comment_empty"
|
return "", "timeline_comment_empty"
|
||||||
return "\n\n".join(blocks), None
|
return self._with_full_footer("\n\n".join(blocks), task, settings, comment_template), None
|
||||||
|
|
||||||
|
def _with_split_footer(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
task: Task,
|
||||||
|
settings: dict[str, Any],
|
||||||
|
comment_template: dict[str, str],
|
||||||
|
) -> str:
|
||||||
|
links = link_context_for_task(task, settings.get("__repo"), settings)
|
||||||
|
current_full_link = links.get("current_full_video_link", "")
|
||||||
|
if not current_full_link and settings.get("biliup_path") and settings.get("cookie_file"):
|
||||||
|
full_bvid = resolve_full_video_bvid(task.title, resolve_task_work_dir(task), settings)
|
||||||
|
current_full_link = bilibili_video_url(full_bvid)
|
||||||
|
header_vars = dict(links)
|
||||||
|
header_vars["current_full_video_link"] = current_full_link
|
||||||
|
header = self._format_header_template(
|
||||||
|
comment_template.get("split_header", DEFAULT_COMMENT_TEMPLATE["split_header"]),
|
||||||
|
header_vars,
|
||||||
|
)
|
||||||
|
return self._prepend_header(content, header)
|
||||||
|
|
||||||
|
def _with_full_footer(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
task: Task,
|
||||||
|
settings: dict[str, Any],
|
||||||
|
comment_template: dict[str, str],
|
||||||
|
) -> str:
|
||||||
|
links = link_context_for_task(task, settings.get("__repo"), settings)
|
||||||
|
header = self._format_header_template(
|
||||||
|
comment_template.get("full_header", DEFAULT_COMMENT_TEMPLATE["full_header"]),
|
||||||
|
links,
|
||||||
|
)
|
||||||
|
return self._prepend_header(content, header)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _prepend_header(content: str, header: str) -> str:
|
||||||
|
content = content.strip()
|
||||||
|
lines = [line.rstrip() for line in header.splitlines() if line.strip()]
|
||||||
|
if not content:
|
||||||
|
return "\n".join(lines)
|
||||||
|
if not lines:
|
||||||
|
return content
|
||||||
|
return "\n".join(lines) + f"\n\n{content}"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _part_header(comment_template: dict[str, str], key: str, part_index: int) -> str:
|
||||||
|
return BilibiliTopCommentProvider._format_template(
|
||||||
|
comment_template.get(key, DEFAULT_COMMENT_TEMPLATE[key]),
|
||||||
|
{"part_index": str(part_index)},
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _format_template(template: str, values: dict[str, str]) -> str:
|
||||||
|
return template.format_map(_SafeFormatDict(values)).strip()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _format_header_template(template: str, values: dict[str, str]) -> str:
|
||||||
|
lines = []
|
||||||
|
for raw_line in template.splitlines():
|
||||||
|
if any(f"{{{key}}}" in raw_line and not value for key, value in values.items()):
|
||||||
|
continue
|
||||||
|
lines.append(BilibiliTopCommentProvider._format_template(raw_line, values))
|
||||||
|
return "\n".join(line for line in lines if line.strip()).strip()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _load_comment_template(settings: dict[str, Any]) -> dict[str, str]:
|
||||||
|
merged = dict(DEFAULT_COMMENT_TEMPLATE)
|
||||||
|
path_value = settings.get("upload_config_file")
|
||||||
|
if not path_value:
|
||||||
|
return merged
|
||||||
|
path = Path(str(path_value))
|
||||||
|
if not path.exists():
|
||||||
|
return merged
|
||||||
|
try:
|
||||||
|
config = json.loads(path.read_text(encoding="utf-8"))
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return merged
|
||||||
|
template = config.get("comment_template", {})
|
||||||
|
if not isinstance(template, dict):
|
||||||
|
return merged
|
||||||
|
for key, value in template.items():
|
||||||
|
if key in merged and isinstance(value, str):
|
||||||
|
merged[key] = value
|
||||||
|
return merged
|
||||||
|
|
||||||
def _ordered_session_contexts(self, repo, session_key: str) -> list[object]: # type: ignore[no-untyped-def]
|
def _ordered_session_contexts(self, repo, session_key: str) -> list[object]: # type: ignore[no-untyped-def]
|
||||||
contexts = list(repo.list_task_contexts_by_session_key(session_key))
|
contexts = list(repo.list_task_contexts_by_session_key(session_key))
|
||||||
|
|||||||
@ -217,7 +217,7 @@ class IngestService:
|
|||||||
"room_id": sidecar_meta["payload"].get("room_id"),
|
"room_id": sidecar_meta["payload"].get("room_id"),
|
||||||
"session_key": sidecar_meta["payload"].get("session_key"),
|
"session_key": sidecar_meta["payload"].get("session_key"),
|
||||||
"full_video_bvid": sidecar_meta["payload"].get("full_video_bvid"),
|
"full_video_bvid": sidecar_meta["payload"].get("full_video_bvid"),
|
||||||
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or source_path.stat().st_mtime,
|
"reference_timestamp": sidecar_meta["payload"].get("reference_timestamp") or target_source.stat().st_mtime,
|
||||||
}
|
}
|
||||||
task = self.create_task_from_file(target_source, settings, context_payload=context_payload)
|
task = self.create_task_from_file(target_source, settings, context_payload=context_payload)
|
||||||
accepted.append(
|
accepted.append(
|
||||||
|
|||||||
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@ -11,9 +12,13 @@ from biliup_next.core.errors import ModuleError
|
|||||||
from biliup_next.core.models import PublishRecord, Task, utc_now_iso
|
from biliup_next.core.models import PublishRecord, Task, utc_now_iso
|
||||||
from biliup_next.core.providers import ProviderManifest
|
from biliup_next.core.providers import ProviderManifest
|
||||||
from biliup_next.infra.adapters.biliup_cli import BiliupCliAdapter
|
from biliup_next.infra.adapters.biliup_cli import BiliupCliAdapter
|
||||||
|
from biliup_next.infra.video_links import link_context_for_task
|
||||||
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
from biliup_next.infra.workspace_paths import resolve_task_work_dir
|
||||||
|
|
||||||
|
|
||||||
|
DESC_MAX_CHARS = 1900
|
||||||
|
|
||||||
|
|
||||||
class BiliupCliPublishProvider:
|
class BiliupCliPublishProvider:
|
||||||
def __init__(self, adapter: BiliupCliAdapter | None = None) -> None:
|
def __init__(self, adapter: BiliupCliAdapter | None = None) -> None:
|
||||||
self.adapter = adapter or BiliupCliAdapter()
|
self.adapter = adapter or BiliupCliAdapter()
|
||||||
@ -36,7 +41,7 @@ class BiliupCliPublishProvider:
|
|||||||
publish_progress = work_dir / "publish_progress.json"
|
publish_progress = work_dir / "publish_progress.json"
|
||||||
config = self._load_upload_config(Path(str(settings["upload_config_file"])))
|
config = self._load_upload_config(Path(str(settings["upload_config_file"])))
|
||||||
|
|
||||||
video_files = [artifact.path for artifact in clip_videos]
|
video_files = self._prepare_publish_video_files(work_dir, [artifact.path for artifact in clip_videos])
|
||||||
if not video_files:
|
if not video_files:
|
||||||
raise ModuleError(
|
raise ModuleError(
|
||||||
code="PUBLISH_NO_CLIPS",
|
code="PUBLISH_NO_CLIPS",
|
||||||
@ -64,10 +69,13 @@ class BiliupCliPublishProvider:
|
|||||||
"daily_quote": quote.get("text", ""),
|
"daily_quote": quote.get("text", ""),
|
||||||
"quote_author": quote.get("author", ""),
|
"quote_author": quote.get("author", ""),
|
||||||
}
|
}
|
||||||
|
template_vars.update(link_context_for_task(task, settings.get("__repo"), settings))
|
||||||
template = config.get("template", {})
|
template = config.get("template", {})
|
||||||
title = template.get("title", "{streamer}_{date}").format(**template_vars)
|
title = template.get("title", "{streamer}_{date}").format(**template_vars)
|
||||||
description = template.get("description", "{songs_list}").format(**template_vars)
|
description = self._fit_bilibili_desc(
|
||||||
dynamic = template.get("dynamic", "").format(**template_vars)
|
self._drop_empty_link_lines(template.get("description", "{songs_list}").format(**template_vars))
|
||||||
|
)
|
||||||
|
dynamic = self._drop_empty_link_lines(template.get("dynamic", "").format(**template_vars))
|
||||||
tags = template.get("tag", "翻唱,唱歌,音乐").format(**template_vars)
|
tags = template.get("tag", "翻唱,唱歌,音乐").format(**template_vars)
|
||||||
streamer_cfg = config.get("streamers", {})
|
streamer_cfg = config.get("streamers", {})
|
||||||
if streamer in streamer_cfg:
|
if streamer in streamer_cfg:
|
||||||
@ -90,8 +98,12 @@ class BiliupCliPublishProvider:
|
|||||||
first_batch = video_files[:5]
|
first_batch = video_files[:5]
|
||||||
remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
|
remaining_batches = [video_files[i:i + 5] for i in range(5, len(video_files), 5)]
|
||||||
|
|
||||||
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
|
|
||||||
progress = self._load_publish_progress(publish_progress)
|
progress = self._load_publish_progress(publish_progress)
|
||||||
|
existing_bvid = bvid_file.read_text(encoding="utf-8").strip() if bvid_file.exists() else ""
|
||||||
|
progress_bvid = str(progress.get("bvid", "")).strip()
|
||||||
|
if not existing_bvid.startswith("BV") and progress_bvid.startswith("BV"):
|
||||||
|
existing_bvid = progress_bvid
|
||||||
|
bvid_file.write_text(existing_bvid, encoding="utf-8")
|
||||||
if upload_done.exists() and existing_bvid.startswith("BV"):
|
if upload_done.exists() and existing_bvid.startswith("BV"):
|
||||||
return PublishRecord(
|
return PublishRecord(
|
||||||
id=None,
|
id=None,
|
||||||
@ -201,6 +213,7 @@ class BiliupCliPublishProvider:
|
|||||||
upload_cmd.extend(["--cover", cover])
|
upload_cmd.extend(["--cover", cover])
|
||||||
|
|
||||||
for attempt in range(1, retry_count + 1):
|
for attempt in range(1, retry_count + 1):
|
||||||
|
self._append_description_summary(publish_log, description)
|
||||||
result = self.adapter.run(
|
result = self.adapter.run(
|
||||||
upload_cmd,
|
upload_cmd,
|
||||||
label=f"首批上传[{attempt}/{retry_count}]",
|
label=f"首批上传[{attempt}/{retry_count}]",
|
||||||
@ -253,6 +266,29 @@ class BiliupCliPublishProvider:
|
|||||||
def _wait_seconds(retry_index: int) -> int:
|
def _wait_seconds(retry_index: int) -> int:
|
||||||
return min(300 * (2**retry_index), 3600)
|
return min(300 * (2**retry_index), 3600)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _prepare_publish_video_files(work_dir: Path, video_files: list[str]) -> list[str]:
|
||||||
|
publish_dir = work_dir / "publish_video"
|
||||||
|
if publish_dir.exists():
|
||||||
|
shutil.rmtree(publish_dir)
|
||||||
|
publish_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
prepared: list[str] = []
|
||||||
|
for index, video_file in enumerate(video_files, start=1):
|
||||||
|
source = Path(video_file)
|
||||||
|
name = BiliupCliPublishProvider._strip_clip_number_prefix(source.name)
|
||||||
|
target = publish_dir / f"{index:02d}_{name}"
|
||||||
|
try:
|
||||||
|
target.hardlink_to(source)
|
||||||
|
except OSError:
|
||||||
|
shutil.copy2(source, target)
|
||||||
|
prepared.append(str(target))
|
||||||
|
return prepared
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _strip_clip_number_prefix(filename: str) -> str:
|
||||||
|
return re.sub(r"^\d+[_-]+", "", filename, count=1)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _load_upload_config(path: Path) -> dict[str, Any]:
|
def _load_upload_config(path: Path) -> dict[str, Any]:
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
@ -262,6 +298,9 @@ class BiliupCliPublishProvider:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse_filename(filename: str, config: dict[str, Any] | None = None) -> dict[str, str]:
|
def _parse_filename(filename: str, config: dict[str, Any] | None = None) -> dict[str, str]:
|
||||||
config = config or {}
|
config = config or {}
|
||||||
|
builtin = BiliupCliPublishProvider._parse_builtin_filename(filename)
|
||||||
|
if builtin:
|
||||||
|
return builtin
|
||||||
patterns = config.get("filename_patterns", {}).get("patterns", [])
|
patterns = config.get("filename_patterns", {}).get("patterns", [])
|
||||||
for pattern_config in patterns:
|
for pattern_config in patterns:
|
||||||
regex = pattern_config.get("regex")
|
regex = pattern_config.get("regex")
|
||||||
@ -278,6 +317,48 @@ class BiliupCliPublishProvider:
|
|||||||
return data
|
return data
|
||||||
return {"streamer": filename, "date": ""}
|
return {"streamer": filename, "date": ""}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_builtin_filename(filename: str) -> dict[str, str]:
|
||||||
|
patterns = (
|
||||||
|
r"^(?P<streamer>.+?)唱歌录播\s+(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
|
||||||
|
r"^(?P<streamer>.+?)唱歌录播[::]\s*(?P<year>\d{4})年(?P<month>\d{2})月(?P<day>\d{2})日\s+(?P<hour>\d{2})时(?P<minute>\d{2})分",
|
||||||
|
)
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.match(pattern, filename)
|
||||||
|
if not match:
|
||||||
|
continue
|
||||||
|
data = match.groupdict()
|
||||||
|
data["date"] = f"{data['month']}月{data['day']}日 {data['hour']}时{data['minute']}分"
|
||||||
|
return data
|
||||||
|
return {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _drop_empty_link_lines(text: str) -> str:
|
||||||
|
lines = []
|
||||||
|
for line in text.splitlines():
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped in {"直播完整版:", "歌曲纯享版:", "上次直播:", "上次纯享:", "上次完整版:"}:
|
||||||
|
continue
|
||||||
|
lines.append(line.rstrip())
|
||||||
|
return "\n".join(lines).strip()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _fit_bilibili_desc(text: str, max_chars: int = DESC_MAX_CHARS) -> str:
|
||||||
|
text = text.strip()
|
||||||
|
if len(text) <= max_chars:
|
||||||
|
return text
|
||||||
|
suffix = "\n\n完整歌单见置顶评论。"
|
||||||
|
return text[: max(0, max_chars - len(suffix))].rstrip() + suffix
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _append_description_summary(log_path: Path, description: str) -> None:
|
||||||
|
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
line = f"description_chars: {len(description)}\n"
|
||||||
|
if log_path.exists():
|
||||||
|
log_path.write_text(log_path.read_text(encoding="utf-8") + line, encoding="utf-8")
|
||||||
|
else:
|
||||||
|
log_path.write_text(line, encoding="utf-8")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_random_quote(config: dict[str, Any]) -> dict[str, str]:
|
def _get_random_quote(config: dict[str, Any]) -> dict[str, str]:
|
||||||
quotes = config.get("quotes", [])
|
quotes = config.get("quotes", [])
|
||||||
|
|||||||
@ -26,7 +26,9 @@ class PublishService:
|
|||||||
session_contexts = self._session_contexts(task_id)
|
session_contexts = self._session_contexts(task_id)
|
||||||
if len(session_contexts) <= 1:
|
if len(session_contexts) <= 1:
|
||||||
clip_videos = self._clip_videos_for_task(task_id)
|
clip_videos = self._clip_videos_for_task(task_id)
|
||||||
record = provider.publish(task, clip_videos, settings)
|
provider_settings = dict(settings)
|
||||||
|
provider_settings["__repo"] = self.repo
|
||||||
|
record = provider.publish(task, clip_videos, provider_settings)
|
||||||
self._persist_publish_success(task, record)
|
self._persist_publish_success(task, record)
|
||||||
return record
|
return record
|
||||||
|
|
||||||
@ -50,6 +52,7 @@ class PublishService:
|
|||||||
if anchor_task is None:
|
if anchor_task is None:
|
||||||
raise RuntimeError(f"anchor task not found: {anchor_context.task_id}")
|
raise RuntimeError(f"anchor task not found: {anchor_context.task_id}")
|
||||||
session_settings = dict(settings)
|
session_settings = dict(settings)
|
||||||
|
session_settings["__repo"] = self.repo
|
||||||
session_settings.update(self._session_publish_metadata(anchor_task, session_contexts, settings))
|
session_settings.update(self._session_publish_metadata(anchor_task, session_contexts, settings))
|
||||||
record = provider.publish(anchor_task, clip_videos, session_settings)
|
record = provider.publish(anchor_task, clip_videos, session_settings)
|
||||||
for context in session_contexts:
|
for context in session_contexts:
|
||||||
|
|||||||
@ -37,13 +37,17 @@ class CodexSongDetector:
|
|||||||
work_dir=work_dir,
|
work_dir=work_dir,
|
||||||
prompt=TASK_PROMPT,
|
prompt=TASK_PROMPT,
|
||||||
)
|
)
|
||||||
|
self._write_codex_log(work_dir, result)
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
|
stderr = result.stderr[-2000:]
|
||||||
|
stdout = result.stdout[-2000:]
|
||||||
|
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
|
||||||
raise ModuleError(
|
raise ModuleError(
|
||||||
code="SONG_DETECT_FAILED",
|
code="SONG_DETECT_FAILED",
|
||||||
message="codex exec 执行失败",
|
message="codex exec 执行失败",
|
||||||
retryable=True,
|
retryable=retryable,
|
||||||
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
|
details={"stdout": stdout, "stderr": stderr},
|
||||||
)
|
)
|
||||||
|
|
||||||
ensure_song_outputs(
|
ensure_song_outputs(
|
||||||
@ -72,3 +76,37 @@ class CodexSongDetector:
|
|||||||
created_at=utc_now_iso(),
|
created_at=utc_now_iso(),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _write_codex_log(work_dir: Path, result) -> None: # noqa: ANN001
|
||||||
|
log_path = work_dir / "codex.log"
|
||||||
|
log_path.write_text(
|
||||||
|
"\n".join(
|
||||||
|
[
|
||||||
|
"codex song_detect",
|
||||||
|
f"returncode: {result.returncode}",
|
||||||
|
"",
|
||||||
|
"stdout:",
|
||||||
|
result.stdout,
|
||||||
|
"",
|
||||||
|
"stderr:",
|
||||||
|
result.stderr,
|
||||||
|
"",
|
||||||
|
]
|
||||||
|
),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _is_auth_error(text: str) -> bool:
|
||||||
|
lowered = text.lower()
|
||||||
|
return any(
|
||||||
|
needle in lowered
|
||||||
|
for needle in (
|
||||||
|
"401",
|
||||||
|
"invalid access token",
|
||||||
|
"token expired",
|
||||||
|
"unauthorized",
|
||||||
|
"authentication",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|||||||
@ -43,11 +43,14 @@ class QwenCliSongDetector:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
|
stderr = result.stderr[-2000:]
|
||||||
|
stdout = result.stdout[-2000:]
|
||||||
|
retryable = not self._is_auth_error(f"{stdout}\n{stderr}")
|
||||||
raise ModuleError(
|
raise ModuleError(
|
||||||
code="SONG_DETECT_FAILED",
|
code="SONG_DETECT_FAILED",
|
||||||
message="qwen -p 执行失败",
|
message="qwen -p 执行失败",
|
||||||
retryable=True,
|
retryable=retryable,
|
||||||
details={"stdout": result.stdout[-2000:], "stderr": result.stderr[-2000:]},
|
details={"stdout": stdout, "stderr": stderr},
|
||||||
)
|
)
|
||||||
|
|
||||||
ensure_song_outputs(
|
ensure_song_outputs(
|
||||||
@ -76,3 +79,17 @@ class QwenCliSongDetector:
|
|||||||
created_at=utc_now_iso(),
|
created_at=utc_now_iso(),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _is_auth_error(text: str) -> bool:
|
||||||
|
lowered = text.lower()
|
||||||
|
return any(
|
||||||
|
needle in lowered
|
||||||
|
for needle in (
|
||||||
|
"401",
|
||||||
|
"invalid access token",
|
||||||
|
"token expired",
|
||||||
|
"unauthorized",
|
||||||
|
"authentication",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|||||||
@ -2,9 +2,12 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
from contextlib import suppress
|
||||||
|
from contextlib import contextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@ -16,6 +19,7 @@ from biliup_next.core.providers import ProviderManifest
|
|||||||
LANGUAGE = "zh"
|
LANGUAGE = "zh"
|
||||||
BITRATE_KBPS = 64
|
BITRATE_KBPS = 64
|
||||||
MODEL_NAME = "whisper-large-v3-turbo"
|
MODEL_NAME = "whisper-large-v3-turbo"
|
||||||
|
SEGMENT_SIZE_SAFETY_RATIO = 0.75
|
||||||
|
|
||||||
|
|
||||||
class GroqTranscribeProvider:
|
class GroqTranscribeProvider:
|
||||||
@ -30,11 +34,11 @@ class GroqTranscribeProvider:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
|
def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
|
||||||
groq_api_key = str(settings.get("groq_api_key", "")).strip()
|
groq_api_keys = self._groq_api_keys(settings)
|
||||||
if not groq_api_key:
|
if not groq_api_keys:
|
||||||
raise ModuleError(
|
raise ModuleError(
|
||||||
code="GROQ_API_KEY_MISSING",
|
code="GROQ_API_KEY_MISSING",
|
||||||
message="未配置 transcribe.groq_api_key",
|
message="未配置 transcribe.groq_api_key 或 transcribe.groq_api_keys",
|
||||||
retryable=False,
|
retryable=False,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
@ -55,18 +59,23 @@ class GroqTranscribeProvider:
|
|||||||
)
|
)
|
||||||
|
|
||||||
ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
|
ffmpeg_bin = str(settings.get("ffmpeg_bin", "ffmpeg"))
|
||||||
max_file_size_mb = int(settings.get("max_file_size_mb", 23))
|
max_file_size_mb = int(settings.get("max_file_size_mb", 12))
|
||||||
work_dir = source_path.parent
|
work_dir = source_path.parent
|
||||||
temp_audio_dir = work_dir / "temp_audio"
|
temp_audio_dir = work_dir / "temp_audio"
|
||||||
|
checkpoint_dir = work_dir / "transcribe_segments"
|
||||||
temp_audio_dir.mkdir(parents=True, exist_ok=True)
|
temp_audio_dir.mkdir(parents=True, exist_ok=True)
|
||||||
segment_duration = max(1, math.floor((max_file_size_mb * 8 * 1024) / BITRATE_KBPS))
|
checkpoint_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
max_segment_bytes = max(1, max_file_size_mb) * 1024 * 1024
|
||||||
|
segment_duration = self._initial_segment_duration(max_file_size_mb)
|
||||||
output_pattern = temp_audio_dir / "part_%03d.mp3"
|
output_pattern = temp_audio_dir / "part_%03d.mp3"
|
||||||
|
|
||||||
self._extract_audio_segments(
|
segment_duration = self._extract_audio_segments_with_size_guard(
|
||||||
ffmpeg_bin=ffmpeg_bin,
|
ffmpeg_bin=ffmpeg_bin,
|
||||||
source_path=source_path,
|
source_path=source_path,
|
||||||
output_pattern=output_pattern,
|
output_pattern=output_pattern,
|
||||||
segment_duration=segment_duration,
|
temp_audio_dir=temp_audio_dir,
|
||||||
|
initial_segment_duration=segment_duration,
|
||||||
|
max_segment_bytes=max_segment_bytes,
|
||||||
)
|
)
|
||||||
|
|
||||||
segments = sorted(temp_audio_dir.glob("part_*.mp3"))
|
segments = sorted(temp_audio_dir.glob("part_*.mp3"))
|
||||||
@ -77,22 +86,47 @@ class GroqTranscribeProvider:
|
|||||||
retryable=False,
|
retryable=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
client = Groq(api_key=groq_api_key)
|
request_timeout_seconds = max(1, int(settings.get("request_timeout_seconds", 180)))
|
||||||
|
request_max_retries = max(0, int(settings.get("request_max_retries", 1)))
|
||||||
|
request_retry_backoff_seconds = max(0, int(settings.get("request_retry_backoff_seconds", 30)))
|
||||||
|
lock_enabled = bool(settings.get("serialize_groq_requests", True))
|
||||||
|
lock_path = self._groq_lock_path(settings, work_dir)
|
||||||
|
clients = [Groq(api_key=key, timeout=request_timeout_seconds, max_retries=0) for key in groq_api_keys]
|
||||||
srt_path = work_dir / f"{task.title}.srt"
|
srt_path = work_dir / f"{task.title}.srt"
|
||||||
|
temp_srt_path = work_dir / f".{task.title}.srt.tmp"
|
||||||
global_idx = 1
|
global_idx = 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with srt_path.open("w", encoding="utf-8") as srt_file:
|
with temp_srt_path.open("w", encoding="utf-8") as srt_file:
|
||||||
for index, segment in enumerate(segments):
|
for index, segment in enumerate(segments):
|
||||||
offset_seconds = index * segment_duration
|
offset_seconds = index * segment_duration
|
||||||
segment_data = self._transcribe_with_retry(client, segment)
|
segment_checkpoint = checkpoint_dir / f"{segment.stem}.json"
|
||||||
|
segment_data = self._load_segment_checkpoint(segment_checkpoint, segment_duration=segment_duration)
|
||||||
|
if segment_data is None:
|
||||||
|
with self._optional_groq_lock(lock_path, enabled=lock_enabled):
|
||||||
|
segment_data = self._transcribe_with_retry(
|
||||||
|
clients,
|
||||||
|
segment,
|
||||||
|
request_timeout_seconds=request_timeout_seconds,
|
||||||
|
request_max_retries=request_max_retries,
|
||||||
|
request_retry_backoff_seconds=request_retry_backoff_seconds,
|
||||||
|
)
|
||||||
|
self._write_segment_checkpoint(
|
||||||
|
segment_checkpoint,
|
||||||
|
segment_data,
|
||||||
|
segment_duration=segment_duration,
|
||||||
|
audio_file=segment,
|
||||||
|
)
|
||||||
for chunk in segment_data:
|
for chunk in segment_data:
|
||||||
start = self._format_srt_time(float(chunk["start"]) + offset_seconds)
|
start = self._format_srt_time(float(chunk["start"]) + offset_seconds)
|
||||||
end = self._format_srt_time(float(chunk["end"]) + offset_seconds)
|
end = self._format_srt_time(float(chunk["end"]) + offset_seconds)
|
||||||
text = str(chunk["text"]).strip()
|
text = str(chunk["text"]).strip()
|
||||||
srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
|
srt_file.write(f"{global_idx}\n{start} --> {end}\n{text}\n\n")
|
||||||
global_idx += 1
|
global_idx += 1
|
||||||
|
temp_srt_path.replace(srt_path)
|
||||||
finally:
|
finally:
|
||||||
|
with suppress(FileNotFoundError):
|
||||||
|
temp_srt_path.unlink()
|
||||||
shutil.rmtree(temp_audio_dir, ignore_errors=True)
|
shutil.rmtree(temp_audio_dir, ignore_errors=True)
|
||||||
|
|
||||||
return Artifact(
|
return Artifact(
|
||||||
@ -104,12 +138,126 @@ class GroqTranscribeProvider:
|
|||||||
{
|
{
|
||||||
"provider": "groq",
|
"provider": "groq",
|
||||||
"model": MODEL_NAME,
|
"model": MODEL_NAME,
|
||||||
|
"api_key_count": len(groq_api_keys),
|
||||||
"segment_duration_seconds": segment_duration,
|
"segment_duration_seconds": segment_duration,
|
||||||
|
"checkpoint_dir": str(checkpoint_dir.resolve()),
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
created_at=utc_now_iso(),
|
created_at=utc_now_iso(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _groq_api_keys(settings: dict[str, Any]) -> list[str]:
|
||||||
|
keys: list[str] = []
|
||||||
|
raw_keys = settings.get("groq_api_keys")
|
||||||
|
if isinstance(raw_keys, list):
|
||||||
|
keys.extend(str(key).strip() for key in raw_keys if str(key).strip())
|
||||||
|
legacy_key = str(settings.get("groq_api_key", "")).strip()
|
||||||
|
if legacy_key:
|
||||||
|
keys.append(legacy_key)
|
||||||
|
deduped: list[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for key in keys:
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
deduped.append(key)
|
||||||
|
return deduped
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _initial_segment_duration(max_file_size_mb: int) -> int:
|
||||||
|
safe_target_mb = max_file_size_mb * SEGMENT_SIZE_SAFETY_RATIO
|
||||||
|
return max(1, math.floor((safe_target_mb * 8 * 1024) / BITRATE_KBPS))
|
||||||
|
|
||||||
|
def _extract_audio_segments_with_size_guard(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
ffmpeg_bin: str,
|
||||||
|
source_path: Path,
|
||||||
|
output_pattern: Path,
|
||||||
|
temp_audio_dir: Path,
|
||||||
|
initial_segment_duration: int,
|
||||||
|
max_segment_bytes: int,
|
||||||
|
) -> int:
|
||||||
|
segment_duration = initial_segment_duration
|
||||||
|
for _attempt in range(4):
|
||||||
|
self._clear_audio_segments(temp_audio_dir)
|
||||||
|
self._extract_audio_segments(
|
||||||
|
ffmpeg_bin=ffmpeg_bin,
|
||||||
|
source_path=source_path,
|
||||||
|
output_pattern=output_pattern,
|
||||||
|
segment_duration=segment_duration,
|
||||||
|
)
|
||||||
|
largest_segment = self._largest_audio_segment(temp_audio_dir)
|
||||||
|
if largest_segment is None or largest_segment.stat().st_size <= max_segment_bytes:
|
||||||
|
return segment_duration
|
||||||
|
next_duration = max(1, math.floor(segment_duration * 0.75))
|
||||||
|
if next_duration == segment_duration:
|
||||||
|
break
|
||||||
|
segment_duration = next_duration
|
||||||
|
largest_segment = self._largest_audio_segment(temp_audio_dir)
|
||||||
|
largest_size = largest_segment.stat().st_size if largest_segment else 0
|
||||||
|
raise ModuleError(
|
||||||
|
code="TRANSCRIBE_AUDIO_SEGMENT_TOO_LARGE",
|
||||||
|
message="音频分片超过 Groq 上传安全阈值",
|
||||||
|
retryable=False,
|
||||||
|
details={
|
||||||
|
"largest_segment": str(largest_segment) if largest_segment else None,
|
||||||
|
"largest_segment_bytes": largest_size,
|
||||||
|
"max_segment_bytes": max_segment_bytes,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _clear_audio_segments(temp_audio_dir: Path) -> None:
|
||||||
|
for path in temp_audio_dir.glob("part_*.mp3"):
|
||||||
|
path.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _largest_audio_segment(temp_audio_dir: Path) -> Path | None:
|
||||||
|
segments = list(temp_audio_dir.glob("part_*.mp3"))
|
||||||
|
if not segments:
|
||||||
|
return None
|
||||||
|
return max(segments, key=lambda path: path.stat().st_size)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _load_segment_checkpoint(checkpoint_path: Path, *, segment_duration: int) -> list[dict[str, Any]] | None:
|
||||||
|
if not checkpoint_path.exists():
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
data = json.loads(checkpoint_path.read_text(encoding="utf-8"))
|
||||||
|
if data.get("model") != MODEL_NAME or data.get("language") != LANGUAGE:
|
||||||
|
return None
|
||||||
|
if data.get("segment_duration_seconds") != segment_duration:
|
||||||
|
return None
|
||||||
|
segments = data.get("segments")
|
||||||
|
if not isinstance(segments, list):
|
||||||
|
return None
|
||||||
|
return [dict(segment) for segment in segments]
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
def _write_segment_checkpoint(
    checkpoint_path: Path,
    segments: list[dict[str, Any]],
    *,
    segment_duration: int,
    audio_file: Path,
) -> None:
    """Atomically persist one audio segment's transcription result.

    The payload is written to a sibling ``.tmp`` file first and then renamed
    over the final path, so a crash mid-write can never leave a truncated
    checkpoint behind.
    """
    payload = {
        "provider": "groq",
        "model": MODEL_NAME,
        "language": LANGUAGE,
        "audio_file": audio_file.name,
        "segment_duration_seconds": segment_duration,
        "segments": segments,
    }
    checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
    staging_path = checkpoint_path.with_suffix(f"{checkpoint_path.suffix}.tmp")
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    staging_path.write_text(serialized, encoding="utf-8")
    staging_path.replace(checkpoint_path)
|
||||||
|
|
||||||
def _extract_audio_segments(
|
def _extract_audio_segments(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
@ -156,31 +304,141 @@ class GroqTranscribeProvider:
|
|||||||
details={"stderr": exc.stderr[-2000:], "stdout": exc.stdout[-2000:]},
|
details={"stderr": exc.stderr[-2000:], "stdout": exc.stdout[-2000:]},
|
||||||
) from exc
|
) from exc
|
||||||
|
|
||||||
def _transcribe_with_retry(self, client: Any, audio_file: Path) -> list[dict[str, Any]]:
|
@staticmethod
|
||||||
retry_count = 0
|
def _groq_lock_path(settings: dict[str, Any], fallback_work_dir: Path) -> Path:
|
||||||
while True:
|
session_dir = settings.get("session_dir")
|
||||||
|
if isinstance(session_dir, str) and session_dir:
|
||||||
|
return Path(session_dir).resolve().parent / "groq_transcribe.lock"
|
||||||
|
return fallback_work_dir / "groq_transcribe.lock"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
@contextmanager
|
||||||
|
def _optional_groq_lock(lock_path: Path, *, enabled: bool):
|
||||||
|
if not enabled:
|
||||||
|
yield
|
||||||
|
return
|
||||||
|
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with lock_path.open("w", encoding="utf-8") as lock_file:
|
||||||
try:
|
try:
|
||||||
with audio_file.open("rb") as file_handle:
|
import fcntl
|
||||||
response = client.audio.transcriptions.create(
|
|
||||||
file=(audio_file.name, file_handle.read()),
|
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
||||||
model=MODEL_NAME,
|
lock_file.write(f"{os.getpid()}\n")
|
||||||
response_format="verbose_json",
|
lock_file.flush()
|
||||||
language=LANGUAGE,
|
yield
|
||||||
temperature=0.0,
|
finally:
|
||||||
)
|
with suppress(Exception):
|
||||||
return [dict(segment) for segment in response.segments]
|
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||||
except Exception as exc: # noqa: BLE001
|
|
||||||
retry_count += 1
|
def _transcribe_with_retry(
    self,
    clients: list[Any],
    audio_file: Path,
    *,
    request_timeout_seconds: int,
    request_max_retries: int,
    request_retry_backoff_seconds: int,
) -> list[dict[str, Any]]:
    """Transcribe one audio segment via Groq, rotating API keys and retrying.

    Each outer ``attempt`` sweeps the client list in order. A rate-limited
    key fails over to the next key immediately; any other transient error
    ends the sweep and spends one retry; a non-transient error raises at
    once. Returns the segment dicts from the first successful response.

    Raises:
        ModuleError: on a non-retryable error, or once ``request_max_retries``
            extra attempts have been exhausted.
    """
    attempt = 0        # outer passes over the whole client list
    key_attempts = 0   # total individual API calls across keys/attempts
    last_error = ""
    while True:
        attempt += 1
        for key_index, client in enumerate(clients):
            key_attempts += 1
            try:
                with audio_file.open("rb") as file_handle:
                    response = client.audio.transcriptions.create(
                        file=(audio_file.name, file_handle.read()),
                        model=MODEL_NAME,
                        response_format="verbose_json",
                        language=LANGUAGE,
                        temperature=0.0,
                        timeout=request_timeout_seconds,
                    )
                return [dict(segment) for segment in response.segments]
            except Exception as exc:  # noqa: BLE001
                err_str = str(exc)
                last_error = err_str
                # Rate limit on a non-final key: fail over to the next key.
                if self._is_rate_limit_error(err_str) and key_index < len(clients) - 1:
                    continue
                # Permanent failure: surface immediately, no retries left to spend.
                if not self._should_retry_request(err_str):
                    raise self._transcribe_failed(
                        audio_file,
                        err_str,
                        request_attempts=attempt,
                        key_attempts=key_attempts,
                        api_key_count=len(clients),
                        request_timeout_seconds=request_timeout_seconds,
                    ) from exc
                # Transient failure: stop this key sweep and use one retry attempt.
                break
        if attempt <= request_max_retries:
            if request_retry_backoff_seconds > 0:
                time.sleep(request_retry_backoff_seconds)
            continue
        raise self._transcribe_failed(
            audio_file,
            last_error,
            request_attempts=attempt,
            key_attempts=key_attempts,
            api_key_count=len(clients),
            request_timeout_seconds=request_timeout_seconds,
        )
|
||||||
|
|
||||||
|
@staticmethod
def _transcribe_failed(
    audio_file: Path,
    error_text: str,
    *,
    request_attempts: int,
    key_attempts: int,
    api_key_count: int,
    request_timeout_seconds: int,
) -> ModuleError:
    """Build the retryable ModuleError describing a failed Groq transcription."""
    failure_details = {
        "error": error_text,
        "request_attempts": request_attempts,
        "key_attempts": key_attempts,
        "api_key_count": api_key_count,
        "request_timeout_seconds": request_timeout_seconds,
    }
    return ModuleError(
        code="GROQ_TRANSCRIBE_FAILED",
        message=f"Groq 转录失败: {audio_file.name}",
        retryable=True,
        details=failure_details,
    )
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _is_rate_limit_error(error_text: str) -> bool:
|
||||||
|
lowered = error_text.lower()
|
||||||
|
return any(
|
||||||
|
needle in lowered
|
||||||
|
for needle in (
|
||||||
|
"429",
|
||||||
|
"rate_limit",
|
||||||
|
"rate limit",
|
||||||
|
"too many requests",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _should_retry_request(error_text: str) -> bool:
|
||||||
|
lowered = error_text.lower()
|
||||||
|
return any(
|
||||||
|
needle in lowered
|
||||||
|
for needle in (
|
||||||
|
"429",
|
||||||
|
"rate_limit",
|
||||||
|
"timed out",
|
||||||
|
"timeout",
|
||||||
|
"connection error",
|
||||||
|
"connect error",
|
||||||
|
"server disconnected",
|
||||||
|
"502",
|
||||||
|
"503",
|
||||||
|
"504",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _format_srt_time(seconds: float) -> str:
|
def _format_srt_time(seconds: float) -> str:
|
||||||
|
|||||||
@ -88,7 +88,7 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
|
|||||||
self.assertEqual(result["split"]["reason"], "comment_disabled")
|
self.assertEqual(result["split"]["reason"], "comment_disabled")
|
||||||
self.assertEqual(len(api.reply_calls), 1)
|
self.assertEqual(len(api.reply_calls), 1)
|
||||||
self.assertIn("P1:\n1. Song A — Artist A", api.reply_calls[0]["content"])
|
self.assertIn("P1:\n1. Song A — Artist A", api.reply_calls[0]["content"])
|
||||||
self.assertIn("P2:\n1. Song B — Artist B", api.reply_calls[0]["content"])
|
self.assertIn("P2:\n2. Song B — Artist B", api.reply_calls[0]["content"])
|
||||||
|
|
||||||
def test_split_comment_skips_on_non_anchor_task(self) -> None:
|
def test_split_comment_skips_on_non_anchor_task(self) -> None:
|
||||||
api = _FakeBilibiliApi()
|
api = _FakeBilibiliApi()
|
||||||
@ -212,6 +212,63 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
|
|||||||
self.assertEqual(result["split"]["reason"], "comment_disabled")
|
self.assertEqual(result["split"]["reason"], "comment_disabled")
|
||||||
self.assertTrue((work_dir / "comment_done.flag").exists())
|
self.assertTrue((work_dir / "comment_done.flag").exists())
|
||||||
|
|
||||||
|
def test_comment_format_can_be_configured_from_upload_config(self) -> None:
    """Custom split_header/split_song_line templates from upload_config.json
    drive the posted comment: header placeholders are filled and the
    previous-link line is dropped when there is no previous full video."""
    api = _FakeBilibiliApi()
    provider = BilibiliTopCommentProvider(bilibili_api=api)
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        work_dir = root / "task-1"
        work_dir.mkdir(parents=True, exist_ok=True)
        task = Task(
            id="task-1",
            source_type="local_file",
            source_path=str(work_dir / "source.mp4"),
            title="task-1",
            status="published",
            created_at=utc_now_iso(),
            updated_at=utc_now_iso(),
        )
        # songs.json is the structured source; songs.txt is the legacy fallback.
        (work_dir / "songs.txt").write_text("00:00:00 Song From Text — Artist T\n", encoding="utf-8")
        (work_dir / "songs.json").write_text(
            json.dumps({"songs": [{"title": "Song A", "artist": "Artist A"}]}),
            encoding="utf-8",
        )
        (work_dir / "bvid.txt").write_text("BV1COMMENT123", encoding="utf-8")
        (work_dir / "full_video_bvid.txt").write_text("BV1FULL12345", encoding="utf-8")
        cookies_file = root / "cookies.json"
        cookies_file.write_text("{}", encoding="utf-8")
        upload_config = root / "upload_config.json"
        upload_config.write_text(
            json.dumps(
                {
                    "comment_template": {
                        "split_header": "这是纯享:{current_full_video_link}\n上一场:{previous_full_video_link}",
                        "split_song_line": "#{song_index} {title} / {artist}",
                    }
                }
            ),
            encoding="utf-8",
        )

        result = provider.comment(
            task,
            {
                "session_dir": str(root),
                "cookies_file": str(cookies_file),
                "upload_config_file": str(upload_config),
                "post_split_comment": True,
                "post_full_video_timeline_comment": False,
            },
        )

        self.assertEqual(result["status"], "ok")
        self.assertEqual(result["split"]["reason"], "comment_disabled")
        self.assertEqual(len(api.reply_calls), 1)
        content = str(api.reply_calls[0]["content"])
        self.assertIn("这是纯享:https://www.bilibili.com/video/BV1FULL12345", content)
        # No previous full video exists, so the previous-link line must be omitted.
        self.assertNotIn("上一场:", content)
        self.assertIn("#1 Song A / Artist A", content)
|
||||||
|
|
||||||
def test_full_comment_aggregates_session_parts_on_anchor_task(self) -> None:
|
def test_full_comment_aggregates_session_parts_on_anchor_task(self) -> None:
|
||||||
api = _FakeBilibiliApi()
|
api = _FakeBilibiliApi()
|
||||||
provider = BilibiliTopCommentProvider(bilibili_api=api)
|
provider = BilibiliTopCommentProvider(bilibili_api=api)
|
||||||
@ -263,8 +320,8 @@ class BilibiliTopCommentProviderTests(unittest.TestCase):
|
|||||||
self.assertEqual(result["full"]["status"], "skipped")
|
self.assertEqual(result["full"]["status"], "skipped")
|
||||||
self.assertEqual(result["full"]["reason"], "comment_disabled")
|
self.assertEqual(result["full"]["reason"], "comment_disabled")
|
||||||
self.assertEqual(len(api.reply_calls), 1)
|
self.assertEqual(len(api.reply_calls), 1)
|
||||||
self.assertIn("P1:\n00:00:01 Song A\n00:02:00 Song B", api.reply_calls[0]["content"])
|
self.assertIn("P1:\n1. 00:00:01 Song A\n2. 00:02:00 Song B", api.reply_calls[0]["content"])
|
||||||
self.assertIn("P2:\n00:00:03 Song C", api.reply_calls[0]["content"])
|
self.assertIn("P2:\n3. 00:00:03 Song C", api.reply_calls[0]["content"])
|
||||||
|
|
||||||
def test_full_comment_skips_on_non_anchor_task(self) -> None:
|
def test_full_comment_skips_on_non_anchor_task(self) -> None:
|
||||||
api = _FakeBilibiliApi()
|
api = _FakeBilibiliApi()
|
||||||
|
|||||||
@ -269,6 +269,117 @@ class BiliupCliPublishProviderTests(unittest.TestCase):
|
|||||||
self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
|
self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
|
||||||
self.assertTrue((work_dir / "upload_done.flag").exists())
|
self.assertTrue((work_dir / "upload_done.flag").exists())
|
||||||
|
|
||||||
|
def test_publish_recovers_bvid_from_progress_when_bvid_file_was_removed(self) -> None:
    """With bvid.txt missing but publish_progress.json present, publish must
    recover the bvid, rewrite bvid.txt, and resume with a single append run."""
    adapter = _FakeBiliupAdapter()
    provider = BiliupCliPublishProvider(adapter=adapter)
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        work_dir = root / "task-1"
        work_dir.mkdir(parents=True, exist_ok=True)
        task = Task(
            id="task-1",
            source_type="local_file",
            source_path=str(work_dir / "source.mp4"),
            title="task-1",
            status="split_done",
            created_at=utc_now_iso(),
            updated_at=utc_now_iso(),
        )
        (work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
        (work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
        # Progress file alone carries the bvid; batch 2 is already done,
        # so only one append invocation should remain.
        (work_dir / "publish_progress.json").write_text(
            json.dumps({"bvid": "BV1RESUME1234", "completed_append_batches": [2]}),
            encoding="utf-8",
        )
        upload_config = root / "upload_config.json"
        upload_config.write_text("{}", encoding="utf-8")
        clips = []
        for index in range(1, 16):
            clip_path = work_dir / f"clip-{index}.mp4"
            clip_path.write_text("fake", encoding="utf-8")
            clips.append(
                Artifact(
                    id=None,
                    task_id=task.id,
                    artifact_type="clip_video",
                    path=str(clip_path),
                    metadata_json="{}",
                    created_at=utc_now_iso(),
                )
            )

        with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
            record = provider.publish(
                task,
                clips,
                {
                    "session_dir": str(root),
                    "upload_config_file": str(upload_config),
                    "biliup_path": "runtime/biliup",
                    "cookie_file": "runtime/cookies.json",
                    "retry_count": 2,
                    "command_timeout_seconds": 123,
                },
            )

        self.assertEqual(record.bvid, "BV1RESUME1234")
        self.assertEqual((work_dir / "bvid.txt").read_text(encoding="utf-8"), "BV1RESUME1234")
        self.assertEqual(len(adapter.run_calls), 1)
        self.assertIn("append", adapter.run_calls[0]["cmd"])
        self.assertIn("BV1RESUME1234", adapter.run_calls[0]["cmd"])
|
||||||
|
|
||||||
|
def test_publish_renumbers_clip_filenames_across_aggregated_sessions(self) -> None:
    """Clips aggregated from two session dirs must get one continuous numeric
    prefix: the second session's files continue after the first (11..17)."""
    adapter = _FakeBiliupAdapter()
    provider = BiliupCliPublishProvider(adapter=adapter)
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        work_dir = root / "task-1"
        second_dir = root / "task-2"
        work_dir.mkdir(parents=True, exist_ok=True)
        second_dir.mkdir(parents=True, exist_ok=True)
        task = Task(
            id="task-1",
            source_type="local_file",
            source_path=str(work_dir / "source.mp4"),
            title="task-1",
            status="split_done",
            created_at=utc_now_iso(),
            updated_at=utc_now_iso(),
        )
        (work_dir / "songs.txt").write_text("00:00:00 Test Song - Tester\n", encoding="utf-8")
        (work_dir / "songs.json").write_text(json.dumps({"songs": [{"title": "Test Song"}]}), encoding="utf-8")
        upload_config = root / "upload_config.json"
        upload_config.write_text("{}", encoding="utf-8")
        clips = []
        # 10 clips from the anchor task, 7 from a second aggregated task.
        for index in range(1, 11):
            clip_path = work_dir / f"{index:02d}_first-{index}.mp4"
            clip_path.write_text("fake", encoding="utf-8")
            clips.append(Artifact(None, task.id, "clip_video", str(clip_path), "{}", utc_now_iso()))
        for index in range(1, 8):
            clip_path = second_dir / f"{index:02d}_second-{index}.mp4"
            clip_path.write_text("fake", encoding="utf-8")
            clips.append(Artifact(None, "task-2", "clip_video", str(clip_path), "{}", utc_now_iso()))

        with patch("biliup_next.modules.publish.providers.biliup_cli.time.sleep", return_value=None):
            provider.publish(
                task,
                clips,
                {
                    "session_dir": str(root),
                    "upload_config_file": str(upload_config),
                    "biliup_path": "runtime/biliup",
                    "cookie_file": "runtime/cookies.json",
                    "retry_count": 1,
                    "command_timeout_seconds": 123,
                },
            )

        all_uploaded = [part for call in adapter.run_calls for part in call["cmd"] if str(part).endswith(".mp4")]
        self.assertTrue(all_uploaded[0].endswith("01_first-1.mp4"))
        self.assertTrue(all_uploaded[9].endswith("10_first-10.mp4"))
        # Second session's first clip continues the numbering at 11.
        self.assertTrue(all_uploaded[10].endswith("11_second-1.mp4"))
        self.assertTrue(all_uploaded[16].endswith("17_second-7.mp4"))
|
||||||
|
|
||||||
def test_publish_creates_progress_from_existing_bvid_for_append_resume(self) -> None:
|
def test_publish_creates_progress_from_existing_bvid_for_append_resume(self) -> None:
|
||||||
adapter = _FakeBiliupAdapter()
|
adapter = _FakeBiliupAdapter()
|
||||||
provider = BiliupCliPublishProvider(adapter=adapter)
|
provider = BiliupCliPublishProvider(adapter=adapter)
|
||||||
|
|||||||
85
tests/test_collection_service.py
Normal file
85
tests/test_collection_service.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from biliup_next.core.models import Task, TaskStep, utc_now_iso
|
||||||
|
from biliup_next.modules.collection.service import CollectionService
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeRegistry:
    """Provider-registry double that resolves every lookup to one stub provider."""

    def __init__(self, provider) -> None:  # type: ignore[no-untyped-def]
        self.provider = provider

    def get(self, provider_type: str, provider_id: str):  # type: ignore[no-untyped-def]
        # Lookup arguments are irrelevant: the single stub is always returned.
        return self.provider
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeProvider:
    """Collection-provider double whose sync is a no-op reporting 'skipped'."""

    def sync(self, task, target: str, settings: dict[str, object]) -> dict[str, object]:  # type: ignore[no-untyped-def]
        outcome: dict[str, object] = {"status": "skipped", "target": target}
        return outcome
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeRepo:
    """In-memory repository double for CollectionService tests.

    Seeds a single running task with pending collection_a/collection_b steps
    and records every task-status transition for later assertions.
    """

    def __init__(self) -> None:
        now = utc_now_iso()
        # Task fields: id, source_type, source_path, title, status, created_at, updated_at.
        self.task = Task("task-1", "local_file", "/tmp/source.mp4", "task-1", "running", now, now)
        # TaskStep fields: id, task_id, step_name, status, error_code,
        # error_message, retry_count, started_at, finished_at.
        self.steps = {
            "collection_a": TaskStep(None, "task-1", "collection_a", "pending", None, None, 0, None, None),
            "collection_b": TaskStep(None, "task-1", "collection_b", "pending", None, None, 0, None, None),
        }
        # (task_id, status) pairs in the order the service applied them.
        self.task_status_updates: list[tuple[str, str]] = []

    def get_task(self, task_id: str):  # type: ignore[no-untyped-def]
        # Only the single seeded task exists.
        return self.task if task_id == self.task.id else None

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # type: ignore[no-untyped-def]
        # The step is rebuilt rather than mutated, overriding only the fields
        # supplied in kwargs and keeping the rest from the previous TaskStep.
        step = self.steps[step_name]
        self.steps[step_name] = TaskStep(
            step.id,
            step.task_id,
            step.step_name,
            status,
            kwargs.get("error_code", step.error_code),
            kwargs.get("error_message", step.error_message),
            kwargs.get("retry_count", step.retry_count),
            kwargs.get("started_at", step.started_at),
            kwargs.get("finished_at", step.finished_at),
        )

    def list_steps(self, task_id: str) -> list[TaskStep]:
        return list(self.steps.values())

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        # updated_at is accepted for interface parity but not stored.
        self.task_status_updates.append((task_id, status))
|
||||||
|
|
||||||
|
|
||||||
|
class CollectionServiceTests(unittest.TestCase):
    """Status/step bookkeeping behavior of CollectionService.run."""

    def test_collection_a_restores_commented_status_so_collection_b_can_run(self) -> None:
        """After collection_a succeeds, the task returns to 'commented' and
        collection_b stays pending, so target b can still be executed."""
        repo = _FakeRepo()
        service = CollectionService(_FakeRegistry(_FakeProvider()), repo)  # type: ignore[arg-type]
        # Stub cleanup so the test never touches the filesystem.
        service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {})  # type: ignore[assignment]

        result = service.run("task-1", "a", {"provider": "fake"})

        self.assertEqual(result["status"], "skipped")
        self.assertEqual(repo.steps["collection_a"].status, "succeeded")
        self.assertEqual(repo.steps["collection_b"].status, "pending")
        self.assertEqual(repo.task_status_updates[-1], ("task-1", "commented"))

    def test_collection_b_marks_collection_synced_when_both_steps_succeeded(self) -> None:
        """When collection_a already succeeded and b completes, the task ends
        as 'collection_synced' and cleanup results are surfaced in the result."""
        repo = _FakeRepo()
        repo.steps["collection_a"] = TaskStep(None, "task-1", "collection_a", "succeeded", None, None, 0, None, utc_now_iso())
        service = CollectionService(_FakeRegistry(_FakeProvider()), repo)  # type: ignore[arg-type]
        service.cleanup = SimpleNamespace(cleanup_task_outputs=lambda task_id, settings: {"deleted": []})  # type: ignore[assignment]

        result = service.run("task-1", "b", {"provider": "fake"})

        self.assertEqual(result["status"], "skipped")
        self.assertEqual(repo.steps["collection_b"].status, "succeeded")
        self.assertEqual(repo.task_status_updates[-1], ("task-1", "collection_synced"))
        self.assertEqual(result["cleanup"], {"deleted": []})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
277
tests/test_groq_transcribe_provider.py
Normal file
277
tests/test_groq_transcribe_provider.py
Normal file
@ -0,0 +1,277 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from biliup_next.core.errors import ModuleError
|
||||||
|
from biliup_next.core.models import Artifact, Task
|
||||||
|
from biliup_next.modules.transcribe.providers.groq import GroqTranscribeProvider
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeResponse:
    """Stand-in for a Groq transcription response; only ``.segments`` is read."""

    def __init__(self, segments):  # noqa: ANN001
        self.segments = segments
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeTranscriptions:
    """Scripted ``transcriptions`` endpoint double.

    Replays pre-seeded outcomes in order: an Exception outcome is raised,
    anything else is returned. Every call's kwargs are recorded in ``calls``.
    """

    def __init__(self, outcomes: list[object]) -> None:
        self.outcomes = list(outcomes)
        self.calls: list[dict[str, object]] = []

    def create(self, **kwargs):  # noqa: ANN003
        self.calls.append(kwargs)
        next_outcome = self.outcomes.pop(0)
        if isinstance(next_outcome, Exception):
            raise next_outcome
        return next_outcome
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeGroqClient:
    """Groq SDK double exposing only ``client.audio.transcriptions.create``."""

    def __init__(self, outcomes: list[object]) -> None:
        scripted = _FakeTranscriptions(outcomes)
        self.audio = SimpleNamespace(transcriptions=scripted)
|
||||||
|
|
||||||
|
|
||||||
|
class GroqTranscribeProviderTests(unittest.TestCase):
|
||||||
|
def test_transcribe_retries_timeout_and_writes_srt_atomically(self) -> None:
    """A timed-out request is retried once, the final SRT is written without
    leaving a temp file behind, and a per-segment checkpoint is persisted."""
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segment = work_dir / "temp_audio" / "part_000.mp3"

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            segment.parent.mkdir(parents=True, exist_ok=True)
            segment.write_bytes(b"audio")

        # First call times out, second succeeds -> exactly two API calls expected.
        client = _FakeGroqClient(
            [
                RuntimeError("Request timed out."),
                _FakeResponse([{"start": 0, "end": 1.2, "text": "hello"}]),
            ]
        )

        settings = {
            "groq_api_key": "gsk_test",
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 33,
            "request_max_retries": 1,
            "request_retry_backoff_seconds": 0,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", return_value=client) as groq_ctor:
            with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                artifact = provider.transcribe(task, source_video, settings)

        self.assertEqual(Path(artifact.path).read_text(encoding="utf-8"), "1\n00:00:00,000 --> 00:00:01,199\nhello\n\n")
        # Atomic write: the staging file must not survive.
        self.assertFalse((work_dir / ".demo.srt.tmp").exists())
        self.assertEqual(len(client.audio.transcriptions.calls), 2)
        self.assertEqual(client.audio.transcriptions.calls[0]["timeout"], 33)
        self.assertTrue((work_dir / "transcribe_segments" / "part_000.json").exists())
        # SDK-level retries disabled: the provider owns the retry policy.
        groq_ctor.assert_called_once_with(api_key="gsk_test", timeout=33, max_retries=0)
|
||||||
|
|
||||||
|
def test_transcribe_reuses_completed_segment_checkpoints(self) -> None:
    """A valid checkpoint for part_000 skips its API call: only part_001 is
    transcribed, and both results land in the SRT with correct offsets."""
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segments = [work_dir / "temp_audio" / "part_000.mp3", work_dir / "temp_audio" / "part_001.mp3"]
        checkpoint_dir = work_dir / "transcribe_segments"
        checkpoint_dir.mkdir()
        # Checkpoint matches the pinned model/language/segment duration below.
        (checkpoint_dir / "part_000.json").write_text(
            json.dumps(
                {
                    "provider": "groq",
                    "model": "whisper-large-v3-turbo",
                    "language": "zh",
                    "audio_file": "part_000.mp3",
                    "segment_duration_seconds": 75,
                    "segments": [{"start": 0, "end": 1, "text": "first"}],
                }
            ),
            encoding="utf-8",
        )

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            for segment in segments:
                segment.parent.mkdir(parents=True, exist_ok=True)
                segment.write_bytes(b"audio")

        client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.5, "text": "second"}])])
        settings = {
            "groq_api_key": "gsk_test",
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 33,
            "request_max_retries": 1,
            "request_retry_backoff_seconds": 0,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", return_value=client):
            with patch.object(provider, "_initial_segment_duration", return_value=75):
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)

        srt = Path(artifact.path).read_text(encoding="utf-8")
        self.assertIn("00:00:00,000 --> 00:00:01,000\nfirst", srt)
        # Second segment starts at 75s, so its cue is offset to 00:01:15.
        self.assertIn("00:01:15,000 --> 00:01:16,500\nsecond", srt)
        self.assertEqual(len(client.audio.transcriptions.calls), 1)
        self.assertEqual(client.audio.transcriptions.calls[0]["file"][0], "part_001.mp3")
        self.assertTrue((checkpoint_dir / "part_001.json").exists())
|
||||||
|
|
||||||
|
def test_transcribe_switches_to_next_api_key_on_rate_limit(self) -> None:
    """A 429 on the first API key fails over to the second key, which then
    serves the successful transcription — one call on each client."""
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segment = work_dir / "temp_audio" / "part_000.mp3"

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            segment.parent.mkdir(parents=True, exist_ok=True)
            segment.write_bytes(b"audio")

        limited_client = _FakeGroqClient([RuntimeError("Error code: 429 rate_limit")])
        fallback_client = _FakeGroqClient([_FakeResponse([{"start": 0, "end": 1.2, "text": "fallback"}])])
        settings = {
            "groq_api_key": "",
            "groq_api_keys": ["gsk_first", "gsk_second"],
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 20,
            "request_max_retries": 0,
            "request_retry_backoff_seconds": 0,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", side_effect=[limited_client, fallback_client]) as groq_ctor:
            with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                artifact = provider.transcribe(task, source_video, settings)

        self.assertIn("fallback", Path(artifact.path).read_text(encoding="utf-8"))
        self.assertEqual(len(limited_client.audio.transcriptions.calls), 1)
        self.assertEqual(len(fallback_client.audio.transcriptions.calls), 1)
        # One client per configured key, constructed in list order.
        self.assertEqual([call.kwargs["api_key"] for call in groq_ctor.call_args_list], ["gsk_first", "gsk_second"])
|
||||||
|
|
||||||
|
def test_transcribe_waits_after_all_api_keys_are_rate_limited(self) -> None:
    """When every key is rate-limited, the provider backs off once for the
    configured seconds and then retries from the first key, which succeeds."""
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segment = work_dir / "temp_audio" / "part_000.mp3"

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            segment.parent.mkdir(parents=True, exist_ok=True)
            segment.write_bytes(b"audio")

        # First key: rate limited then OK on the retry pass; second key: rate limited.
        first_client = _FakeGroqClient([RuntimeError("429 rate_limit"), _FakeResponse([{"start": 0, "end": 1, "text": "retry ok"}])])
        second_client = _FakeGroqClient([RuntimeError("429 rate_limit")])
        settings = {
            "groq_api_key": "",
            "groq_api_keys": ["gsk_first", "gsk_second"],
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 20,
            "request_max_retries": 1,
            "request_retry_backoff_seconds": 7,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", side_effect=[first_client, second_client]):
            with patch("time.sleep") as sleep_mock:
                with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
                    artifact = provider.transcribe(task, source_video, settings)

        self.assertIn("retry ok", Path(artifact.path).read_text(encoding="utf-8"))
        # Exactly one backoff of the configured 7 seconds between passes.
        sleep_mock.assert_called_once_with(7)
        self.assertEqual(len(first_client.audio.transcriptions.calls), 2)
        self.assertEqual(len(second_client.audio.transcriptions.calls), 1)
|
||||||
|
|
||||||
|
def test_transcribe_raises_after_retry_budget_is_exhausted(self) -> None:
    """A persistent connection error exhausts the retry budget and surfaces
    as a ModuleError naming the failing segment."""
    provider = GroqTranscribeProvider()
    task = Task("task-1", "local_file", "/tmp/input.mp4", "demo", "created", "2026-01-01T00:00:00+00:00", "2026-01-01T00:00:00+00:00")

    with tempfile.TemporaryDirectory() as tmp:
        work_dir = Path(tmp)
        source_path = work_dir / "input.mp4"
        source_path.write_bytes(b"video")
        source_video = Artifact(None, task.id, "source_video", str(source_path), "{}", "2026-01-01T00:00:00+00:00")
        segment = work_dir / "temp_audio" / "part_000.mp3"

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            # Fake ffmpeg output: one segment on disk.
            segment.parent.mkdir(parents=True, exist_ok=True)
            segment.write_bytes(b"audio")

        # Two consecutive failures: the initial attempt plus the single retry.
        client = _FakeGroqClient([RuntimeError("Connection error."), RuntimeError("Connection error.")])
        settings = {
            "groq_api_key": "gsk_test",
            "ffmpeg_bin": "ffmpeg",
            "max_file_size_mb": 23,
            "request_timeout_seconds": 20,
            "request_max_retries": 1,
            "request_retry_backoff_seconds": 0,
            "serialize_groq_requests": False,
        }

        with patch("groq.Groq", return_value=client), \
                patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments), \
                self.assertRaises(ModuleError) as exc_info:
            provider.transcribe(task, source_video, settings)

        self.assertEqual(exc_info.exception.message, "Groq 转录失败: part_000.mp3")
|
||||||
|
|
||||||
|
def test_initial_segment_duration_keeps_safety_margin(self) -> None:
    """The initial segment duration must stay strictly under 1536 seconds."""
    computed = GroqTranscribeProvider._initial_segment_duration(12)
    self.assertLess(computed, 1536)
|
||||||
|
|
||||||
|
def test_extract_audio_segments_retries_when_segment_exceeds_size_limit(self) -> None:
    """When a produced segment exceeds the byte cap, the size guard retries
    with a shorter duration (100 -> 75 here) and returns the one that fit."""
    provider = GroqTranscribeProvider()

    with tempfile.TemporaryDirectory() as tmp:
        work_dir = Path(tmp)
        audio_dir = work_dir / "temp_audio"
        audio_dir.mkdir()
        pattern = audio_dir / "part_%03d.mp3"
        seen_durations: list[int] = []

        def fake_extract_audio_segments(**kwargs):  # noqa: ANN003
            seen_durations.append(int(kwargs["segment_duration"]))
            # First attempt writes 20 bytes (over the 10-byte cap); retry fits.
            payload_size = 20 if len(seen_durations) == 1 else 5
            (audio_dir / "part_000.mp3").write_bytes(b"x" * payload_size)

        with patch.object(provider, "_extract_audio_segments", side_effect=fake_extract_audio_segments):
            result = provider._extract_audio_segments_with_size_guard(
                ffmpeg_bin="ffmpeg",
                source_path=work_dir / "input.mp4",
                output_pattern=pattern,
                temp_audio_dir=audio_dir,
                initial_segment_duration=100,
                max_segment_bytes=10,
            )

        self.assertEqual(seen_durations, [100, 75])
        self.assertEqual(result, 75)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly: python tests/<module>.py
if __name__ == "__main__":
    unittest.main()
|
||||||
85
tests/test_ingest_scan_stage.py
Normal file
85
tests/test_ingest_scan_stage.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from biliup_next.core.providers import ProviderManifest
|
||||||
|
from biliup_next.core.registry import Registry
|
||||||
|
from biliup_next.infra.db import Database
|
||||||
|
from biliup_next.infra.task_repository import TaskRepository
|
||||||
|
from biliup_next.modules.ingest.service import IngestService
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeLocalFileProvider:
    """Minimal ingest-provider stub registered as ``local_file``.

    Accepts any existing regular file; anything else is a test failure.
    """

    manifest = ProviderManifest(
        id="local_file",
        name="Fake Local File Ingest",
        version="0.1.0",
        provider_type="ingest_provider",
        entrypoint="tests.test_ingest_scan_stage:_FakeLocalFileProvider",
        capabilities=["ingest"],
        enabled_by_default=True,
    )

    def validate_source(self, source_path: Path, settings: dict[str, object]) -> None:
        """Raise AssertionError when the scan stage hands over a bad path."""
        if not (source_path.exists() and source_path.is_file()):
            raise AssertionError(f"unexpected source path: {source_path}")
|
||||||
|
|
||||||
|
|
||||||
|
class IngestScanStageTests(unittest.TestCase):
    """End-to-end checks for IngestService.scan_stage against a temp workspace."""

    def setUp(self) -> None:
        # Build an isolated workspace: stage/backup/session dirs plus a fresh DB,
        # with the fake provider registered under the "local_file" id.
        self.tempdir = tempfile.TemporaryDirectory()
        root = Path(self.tempdir.name)
        self.stage_dir = root / "stage"
        self.backup_dir = root / "backup"
        self.session_dir = root / "session"
        self.stage_dir.mkdir()
        self.backup_dir.mkdir()
        self.session_dir.mkdir()

        db = Database(root / "test.db")
        db.initialize()
        repo = TaskRepository(db)
        registry = Registry()
        provider = _FakeLocalFileProvider()
        registry.register("ingest_provider", "local_file", provider, provider.manifest)
        self.service = IngestService(registry=registry, repo=repo)

    def tearDown(self) -> None:
        # Explicit cleanup so the temp workspace never leaks between tests.
        self.tempdir.cleanup()

    def test_scan_stage_uses_moved_file_for_reference_timestamp(self) -> None:
        # The Chinese filename encodes the recording timestamp (MM月DD日 HH时MM分).
        source_path = self.stage_dir / "王海颖唱歌录播 04月14日 17时49分.mp4"
        source_path.write_bytes(b"fake-video")

        settings = {
            "provider": "local_file",
            "stage_dir": str(self.stage_dir),
            "backup_dir": str(self.backup_dir),
            "session_dir": str(self.session_dir),
            "allowed_extensions": [".mp4"],
            "ffprobe_bin": "ffprobe",
            "min_duration_seconds": 0,
            "stability_wait_seconds": 0,
            "meta_sidecar_enabled": True,
        }

        # Stub ffprobe so the test never shells out to a real binary.
        self.service._probe_duration_seconds = lambda *_args, **_kwargs: 120.0  # type: ignore[method-assign]

        result = self.service.scan_stage(settings)

        self.assertEqual(len(result["accepted"]), 1)
        accepted = result["accepted"][0]
        moved_path = Path(str(accepted["source_path"]))
        # The staged file must have been moved (not copied) out of stage_dir.
        self.assertTrue(moved_path.exists())
        self.assertFalse(source_path.exists())
        # Task and context are keyed by the moved file's stem.
        task = self.service.repo.get_task(moved_path.stem)
        self.assertIsNotNone(task)
        context = self.service.repo.get_task_context(moved_path.stem)
        self.assertIsNotNone(context)
        # Per the test name: the timestamp should come from the moved file,
        # i.e. segment_started_at is populated even after the move.
        self.assertIsNotNone(context.segment_started_at)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly: python tests/test_ingest_scan_stage.py
if __name__ == "__main__":
    unittest.main()
|
||||||
@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
from unittest.mock import patch
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from biliup_next.core.config import SettingsService
|
from biliup_next.core.config import SettingsService
|
||||||
@ -78,6 +79,146 @@ class SettingsServiceTests(unittest.TestCase):
|
|||||||
self.assertTrue((config_dir / "settings.staged.json").exists())
|
self.assertTrue((config_dir / "settings.staged.json").exists())
|
||||||
self.assertEqual(bundle.settings["paths"]["cookies_file"], str((root / "runtime" / "cookies.json").resolve()))
|
self.assertEqual(bundle.settings["paths"]["cookies_file"], str((root / "runtime" / "cookies.json").resolve()))
|
||||||
|
|
||||||
|
def test_load_applies_environment_overrides_before_path_normalization(self) -> None:
    """Environment overrides are applied before relative-path resolution.

    Covers three override spellings: a plain legacy name (GROQ_API_KEY), a
    group-prefixed legacy name (COLLECTION_SEASON_ID_A), and the namespaced
    BILIUP_NEXT__GROUP__KEY form.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        config_dir = root / "config"
        config_dir.mkdir(parents=True, exist_ok=True)
        # Minimal schema covering every settings group the loader knows about.
        (config_dir / "settings.schema.json").write_text(
            """
            {
                "groups": {
                    "runtime": {
                        "database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}
                    },
                    "paths": {
                        "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                        "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                        "session_dir": {"type": "string", "default": "data/workspace/session"},
                        "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                        "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {
                        "ffprobe_bin": {"type": "string", "default": "ffprobe"},
                        "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"},
                        "yt_dlp_format": {"type": "string", "default": ""}
                    },
                    "transcribe": {
                        "groq_api_key": {"type": "string", "default": "", "sensitive": true},
                        "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "split": {
                        "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}
                    },
                    "song_detect": {
                        "codex_cmd": {"type": "string", "default": "codex"},
                        "qwen_cmd": {"type": "string", "default": "qwen"}
                    },
                    "publish": {
                        "biliup_path": {"type": "string", "default": "runtime/biliup"},
                        "cookie_file": {"type": "string", "default": "runtime/cookies.json"}
                    },
                    "collection": {
                        "season_id_a": {"type": "integer", "default": 0},
                        "season_id_b": {"type": "integer", "default": 0}
                    }
                }
            }
            """,
            encoding="utf-8",
        )
        (config_dir / "settings.standalone.example.json").write_text(
            """
            {
                "runtime": {"database_path": "data/workspace/biliup_next.db"},
                "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                },
                "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp", "yt_dlp_format": ""},
                "transcribe": {"groq_api_key": "", "ffmpeg_bin": "ffmpeg"},
                "split": {"ffmpeg_bin": "ffmpeg"},
                "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"},
                "collection": {"season_id_a": 0, "season_id_b": 0}
            }
            """,
            encoding="utf-8",
        )

        with patch.dict(
            "os.environ",
            {
                "GROQ_API_KEY": "gsk_test",
                "COLLECTION_SEASON_ID_A": "7196643",
                "BILIUP_NEXT__COLLECTION__SEASON_ID_B": "7196624",
                "BILIUP_NEXT__PATHS__STAGE_DIR": "data/custom-stage",
            },
            clear=True,
        ):
            bundle = SettingsService(root).load()

        self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "gsk_test")
        # Integer-typed settings are coerced from the string env value.
        self.assertEqual(bundle.settings["collection"]["season_id_a"], 7196643)
        self.assertEqual(bundle.settings["collection"]["season_id_b"], 7196624)
        # The env-provided relative path is resolved under the project root.
        self.assertEqual(bundle.settings["paths"]["stage_dir"], str((root / "data" / "custom-stage").resolve()))
|
||||||
|
|
||||||
|
def test_empty_environment_values_do_not_override_settings(self) -> None:
    """An env var that is set but empty must not clobber a file-provided value."""
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        config_dir = root / "config"
        config_dir.mkdir(parents=True, exist_ok=True)
        # Compact schema: same groups as the full test, fewer keys.
        (config_dir / "settings.schema.json").write_text(
            """
            {
                "groups": {
                    "runtime": {"database_path": {"type": "string", "default": "data/workspace/biliup_next.db"}},
                    "paths": {
                        "stage_dir": {"type": "string", "default": "data/workspace/stage"},
                        "backup_dir": {"type": "string", "default": "data/workspace/backup"},
                        "session_dir": {"type": "string", "default": "data/workspace/session"},
                        "cookies_file": {"type": "string", "default": "runtime/cookies.json"},
                        "upload_config_file": {"type": "string", "default": "runtime/upload_config.json"}
                    },
                    "ingest": {"ffprobe_bin": {"type": "string", "default": "ffprobe"}, "yt_dlp_cmd": {"type": "string", "default": "yt-dlp"}},
                    "transcribe": {"groq_api_key": {"type": "string", "default": ""}, "ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "split": {"ffmpeg_bin": {"type": "string", "default": "ffmpeg"}},
                    "song_detect": {"codex_cmd": {"type": "string", "default": "codex"}, "qwen_cmd": {"type": "string", "default": "qwen"}},
                    "publish": {"biliup_path": {"type": "string", "default": "runtime/biliup"}, "cookie_file": {"type": "string", "default": "runtime/cookies.json"}}
                }
            }
            """,
            encoding="utf-8",
        )
        (config_dir / "settings.standalone.example.json").write_text(
            """
            {
                "runtime": {"database_path": "data/workspace/biliup_next.db"},
                "paths": {
                    "stage_dir": "data/workspace/stage",
                    "backup_dir": "data/workspace/backup",
                    "session_dir": "data/workspace/session",
                    "cookies_file": "runtime/cookies.json",
                    "upload_config_file": "runtime/upload_config.json"
                },
                "ingest": {"ffprobe_bin": "ffprobe", "yt_dlp_cmd": "yt-dlp"},
                "transcribe": {"groq_api_key": "from-file", "ffmpeg_bin": "ffmpeg"},
                "split": {"ffmpeg_bin": "ffmpeg"},
                "song_detect": {"codex_cmd": "codex", "qwen_cmd": "qwen"},
                "publish": {"biliup_path": "runtime/biliup", "cookie_file": "runtime/cookies.json"}
            }
            """,
            encoding="utf-8",
        )

        # GROQ_API_KEY is present but empty -> the file value must win.
        with patch.dict("os.environ", {"GROQ_API_KEY": ""}, clear=True):
            bundle = SettingsService(root).load()

        self.assertEqual(bundle.settings["transcribe"]["groq_api_key"], "from-file")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@ -1,11 +1,15 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
from biliup_next.core.models import Artifact, Task, utc_now_iso
|
from biliup_next.core.models import Artifact, Task, utc_now_iso
|
||||||
|
from biliup_next.infra.adapters.codex_cli import CodexCliAdapter
|
||||||
|
from biliup_next.modules.song_detect.providers.codex import CodexSongDetector
|
||||||
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
|
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
|
||||||
|
|
||||||
|
|
||||||
@ -38,6 +42,33 @@ class FakeQwenCliAdapter:
|
|||||||
return type("Result", (), {"returncode": self.returncode, "stdout": "ok", "stderr": ""})()
|
return type("Result", (), {"returncode": self.returncode, "stdout": "ok", "stderr": ""})()
|
||||||
|
|
||||||
|
|
||||||
|
class FakeCodexCliAdapter:
    """Stand-in for the codex CLI adapter.

    Instead of spawning a process it drops a canned ``songs.json`` into the
    work directory and returns a Result-shaped object with fixed stdout/stderr.
    """

    def __init__(self, returncode: int = 0) -> None:
        self.returncode = returncode

    def run_song_detect(self, *, codex_cmd: str, work_dir: Path, prompt: str):  # noqa: ANN001
        """Write the canned detection payload and report success/failure."""
        payload = {
            "songs": [
                {
                    "start": "00:01:23,000",
                    "end": "00:03:45,000",
                    "title": "测试歌曲",
                    "artist": "测试歌手",
                    "confidence": 0.93,
                    "evidence": "歌词命中",
                }
            ]
        }
        target = work_dir / "songs.json"
        target.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
        return type("Result", (), {"returncode": self.returncode, "stdout": "codex stdout", "stderr": "codex stderr"})()
|
||||||
|
|
||||||
|
|
||||||
class SongDetectProviderTests(unittest.TestCase):
|
class SongDetectProviderTests(unittest.TestCase):
|
||||||
def test_qwen_cli_provider_generates_json_and_txt_artifacts(self) -> None:
|
def test_qwen_cli_provider_generates_json_and_txt_artifacts(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
@ -72,6 +103,67 @@ class SongDetectProviderTests(unittest.TestCase):
|
|||||||
self.assertTrue(Path(songs_txt.path).exists())
|
self.assertTrue(Path(songs_txt.path).exists())
|
||||||
self.assertIn("测试歌曲", Path(songs_txt.path).read_text(encoding="utf-8"))
|
self.assertIn("测试歌曲", Path(songs_txt.path).read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
def test_codex_provider_writes_execution_output_to_session_log(self) -> None:
    # The provider should keep execution details out of artifact metadata and
    # append them to codex.log in the session work dir instead.
    with tempfile.TemporaryDirectory() as tmpdir:
        work_dir = Path(tmpdir)
        subtitle_path = work_dir / "subtitle.srt"
        subtitle_path.write_text("1\n00:00:00,000 --> 00:00:03,000\n测试字幕\n", encoding="utf-8")
        provider = CodexSongDetector(adapter=FakeCodexCliAdapter())

        task = Task(
            id="task-1",
            source_type="local_file",
            source_path=str(work_dir / "video.mp4"),
            title="task-1",
            status="transcribed",
            created_at=utc_now_iso(),
            updated_at=utc_now_iso(),
        )
        subtitle = Artifact(
            id=None,
            task_id=task.id,
            artifact_type="subtitle_srt",
            path=str(subtitle_path),
            metadata_json=None,
            created_at=utc_now_iso(),
        )

        songs_json, songs_txt = provider.detect(task, subtitle, {"codex_cmd": "codex"})

        json_metadata = json.loads(songs_json.metadata_json)
        txt_metadata = json.loads(songs_txt.metadata_json)
        self.assertEqual(json_metadata["provider"], "codex")
        self.assertEqual(txt_metadata["provider"], "codex")
        # Execution details must NOT live in the artifact metadata...
        self.assertNotIn("execution", json_metadata)
        # ...they go to codex.log next to the source video instead.
        codex_log = work_dir / "codex.log"
        self.assertTrue(codex_log.exists())
        log_text = codex_log.read_text(encoding="utf-8")
        self.assertIn("returncode: 0", log_text)
        self.assertIn("codex stdout", log_text)
        self.assertIn("codex stderr", log_text)
|
||||||
|
|
||||||
|
def test_codex_cli_adapter_disables_inner_sandbox_and_normalizes_proxy_env(self) -> None:
    """The real adapter must bypass codex's inner sandbox and add a scheme
    prefix to bare host:port proxy values."""
    with tempfile.TemporaryDirectory() as tmpdir:
        recorded = []

        def fake_run(cmd, **kwargs):  # noqa: ANN001
            recorded.append((cmd, kwargs))
            return type("Result", (), {"returncode": 0, "stdout": "", "stderr": ""})()

        # HTTPS_PROXY deliberately lacks the http:// scheme.
        with patch.dict(os.environ, {"HTTPS_PROXY": "192.168.1.100:7897"}, clear=True), \
                patch("subprocess.run", side_effect=fake_run):
            CodexCliAdapter().run_song_detect(
                codex_cmd="codex",
                work_dir=Path(tmpdir),
                prompt="detect songs",
            )

        cmd, kwargs = recorded[0]
        self.assertIn("--dangerously-bypass-approvals-and-sandbox", cmd)
        self.assertNotIn("--full-auto", cmd)
        self.assertNotIn("workspace-write", cmd)
        self.assertEqual(kwargs["env"]["HTTPS_PROXY"], "http://192.168.1.100:7897")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
103
tests/test_song_detect_retry_policy.py
Normal file
103
tests/test_song_detect_retry_policy.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from biliup_next.app.retry_meta import retry_meta_for_step
|
||||||
|
from biliup_next.app.task_engine import next_runnable_step
|
||||||
|
from biliup_next.app.task_policies import resolve_failure
|
||||||
|
from biliup_next.core.errors import ModuleError
|
||||||
|
from biliup_next.core.models import TaskStep
|
||||||
|
from biliup_next.modules.song_detect.providers.qwen_cli import QwenCliSongDetector
|
||||||
|
|
||||||
|
|
||||||
|
class _Repo:
    """In-memory repository double seeded with one running ``song_detect`` step.

    Records every status update so assertions can inspect them afterwards.
    """

    def __init__(self) -> None:
        self.steps = [TaskStep(None, "task-1", "song_detect", "running", None, None, 0, None, None)]
        self.step_updates: list[tuple] = []
        self.task_updates: list[tuple] = []

    def list_steps(self, task_id: str):  # noqa: ANN001
        # Return a copy so callers cannot mutate our state.
        return list(self.steps)

    def get_task(self, task_id: str):  # noqa: ANN001
        return SimpleNamespace(id=task_id, status="running")

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # noqa: ANN001
        self.step_updates.append((task_id, step_name, status, kwargs))
        replacement = TaskStep(
            None,
            task_id,
            step_name,
            status,
            kwargs.get("error_code"),
            kwargs.get("error_message"),
            kwargs.get("retry_count", 0),
            kwargs.get("started_at"),
            kwargs.get("finished_at"),
        )
        self.steps = [replacement]

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        self.task_updates.append((task_id, status, updated_at))
|
||||||
|
|
||||||
|
|
||||||
|
class SongDetectRetryPolicyTests(unittest.TestCase):
    """Retry-scheduling behaviour for the song_detect step."""

    def test_retry_meta_reports_wait_window_for_song_detect(self) -> None:
        # finished_at far in the future -> the retry window has not elapsed.
        step = TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")

        payload = retry_meta_for_step(step, {"song_detect": {"retry_schedule_minutes": [10]}})

        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        # 10 minutes -> 600 seconds.
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_song_detect(self) -> None:
        # While the retry window is open, the engine must not pick a step.
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "song_detect": TaskStep(None, "task-1", "song_detect", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }

        step_name, waiting_payload = next_runnable_step(task, steps, state)

        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "song_detect")

    def test_resolve_failure_adds_song_detect_retry_delay(self) -> None:
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {},
                "song_detect": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }

        result = resolve_failure(task, repo, state, ModuleError(code="SONG_DETECT_FAILED", message="boom", retryable=True))

        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        # First retry uses the first schedule entry: 5 minutes -> 300 seconds.
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)

    def test_qwen_auth_errors_are_not_retryable(self) -> None:
        # 401-style failures should be classified as auth errors (no retry).
        self.assertTrue(QwenCliSongDetector._is_auth_error("[API Error: 401 invalid access token or token expired]"))
        self.assertFalse(QwenCliSongDetector._is_auth_error("temporary network failure"))
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly: python tests/test_song_detect_retry_policy.py
if __name__ == "__main__":
    unittest.main()
|
||||||
@ -51,6 +51,7 @@ class TaskEngineTests(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
state = {
|
state = {
|
||||||
"settings": {
|
"settings": {
|
||||||
|
"transcribe": {},
|
||||||
"comment": {"enabled": True},
|
"comment": {"enabled": True},
|
||||||
"collection": {"enabled": True},
|
"collection": {"enabled": True},
|
||||||
"paths": {},
|
"paths": {},
|
||||||
|
|||||||
84
tests/test_transcribe_retry_policy.py
Normal file
84
tests/test_transcribe_retry_policy.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from biliup_next.app.retry_meta import retry_meta_for_step
|
||||||
|
from biliup_next.app.task_engine import next_runnable_step
|
||||||
|
from biliup_next.app.task_policies import resolve_failure
|
||||||
|
from biliup_next.core.errors import ModuleError
|
||||||
|
from biliup_next.core.models import TaskStep
|
||||||
|
|
||||||
|
|
||||||
|
class _Repo:
    """In-memory repository double seeded with one running ``transcribe`` step.

    Mirrors the song-detect test double: every status change is recorded and
    the step list is replaced with the freshly updated step.
    """

    def __init__(self) -> None:
        self.steps = [TaskStep(None, "task-1", "transcribe", "running", None, None, 0, None, None)]
        self.step_updates: list[tuple] = []
        self.task_updates: list[tuple] = []

    def list_steps(self, task_id: str):  # noqa: ANN001
        # Hand out a copy so callers cannot mutate our state.
        return list(self.steps)

    def get_task(self, task_id: str):  # noqa: ANN001
        return SimpleNamespace(id=task_id, status="running")

    def update_step_status(self, task_id: str, step_name: str, status: str, **kwargs) -> None:  # noqa: ANN001
        self.step_updates.append((task_id, step_name, status, kwargs))
        self.steps = [
            TaskStep(
                None,
                task_id,
                step_name,
                status,
                kwargs.get("error_code"),
                kwargs.get("error_message"),
                kwargs.get("retry_count", 0),
                kwargs.get("started_at"),
                kwargs.get("finished_at"),
            )
        ]

    def update_task_status(self, task_id: str, status: str, updated_at: str) -> None:
        self.task_updates.append((task_id, status, updated_at))
|
||||||
|
|
||||||
|
|
||||||
|
class TranscribeRetryPolicyTests(unittest.TestCase):
    """Retry-scheduling behaviour for the transcribe step."""

    def test_retry_meta_reports_wait_window_for_transcribe(self) -> None:
        # finished_at far in the future -> the retry window has not elapsed.
        step = TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00")

        payload = retry_meta_for_step(step, {"transcribe": {"retry_schedule_minutes": [10]}})

        self.assertIsNotNone(payload)
        self.assertFalse(payload["retry_due"])
        # 10 minutes -> 600 seconds.
        self.assertEqual(payload["retry_wait_seconds"], 600)

    def test_next_runnable_step_waits_for_retryable_transcribe(self) -> None:
        # While the retry window is open, the engine must not pick a step.
        task = SimpleNamespace(id="task-1", status="failed_retryable")
        steps = {
            "transcribe": TaskStep(None, "task-1", "transcribe", "failed_retryable", "ERR", "boom", 1, None, "2099-01-01T00:00:00+00:00"),
        }
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [10]},
                "comment": {"enabled": True},
                "collection": {"enabled": True},
                "paths": {},
                "publish": {},
            }
        }

        step_name, waiting_payload = next_runnable_step(task, steps, state)

        self.assertIsNone(step_name)
        self.assertIsNotNone(waiting_payload)
        self.assertEqual(waiting_payload["step"], "transcribe")

    def test_resolve_failure_adds_transcribe_retry_delay(self) -> None:
        repo = _Repo()
        task = SimpleNamespace(id="task-1", status="running")
        state = {
            "settings": {
                "transcribe": {"retry_schedule_minutes": [5, 10]},
                "publish": {},
                "comment": {},
                "paths": {},
                "collection": {"enabled": True},
            }
        }

        result = resolve_failure(task, repo, state, ModuleError(code="GROQ_TRANSCRIBE_FAILED", message="boom", retryable=True))

        self.assertEqual(result["payload"]["retry_status"], "failed_retryable")
        # First retry uses the first schedule entry: 5 minutes -> 300 seconds.
        self.assertEqual(result["payload"]["next_retry_delay_seconds"], 300)
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test module directly: python tests/test_transcribe_retry_policy.py
if __name__ == "__main__":
    unittest.main()
|
||||||
170
tests/test_video_links.py
Normal file
170
tests/test_video_links.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from biliup_next.infra.adapters.full_video_locator import fetch_biliup_list
|
||||||
|
from biliup_next.infra.video_links import link_context_for_task
|
||||||
|
|
||||||
|
|
||||||
|
class VideoLinksTests(unittest.TestCase):
|
||||||
|
def test_fetch_biliup_list_keeps_pubing_videos(self) -> None:
    """Only publicly visible (开放浏览) and in-review (审核中) uploads survive
    the listing filter; private ones and log noise are dropped."""
    output = (
        "2026-04-22 15:56:43 INFO biliup_cli::uploader: user: test\n"
        "BVREVIEW\t王海颖唱歌录播 04月22日 15时56分\t审核中\n"
        "BVPUB\t王海颖唱歌录播 04月20日 22时08分\t开放浏览\n"
        "BVPRIVATE\t私密视频\t仅自己可见\n"
    )
    completed = subprocess.CompletedProcess(["biliup"], 0, stdout=output, stderr="")
    with patch(
        "biliup_next.infra.adapters.full_video_locator.subprocess.run",
        return_value=completed,
    ):
        videos = fetch_biliup_list({"biliup_path": "biliup", "cookie_file": "cookies.json"}, max_pages=1)

    expected = [
        {"bvid": "BVREVIEW", "title": "王海颖唱歌录播 04月22日 15时56分"},
        {"bvid": "BVPUB", "title": "王海颖唱歌录播 04月20日 22时08分"},
    ]
    self.assertEqual(videos, expected)
|
||||||
|
|
||||||
|
def test_previous_live_falls_back_to_biliup_list(self) -> None:
    # With no repo context, the previous-live links should be resolved from
    # the remote biliup listing alone.
    with tempfile.TemporaryDirectory() as tmpdir:
        source_path = Path(tmpdir) / "source.mp4"
        source_path.write_bytes(b"")
        task = SimpleNamespace(
            id="task-current",
            title="王海颖唱歌录播 04月19日 22时10分",
            source_path=str(source_path),
        )
        # Repo knows nothing -> forces the biliup-list fallback path.
        repo = SimpleNamespace(get_task_context=lambda task_id: None)
        settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

        with patch(
            "biliup_next.infra.video_links.fetch_biliup_list",
            return_value=[
                {"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月18日 22时06分 共10首歌"},
                {"bvid": "BVNEWER", "title": "王海颖唱歌录播 04月20日 22时00分"},
                {"bvid": "BVPREV", "title": "王海颖唱歌录播 04月18日 22时06分"},
                {"bvid": "BVOLDER", "title": "王海颖唱歌录播 04月17日 22时00分"},
            ],
        ):
            context = link_context_for_task(task, repo, settings)

        # The newest recording strictly BEFORE the current one wins
        # (BVNEWER is later than the task, BVOLDER is older than BVPREV).
        self.assertEqual(context["previous_full_video_bvid"], "BVPREV")
        self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVPREV")
        self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
        self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")
|
||||||
|
|
||||||
|
def test_previous_live_merges_repo_and_biliup_list_links(self) -> None:
    """Repo-known data and biliup-list data are merged for the previous live.

    The previous task's full-video bvid is already recorded locally
    (context attribute plus a marker file), while the pure-enjoyment cut is
    only discoverable through the remote list; the result must carry both.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        current_path = root / "current" / "source.mp4"
        previous_path = root / "previous" / "source.mp4"
        for media in (current_path, previous_path):
            media.parent.mkdir()
            media.write_bytes(b"")
        # Marker file mirroring the bvid stored in the previous context.
        (previous_path.parent / "full_video_bvid.txt").write_text("BVLOCALFULL", encoding="utf-8")

        task = SimpleNamespace(
            id="task-current",
            title="王海颖唱歌录播 04月19日 22时10分",
            source_path=str(current_path),
        )
        previous_task = SimpleNamespace(
            id="task-previous",
            title="王海颖唱歌录播 04月18日 22时06分",
            source_path=str(previous_path),
        )
        current_context = SimpleNamespace(
            task_id=task.id,
            streamer="王海颖",
            session_key="王海颖-0419",
            segment_started_at="2026-04-19T22:10:00",
        )
        previous_context = SimpleNamespace(
            task_id=previous_task.id,
            streamer="王海颖",
            session_key="王海颖-0418",
            segment_started_at="2026-04-18T22:06:00",
            full_video_bvid="BVLOCALFULL",
        )
        tasks = {task.id: task, previous_task.id: previous_task}
        contexts = {task.id: current_context, previous_task.id: previous_context}
        repo = SimpleNamespace(
            get_task_context=contexts.get,
            get_task=tasks.get,
            find_recent_task_contexts=lambda streamer, limit=50: [current_context, previous_context],
        )
        settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

        remote_videos = [
            {"bvid": "BVPURE", "title": "【王海颖(歌曲纯享版)】04月18日 22时06分 共18首歌"},
        ]
        with patch(
            "biliup_next.infra.video_links.fetch_biliup_list",
            return_value=remote_videos,
        ):
            context = link_context_for_task(task, repo, settings)

        self.assertEqual(context["previous_full_video_bvid"], "BVLOCALFULL")
        self.assertEqual(context["previous_full_video_link"], "https://www.bilibili.com/video/BVLOCALFULL")
        self.assertEqual(context["previous_pure_video_bvid"], "BVPURE")
        self.assertEqual(context["previous_pure_video_link"], "https://www.bilibili.com/video/BVPURE")
|
||||||
|
|
||||||
|
def test_previous_live_biliup_list_handles_year_boundary(self) -> None:
    """A 12月31 live is still recognised as the previous session of a 01月01 live."""
    with tempfile.TemporaryDirectory() as tmpdir:
        source_path = Path(tmpdir) / "source.mp4"
        source_path.write_bytes(b"")
        task = SimpleNamespace(
            id="task-current",
            title="王海颖唱歌录播 01月01日 22时10分",
            source_path=str(source_path),
        )
        repo = SimpleNamespace(get_task_context=lambda task_id: None)
        settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

        remote_videos = [
            {"bvid": "BVPREV", "title": "王海颖唱歌录播 12月31日 22时06分"},
        ]
        with patch(
            "biliup_next.infra.video_links.fetch_biliup_list",
            return_value=remote_videos,
        ):
            context = link_context_for_task(task, repo, settings)

        self.assertEqual(context["previous_full_video_bvid"], "BVPREV")
|
||||||
|
|
||||||
|
def test_current_full_video_falls_back_to_biliup_list(self) -> None:
    """The current full video is resolved via the biliup list and persisted.

    After resolution, the located bvid must also be written to
    full_video_bvid.txt next to the source file so later runs can reuse it.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        source_path = Path(tmpdir) / "source.mp4"
        source_path.write_bytes(b"")
        task = SimpleNamespace(
            id="task-current",
            title="王海颖唱歌录播 04月22日 15时56分",
            source_path=str(source_path),
        )
        repo = SimpleNamespace(get_task_context=lambda task_id: None)
        settings = {"biliup_path": "biliup", "cookie_file": "cookies.json"}

        remote_videos = [
            {"bvid": "BVFULL", "title": "王海颖唱歌录播 04月22日 15时56分"},
            {"bvid": "BVPURE", "title": "【王海颖 (歌曲纯享版)】 04月22日 15时56分 共20首歌"},
        ]
        with patch(
            "biliup_next.infra.adapters.full_video_locator.fetch_biliup_list",
            return_value=remote_videos,
        ):
            context = link_context_for_task(task, repo, settings)

        self.assertEqual(context["current_full_video_bvid"], "BVFULL")
        self.assertEqual(context["current_full_video_link"], "https://www.bilibili.com/video/BVFULL")
        self.assertEqual((source_path.parent / "full_video_bvid.txt").read_text(encoding="utf-8"), "BVFULL")
|
||||||
|
|
||||||
|
|
||||||
|
# Allow this test module to be executed directly rather than only via a test runner.
if __name__ == "__main__":
    unittest.main()
|
||||||
116
tests/test_workspace_cleanup.py
Normal file
116
tests/test_workspace_cleanup.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
from biliup_next.core.models import Task, utc_now_iso
|
||||||
|
from biliup_next.infra.workspace_cleanup import WorkspaceCleanupService
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeRepo:
    """In-memory stand-in for the task repository used by WorkspaceCleanupService.

    Records every deletion request so tests can assert on which artifacts
    and paths the cleanup service asked to remove.
    """

    def __init__(self, tasks: list[Task], session_key: str | None = None) -> None:
        # Index tasks by id for direct lookup in get_task.
        self.tasks = {item.id: item for item in tasks}
        # When None, no task has a recorded context (get_task_context -> None).
        self.session_key = session_key
        # (task_id, artifact_type) pairs recorded by delete_artifacts.
        self.deleted_artifacts: list[tuple[str, str]] = []
        # (task_id, path) pairs recorded by delete_artifact_by_path.
        self.deleted_artifact_paths: list[tuple[str, str]] = []

    def get_task(self, task_id: str) -> Task | None:
        return self.tasks.get(task_id)

    def get_task_context(self, task_id: str):  # noqa: ANN201
        if self.session_key is not None and task_id in self.tasks:
            return SimpleNamespace(task_id=task_id, session_key=self.session_key)
        return None

    def list_task_contexts_by_session_key(self, session_key: str):  # noqa: ANN201
        if session_key != self.session_key:
            return []
        return [
            SimpleNamespace(task_id=known_id, session_key=session_key)
            for known_id in self.tasks
        ]

    def delete_artifacts(self, task_id: str, artifact_type: str) -> None:
        self.deleted_artifacts.append((task_id, artifact_type))

    def delete_artifact_by_path(self, task_id: str, path: str) -> None:
        self.deleted_artifact_paths.append((task_id, path))
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task(task_id: str, root: Path) -> Task:
    """Create a collection_synced Task with a populated workspace under *root*.

    The workspace gets a non-empty source.mp4 plus split_video/ and
    publish_video/ directories each holding one clip, matching the layout the
    cleanup service is expected to delete.
    """
    timestamp = utc_now_iso()
    workspace = root / task_id
    workspace.mkdir(parents=True)
    source_file = workspace / "source.mp4"
    source_file.write_bytes(b"source")
    for clip_dir_name in ("split_video", "publish_video"):
        clip_dir = workspace / clip_dir_name
        clip_dir.mkdir()
        (clip_dir / "01_song.mp4").write_bytes(b"clip")
    return Task(task_id, "local_file", str(source_file), task_id, "collection_synced", timestamp, timestamp)
|
||||||
|
|
||||||
|
|
||||||
|
class WorkspaceCleanupServiceTests(unittest.TestCase):
    """Behavioral tests for WorkspaceCleanupService.cleanup_task_outputs."""

    def test_cleanup_removes_source_split_and_publish_video_for_single_task(self) -> None:
        """With both delete flags on, source, split and publish outputs vanish."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            task = _make_task("task-1", root)
            repo = _FakeRepo([task])
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": True,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(task.id, options)

            work_dir = root / "task-1"
            for leftover in ("source.mp4", "split_video", "publish_video"):
                self.assertFalse((work_dir / leftover).exists())
            self.assertEqual(result["task_ids"], ["task-1"])
            self.assertEqual(repo.deleted_artifacts, [("task-1", "clip_video")])
            self.assertEqual(
                repo.deleted_artifact_paths,
                [("task-1", str((work_dir / "source.mp4").resolve()))],
            )

    def test_cleanup_removes_all_tasks_in_same_session(self) -> None:
        """Cleanup of one task extends to every task sharing its session key."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            first = _make_task("task-1", root)
            second = _make_task("task-2", root)
            repo = _FakeRepo([first, second], session_key="session-1")
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": True,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(first.id, options)

            for task_id in ("task-1", "task-2"):
                work_dir = root / task_id
                for leftover in ("source.mp4", "split_video", "publish_video"):
                    self.assertFalse((work_dir / leftover).exists())
            self.assertEqual(result["task_ids"], ["task-1", "task-2"])
            self.assertEqual(
                repo.deleted_artifacts,
                [("task-1", "clip_video"), ("task-2", "clip_video")],
            )

    def test_cleanup_skips_missing_source_video(self) -> None:
        """A source file that is already gone is reported as skipped, not deleted."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            task = _make_task("task-1", root)
            source = Path(task.source_path)
            source.unlink()  # simulate a source removed before cleanup ran
            repo = _FakeRepo([task])
            options = {
                "delete_source_video_after_collection_synced": True,
                "delete_split_videos_after_collection_synced": False,
            }
            result = WorkspaceCleanupService(repo).cleanup_task_outputs(task.id, options)

            self.assertIn(str(source.resolve()), result["skipped"])
            self.assertEqual(repo.deleted_artifact_paths, [])
|
||||||
|
|
||||||
|
|
||||||
|
# Allow this test module to be executed directly rather than only via a test runner.
if __name__ == "__main__":
    unittest.main()
|
||||||
Reference in New Issue
Block a user