80 lines
2.7 KiB
Python
80 lines
2.7 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from biliup_next.core.errors import ModuleError
|
|
from biliup_next.core.models import Artifact, Task, utc_now_iso
|
|
from biliup_next.core.providers import ProviderManifest
|
|
from biliup_next.infra.legacy_paths import legacy_project_root
|
|
|
|
|
|
class LegacyGroqTranscribeProvider:
    """Transcribe provider that shells out to the legacy ``video2srt.py`` script.

    The script is executed with the legacy project root as its working
    directory, and the ``.srt`` file it is expected to write is returned as a
    ``subtitle_srt`` artifact.
    """

    manifest = ProviderManifest(
        id="groq",
        name="Legacy Groq Transcribe Provider",
        version="0.1.0",
        provider_type="transcribe_provider",
        entrypoint="biliup_next.infra.adapters.groq_legacy:LegacyGroqTranscribeProvider",
        capabilities=["transcribe"],
        enabled_by_default=True,
    )

    def __init__(self, next_root: Path):
        """Remember *next_root* and derive the legacy root and interpreter from it."""
        self.next_root = next_root
        self.legacy_root = legacy_project_root(next_root)
        self.python_bin = self._resolve_python_bin()

    def transcribe(self, task: Task, source_video: Artifact, settings: dict[str, Any]) -> Artifact:
        """Run ``video2srt.py`` on *source_video* and return the subtitle artifact.

        Settings read (all optional; ``None`` or an empty value counts as unset):
        ``session_dir``, ``groq_api_key``, ``ffmpeg_bin``.

        Raises:
            ModuleError: ``TRANSCRIBE_FAILED`` when the script cannot be started
                or exits with a non-zero status; ``TRANSCRIBE_OUTPUT_MISSING``
                when the script succeeds but the expected ``.srt`` is absent.
        """
        session_dir = Path(
            self._configured(settings, "session_dir") or self.legacy_root / "session"
        )
        # Resolved to an absolute path because the child runs with a different cwd.
        work_dir = (session_dir / task.title).resolve()

        # NOTE(review): a relative source_video.path is interpreted by the child
        # relative to legacy_root (its cwd) -- confirm callers pass absolute paths.
        cmd = [
            self.python_bin,
            "video2srt.py",
            source_video.path,
            str(work_dir),
        ]

        try:
            result = subprocess.run(
                cmd,
                cwd=str(self.legacy_root),
                capture_output=True,
                text=True,
                # Undecodable bytes in the child's output must not mask its result.
                errors="replace",
                env=self._build_env(settings),
            )
        except OSError as exc:
            # The interpreter or working directory could not be used at all;
            # report it through the same error type as every other failure.
            # Marked non-retryable on the assumption that launch failures are
            # configuration problems rather than transient ones.
            raise ModuleError(
                code="TRANSCRIBE_FAILED",
                message="legacy video2srt.py 执行失败",
                retryable=False,
                details={"stderr": str(exc), "stdout": ""},
            ) from exc

        if result.returncode != 0:
            raise ModuleError(
                code="TRANSCRIBE_FAILED",
                message="legacy video2srt.py 执行失败",
                retryable=True,
                # Only the tail is kept so the error payload stays small.
                details={"stderr": result.stderr[-2000:], "stdout": result.stdout[-2000:]},
            )

        srt_path = work_dir / f"{task.title}.srt"
        if not srt_path.exists():
            raise ModuleError(
                code="TRANSCRIBE_OUTPUT_MISSING",
                message=f"未找到字幕文件: {srt_path}",
                retryable=False,
            )

        return Artifact(
            id=None,
            task_id=task.id,
            artifact_type="subtitle_srt",
            path=str(srt_path),
            metadata_json=json.dumps({"provider": "groq_legacy"}),
            created_at=utc_now_iso(),
        )

    @staticmethod
    def _configured(settings: dict[str, Any], key: str) -> str | None:
        """Return ``settings[key]`` as text, or ``None`` when it is missing, ``None`` or empty."""
        value = settings.get(key)
        if value is None:
            # str(None) would otherwise leak the literal "None" into paths and env vars.
            return None
        return str(value) or None

    def _build_env(self, settings: dict[str, Any]) -> dict[str, str]:
        """Build the child environment: explicit settings win, then the inherited value, then a default."""
        env = dict(os.environ)
        overrides = (
            ("GROQ_API_KEY", "groq_api_key", ""),
            ("FFMPEG_BIN", "ffmpeg_bin", "ffmpeg"),
        )
        for env_name, setting_key, fallback in overrides:
            configured = self._configured(settings, setting_key)
            if configured is not None:
                env[env_name] = configured
            else:
                # Keep a value already exported in the parent environment
                # instead of clobbering it with the fallback.
                env.setdefault(env_name, fallback)
        return env

    def _resolve_python_bin(self) -> str:
        """Return the legacy project's ``.venv`` interpreter if present, else ``"python"``."""
        venv_python = self.legacy_root / ".venv" / "bin" / "python"
        if venv_python.exists():
            return str(venv_python)
        return "python"
|