# youtube-transcript-api/services/ytdlp.py
#
# 51 lines
# 1.3 KiB
# Python

import json
import subprocess
from pathlib import Path
from typing import Tuple, List
from core.errors import bad_request
from core.config import settings
def extract_metadata_and_captions(
    url: str,
    workdir: str,
) -> Tuple[dict, List[Path]]:
    """Run yt-dlp for *url* and collect its metadata and subtitle files.

    yt-dlp is invoked with ``--skip-download`` so no media is fetched; manual
    and automatic subtitles are requested in VTT format and written using
    ``<workdir>/<video id>`` as the output template. The JSON document that
    yt-dlp prints on stdout is parsed as the video metadata.

    Args:
        url: Video URL handed to yt-dlp as its only positional argument.
        workdir: Directory that yt-dlp writes subtitle files into.

    Returns:
        A ``(metadata, subtitle_files)`` tuple. ``metadata`` is the JSON
        object parsed from the first non-blank line of stdout, and
        ``subtitle_files`` is the sorted list of every ``*.vtt`` path found
        directly inside ``workdir``.

    Failures are reported through ``bad_request`` with one of the codes
    ``TIMEOUT``, ``YTDLP_ERROR``, ``EMPTY_RESPONSE`` or ``INVALID_METADATA``.
    Exceptions from ``subprocess.run`` other than ``TimeoutExpired`` and
    ``CalledProcessError`` are not caught here.

    NOTE(review): the control flow below presumes ``bad_request`` raises
    rather than returns -- confirm against ``core.errors``.
    """
    cmd = [
        "yt-dlp",
        "--skip-download",
        "--write-subs",
        "--write-auto-subs",
        "--sub-format", "vtt",
        "--no-playlist",
        "--print-json",
        "-o", f"{workdir}/%(id)s",
        # End of options: a caller-supplied URL that starts with "-" must be
        # treated as a positional argument, never as a yt-dlp flag.
        "--",
        url,
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
            timeout=settings.request_timeout,
        )
    except subprocess.TimeoutExpired:
        bad_request("yt-dlp timed out", "TIMEOUT")
    except subprocess.CalledProcessError:
        bad_request("Failed to extract video data", "YTDLP_ERROR")

    # Skip leading blank lines so they are not mistaken for malformed JSON.
    first_line = next(
        (line for line in result.stdout.splitlines() if line.strip()),
        None,
    )
    if first_line is None:
        bad_request("No metadata returned from yt-dlp", "EMPTY_RESPONSE")

    try:
        metadata = json.loads(first_line)
    except json.JSONDecodeError:
        bad_request("Invalid metadata returned from yt-dlp", "INVALID_METADATA")

    # The declared return type is a dict; reject any other JSON value.
    if not isinstance(metadata, dict):
        bad_request("Invalid metadata returned from yt-dlp", "INVALID_METADATA")

    # Path.glob yields entries in filesystem order; sort for a stable result.
    subtitle_files = sorted(Path(workdir).glob("*.vtt"))
    return metadata, subtitle_files