import json
import subprocess
from pathlib import Path
from typing import Tuple, List

from core.errors import bad_request
from core.config import settings


def extract_metadata_and_captions(
    url: str,
    workdir: str,
) -> Tuple[dict, List[Path]]:
    """Run yt-dlp to fetch a video's metadata and subtitle files.

    No media is downloaded (``--skip-download``); yt-dlp is asked to write
    manual and automatic subtitles in VTT format into *workdir* and to print
    the video's metadata as JSON on stdout.

    Args:
        url: Video URL handed to yt-dlp. It is passed after a ``--``
            separator so it is always treated as a positional argument.
        workdir: Directory that yt-dlp writes subtitle files into.

    Returns:
        A ``(metadata, subtitle_files)`` tuple: the object decoded from the
        first line of yt-dlp's stdout, and every ``*.vtt`` path found
        directly inside *workdir*, sorted by path.

    Errors are reported through ``bad_request`` with these codes:
    ``TIMEOUT``, ``YTDLP_ERROR``, ``EMPTY_RESPONSE``, ``INVALID_METADATA``.
    """
    # NOTE(review): the flow below assumes bad_request() raises and never
    # returns (otherwise `result` / `metadata` would be unbound) -- confirm
    # against core.errors.
    cmd = [
        "yt-dlp",
        "--skip-download",
        "--write-subs",
        "--write-auto-subs",
        "--sub-format",
        "vtt",
        "--no-playlist",
        "--print-json",
        "-o",
        f"{workdir}/%(id)s",
        # End of options: a url beginning with "-" must not be parsed by
        # yt-dlp as a command-line flag.
        "--",
        url,
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
            timeout=settings.request_timeout,
        )
    except subprocess.TimeoutExpired:
        bad_request("yt-dlp timed out", "TIMEOUT")
    except subprocess.CalledProcessError:
        bad_request("Failed to extract video data", "YTDLP_ERROR")

    lines = result.stdout.splitlines()
    if not lines:
        bad_request("No metadata returned from yt-dlp", "EMPTY_RESPONSE")

    try:
        # Only the first stdout line is decoded as the metadata document.
        metadata = json.loads(lines[0])
    except json.JSONDecodeError:
        bad_request("Invalid metadata returned from yt-dlp", "INVALID_METADATA")

    # The glob covers every .vtt in workdir, not only files from this run.
    # Sorted so the returned order does not depend on the filesystem.
    subtitle_files = sorted(Path(workdir).glob("*.vtt"))
    return metadata, subtitle_files