51 lines
1.3 KiB
Python
51 lines
1.3 KiB
Python
import json
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Tuple, List
|
|
|
|
from core.errors import bad_request
|
|
from core.config import settings
|
|
|
|
|
|
def extract_metadata_and_captions(
    url: str,
    workdir: str,
) -> Tuple[dict, List[Path]]:
    """Fetch video metadata and VTT subtitle files for *url* via ``yt-dlp``.

    ``yt-dlp`` is invoked with ``--skip-download`` so no media is fetched;
    it is asked to write manual and automatic subtitles in VTT format using
    the output template ``<workdir>/%(id)s`` and to print the video metadata
    as JSON on stdout.

    Args:
        url: Video URL handed to ``yt-dlp``. It is passed after a ``--``
            terminator so it can never be parsed as a command-line option.
        workdir: Directory used in the ``yt-dlp`` output template and then
            scanned for ``*.vtt`` files.

    Returns:
        A ``(metadata, subtitle_files)`` tuple: the JSON object parsed from
        the first stdout line, and every ``*.vtt`` path directly inside
        *workdir*, sorted so the order is deterministic. Note that the scan
        is not limited to the current video, so files already present in
        *workdir* are included as well.

    Failures are reported through ``bad_request`` with one of the codes
    ``TIMEOUT``, ``YTDLP_ERROR``, ``EMPTY_RESPONSE`` or ``INVALID_METADATA``.
    NOTE(review): this function relies on ``bad_request`` raising; if it
    only returned, execution would continue with unbound locals -- confirm.
    """
    cmd = [
        "yt-dlp",
        "--skip-download",
        "--write-subs",
        "--write-auto-subs",
        "--sub-format", "vtt",
        "--no-playlist",
        "--print-json",
        "-o", f"{workdir}/%(id)s",
        # End of options: a URL beginning with "-" must be treated as a
        # positional argument, not as an injected yt-dlp flag.
        "--",
        url,
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
            timeout=settings.request_timeout,
        )
    except subprocess.TimeoutExpired:
        bad_request("yt-dlp timed out", "TIMEOUT")
    except subprocess.CalledProcessError:
        bad_request("Failed to extract video data", "YTDLP_ERROR")

    lines = result.stdout.splitlines()
    if not lines:
        bad_request("No metadata returned from yt-dlp", "EMPTY_RESPONSE")

    try:
        metadata = json.loads(lines[0])
    except json.JSONDecodeError:
        bad_request("Invalid metadata returned from yt-dlp", "INVALID_METADATA")

    # Valid JSON that is not an object (e.g. ``null``) would break the
    # declared ``dict`` return type, so reject it the same way as bad JSON.
    if not isinstance(metadata, dict):
        bad_request("Invalid metadata returned from yt-dlp", "INVALID_METADATA")

    # glob() yields entries in filesystem order; sort for a stable result.
    subtitle_files = sorted(Path(workdir).glob("*.vtt"))

    return metadata, subtitle_files
|