youtube-transcript-api/routes/transcript.py

46 lines
1.3 KiB
Python

from fastapi import APIRouter, Query
from utils.validators import validate_youtube_url
from utils.filesystem import temp_dir
from core.errors import not_found
from services.ytdlp import extract_metadata_and_captions
from services.captions import parse_vtt
from services.metadata import normalize_metadata
from schemas.transcript import TranscriptResponse
# Router on which this module registers its transcript endpoint.
router = APIRouter()
@router.post("/transcript", response_model=TranscriptResponse)
def transcript(
    url: str = Query(..., description="YouTube video URL"),
) -> TranscriptResponse:
    """Return parsed captions and normalized metadata for a YouTube video.

    A caption file whose name does not contain "auto" is preferred; a file
    whose name contains "auto" is used only when no other file exists.
    \f
    Args:
        url: YouTube video URL, supplied as a query parameter.

    Returns:
        A TranscriptResponse holding the normalized metadata, the parsed
        captions, and the kind of track used ("human" or "auto").

    Raises:
        Whatever validate_youtube_url raises for a rejected URL, and whatever
        not_found raises when no caption file was produced.
        NOTE(review): this handler relies on not_found raising instead of
        returning -- confirm against core.errors.
    """
    validate_youtube_url(url)

    # Parsing and response construction stay inside the temp_dir context:
    # presumably the caption files live under tmp and are removed on exit
    # -- TODO confirm against utils.filesystem.
    with temp_dir() as tmp:
        metadata, caption_files = extract_metadata_and_captions(url, tmp)
        if not caption_files:
            not_found("No captions available for this video", "NO_CAPTIONS")

        # Partition in a single pass. Each path lands in exactly one bucket,
        # so with a non-empty caption_files at least one bucket is non-empty
        # and no third "nothing matched" case can occur.
        # NOTE(review): the substring test is applied to the whole file name;
        # verify that nothing else in the name can contain "auto".
        human, auto = [], []
        for path in caption_files:
            bucket = auto if "auto" in path.name.lower() else human
            bucket.append(path)

        if human:
            caption_path, source = human[0], "human"
        else:
            caption_path, source = auto[0], "auto"

        captions = parse_vtt(str(caption_path))
        video = normalize_metadata(metadata)

        # NOTE(review): language is always reported as "auto", independent of
        # the track that was selected above -- confirm this is intended.
        return TranscriptResponse(
            video=video,
            captions=captions,
            language="auto",
            source=source,
        )