46 lines
1.3 KiB
Python
46 lines
1.3 KiB
Python
from fastapi import APIRouter, Query
|
|
from utils.validators import validate_youtube_url
|
|
from utils.filesystem import temp_dir
|
|
from core.errors import not_found
|
|
from services.ytdlp import extract_metadata_and_captions
|
|
from services.captions import parse_vtt
|
|
from services.metadata import normalize_metadata
|
|
from schemas.transcript import TranscriptResponse
|
|
|
|
# Module-level router; the /transcript endpoint in this module registers on it.
router = APIRouter()
|
|
|
|
|
|
def _select_caption_track(caption_files):
    """Pick the caption file to use and label its origin in a single pass.

    A file whose lower-cased ``name`` does not contain ``"auto"`` is treated
    as human-authored and wins immediately; otherwise the first file whose
    name contains ``"auto"`` is used.

    Args:
        caption_files: Iterable of objects exposing a ``name`` string
            (presumably ``pathlib.Path`` instances -- confirm against
            ``extract_metadata_and_captions``). May be empty or ``None``.

    Returns:
        A ``(path, source)`` tuple where ``source`` is ``"human"`` or
        ``"auto"``, or ``(None, None)`` when there is nothing to choose from.
    """
    first_auto = None
    for candidate in caption_files or ():
        if "auto" not in candidate.name.lower():
            # Human-authored tracks take priority; stop at the first one.
            return candidate, "human"
        if first_auto is None:
            first_auto = candidate

    if first_auto is None:
        return None, None
    return first_auto, "auto"


@router.post("/transcript", response_model=TranscriptResponse)
def transcript(
    url: str = Query(..., description="YouTube video URL"),
) -> TranscriptResponse:
    """Return the parsed captions and normalized metadata for a video.

    The URL is checked with ``validate_youtube_url``, captions and metadata
    are fetched into a temporary directory, one caption file is selected
    (human-authored preferred over auto-generated) and parsed.

    Args:
        url: Required query parameter holding the YouTube video URL.

    Returns:
        A ``TranscriptResponse`` carrying the video metadata, the parsed
        captions, and whether the captions are ``"human"`` or ``"auto"``.

    Raises:
        Whatever ``validate_youtube_url`` and ``not_found`` raise.
        NOTE(review): ``not_found`` is assumed to raise (it is called without
        ``raise``/``return``) -- confirm in ``core.errors``.
    """
    validate_youtube_url(url)

    # Everything that may touch the downloaded files stays inside the
    # temp-dir context so the files still exist while they are being read.
    with temp_dir() as tmp:
        metadata, caption_files = extract_metadata_and_captions(url, tmp)

        caption_path, source = _select_caption_track(caption_files)
        if caption_path is None:
            not_found("No captions available for this video", "NO_CAPTIONS")

        captions = parse_vtt(str(caption_path))
        video = normalize_metadata(metadata)

        return TranscriptResponse(
            video=video,
            captions=captions,
            # NOTE(review): hard-coded regardless of the selected track; looks
            # like a placeholder rather than the real caption language.
            language="auto",
            source=source,
        )
|