# Source file: AiVideo/engine/render_pipeline.py
# Snapshot: 2026-03-25 19:35:37 +08:00 — 81 lines, 3.1 KiB, Python

from __future__ import annotations
from pathlib import Path
from typing import Any
from engine.model_factory import get_model
from engine.prompt_injector import inject_prompt
from engine.adapters.image.mock_adapter import MockImageGen
def render_shot(shot: dict[str, Any], cfg, out_dir: str | Path, *, mock: bool = False) -> str:
    """Render one shot end-to-end: still image, optional TTS audio, video clip.

    Args:
        shot: Shot description dict; reads ``shot_id``, ``duration``, ``tts``,
            ``image_prompt`` and ``scene_id`` — all optional, with fallbacks.
        cfg: Project config object exposing ``get(key, default)``; dotted keys
            such as ``"video.mock_fps"`` are passed through as-is (resolution is
            the config wrapper's responsibility — TODO confirm against its impl).
        out_dir: Output root; ``clips/`` and ``audio/`` subdirectories are
            created beneath it if missing.
        mock: Accepted for interface compatibility; not consulted here — mock
            behavior is driven by the configured providers.

    Returns:
        Filesystem path of the rendered clip, as a string.
    """
    out_dir = Path(out_dir)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    clips_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    duration_s = float(shot.get("duration", 3))
    narration = str(shot.get("tts", "")).strip()

    # Models from config. The fallback image provider is resolved eagerly so it
    # is available for both init-time and generation-time failures below.
    image_fallback_gen = get_model("image_fallback", cfg)
    try:
        image_gen = get_model("image", cfg)
    except Exception as e:
        # Covers missing optional deps at adapter init time (e.g. replicate/openai packages).
        print(f"[WARN] image provider init failed, fallback to image_fallback: {e}")
        image_gen = image_fallback_gen
    tts = get_model("tts", cfg)
    video_gen = get_model("video", cfg)

    # Prompt injection.
    global_cfg = cfg.get("global", {}) if hasattr(cfg, "get") else {}
    prompt_obj = inject_prompt(global_cfg, {"prompt": shot.get("image_prompt", "")})
    positive_prompt = prompt_obj.get("positive", "")

    # Prompt enrichment: keeps ComfyUI generations cinematic and detailed.
    # strip(", ") covers the empty-prompt case so we never emit a leading comma.
    enrich_style = "cinematic, ultra realistic, 4k, detailed lighting"
    if enrich_style not in positive_prompt:
        positive_prompt = f"{positive_prompt}, {enrich_style}".strip(", ")
    prompt_obj["positive"] = positive_prompt

    # 1) image
    try:
        image_path = image_gen.generate(prompt_obj, out_dir)
    except Exception as e:
        # Config-driven fallback; keeps provider switching non-invasive.
        print(f"[WARN] Image generation failed, fallback to image_fallback: {e}")
        try:
            image_path = image_fallback_gen.generate(prompt_obj, out_dir)
        except Exception as e2:
            # Last-resort mock so a single bad provider never kills the render.
            print(f"[WARN] image_fallback also failed, hard fallback to mock: {e2}")
            image_path = MockImageGen().generate(prompt_obj, out_dir)
    scene_label = str(shot.get("scene_id") or shot.get("shot_id") or "scene_unknown")
    print(f"[SHOT_RENDER] {scene_label} -> image generated: {image_path}")

    # 2) audio (optional)
    audio_path = None
    if narration:
        # Use a stable per-shot audio filename.
        ap = audio_dir / f"shot_{shot_id}.mp3"
        try:
            audio_path = tts.generate(narration, ap)
        except Exception as e:
            # Don't fail the whole render due to TTS issues.
            print(f"[WARN] TTS failed, continue without audio: {e}")
            audio_path = None

    # 3) clip
    clip_out = clips_dir / f"shot_{shot_id}.mp4"
    prompt = {
        "duration_s": duration_s,
        "fps": int(cfg.get("video.mock_fps", 24)),
        "audio_path": audio_path,
        "size": cfg.get("video.mock_size", None),
    }
    clip_path = video_gen.generate(image_path, prompt, clip_out)
    # Coerce to str: adapters may hand back a pathlib.Path, but the declared
    # return type (and downstream callers) expect a plain string.
    return str(clip_path)