from __future__ import annotations

from pathlib import Path
from typing import Any

from engine.model_factory import get_model
from engine.prompt_injector import inject_prompt
from engine.adapters.image.mock_adapter import MockImageGen


def _cfg_get(cfg: Any, key: str, default: Any = None) -> Any:
    """Read *key* from *cfg* via its ``.get`` method when present, else *default*.

    Mirrors the defensive ``hasattr`` guard already used for the "global"
    section, so config objects without a mapping interface never crash a
    lookup mid-render.
    """
    return cfg.get(key, default) if hasattr(cfg, "get") else default


def render_shot(shot: dict[str, Any], cfg, out_dir: str | Path, *, mock: bool = False) -> str:
    """Render one shot end-to-end: image -> optional TTS audio -> video clip.

    Parameters
    ----------
    shot:
        Shot description; optional keys read here are ``shot_id``,
        ``scene_id``, ``duration`` (seconds, default 3), ``tts``
        (narration text) and ``image_prompt``.
    cfg:
        Project configuration passed through to the model factory.
        Expected to expose a dict-like ``.get``; lookups degrade to
        defaults when it does not.
    out_dir:
        Output root; ``clips/`` and ``audio/`` subdirectories are created
        under it as needed.
    mock:
        Accepted for interface compatibility; currently unused — mock
        behaviour is selected via the model factory config instead.

    Returns
    -------
    The path of the generated clip, as returned by the video adapter.

    Notes
    -----
    Image and TTS failures are deliberately non-fatal: image generation
    falls back provider -> image_fallback -> mock, and a TTS failure just
    drops the audio track. Only the final video step may raise.
    """
    out_dir = Path(out_dir)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    clips_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    duration_s = float(shot.get("duration", 3))
    narration = str(shot.get("tts", "")).strip()

    # Models from config.
    image_fallback_gen = get_model("image_fallback", cfg)
    try:
        image_gen = get_model("image", cfg)
    except Exception as e:
        # Covers missing optional deps at adapter init time (e.g. replicate/openai packages).
        print(f"[WARN] image provider init failed, fallback to image_fallback: {e}")
        image_gen = image_fallback_gen
    tts = get_model("tts", cfg)
    video_gen = get_model("video", cfg)

    # Prompt injection.
    global_cfg = _cfg_get(cfg, "global", {})
    prompt_obj = inject_prompt(global_cfg, {"prompt": shot.get("image_prompt", "")})
    positive_prompt = prompt_obj.get("positive", "")
    # Prompt enrichment: keeps ComfyUI generations cinematic and detailed.
    # The containment check makes re-rendering idempotent (no duplicate suffix).
    enrich_style = "cinematic, ultra realistic, 4k, detailed lighting"
    if enrich_style not in positive_prompt:
        # strip(", ") handles an empty base prompt without a leading ", ".
        positive_prompt = f"{positive_prompt}, {enrich_style}".strip(", ")
    prompt_obj["positive"] = positive_prompt

    # 1) image
    try:
        image_path = image_gen.generate(prompt_obj, out_dir)
    except Exception as e:
        # Config-driven fallback; keeps provider switching non-invasive.
        print(f"[WARN] Image generation failed, fallback to image_fallback: {e}")
        try:
            image_path = image_fallback_gen.generate(prompt_obj, out_dir)
        except Exception as e2:
            print(f"[WARN] image_fallback also failed, hard fallback to mock: {e2}")
            image_path = MockImageGen().generate(prompt_obj, out_dir)

    scene_label = str(shot.get("scene_id") or shot.get("shot_id") or "scene_unknown")
    print(f"[SHOT_RENDER] {scene_label} -> image generated: {image_path}")

    # 2) audio (optional)
    audio_path = None
    if narration:
        # Use a stable per-shot audio filename.
        ap = audio_dir / f"shot_{shot_id}.mp3"
        try:
            audio_path = tts.generate(narration, ap)
        except Exception as e:
            # Don't fail the whole render due to TTS issues.
            print(f"[WARN] TTS failed, continue without audio: {e}")
            audio_path = None

    # 3) clip
    clip_out = clips_dir / f"shot_{shot_id}.mp4"
    # Guard these lookups like the "global" one above: the earlier hasattr
    # check implies cfg may not expose .get, and an unguarded cfg.get here
    # would crash the render at the very last step.
    prompt = {
        "duration_s": duration_s,
        "fps": int(_cfg_get(cfg, "video.mock_fps", 24)),
        "audio_path": audio_path,
        "size": _cfg_get(cfg, "video.mock_size", None),
    }
    clip_path = video_gen.generate(image_path, prompt, clip_out)
    return clip_path