from __future__ import annotations
from pathlib import Path
from typing import Any
from engine.model_factory import get_model
from engine.prompt_injector import inject_prompt
from engine.adapters.image.mock_adapter import MockImageGen
def render_shot(shot: dict[str, Any], cfg, out_dir: str | Path, *, mock: bool = False) -> str:
    """Render a single shot: generate an image, optional TTS audio, and a video clip.

    Args:
        shot: Shot description dict. Keys read here: ``shot_id``, ``scene_id``,
            ``duration`` (seconds, default 3), ``tts`` (narration text, empty
            disables audio) and ``image_prompt``.
        cfg: Project configuration passed through to ``get_model``. Expected
            to expose a ``get(key, default)`` lookup with dotted keys (e.g.
            ``"video.mock_fps"``); treated as empty when it does not.
        out_dir: Output directory; ``clips/`` and ``audio/`` subdirectories
            are created beneath it.
        mock: Reserved flag — not used inside this function (provider choice
            is driven by ``cfg``); kept for caller compatibility.

    Returns:
        Path of the rendered clip, as returned by the video generator.
    """

    def _cfg_get(key: str, default=None):
        # Bug fix: the original guarded `cfg.get` with hasattr() only for the
        # "global" lookup, but called `cfg.get(...)` unconditionally for the
        # video settings at the end — a config object without a `get` method
        # would crash late in the render. Guard every lookup the same way.
        return cfg.get(key, default) if hasattr(cfg, "get") else default

    out_dir = Path(out_dir)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    clips_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    try:
        duration_s = float(shot.get("duration", 3))
    except (TypeError, ValueError):
        # Robustness: a None or non-numeric "duration" value previously blew
        # up the whole render; fall back to the 3-second default instead.
        duration_s = 3.0
    narration = str(shot.get("tts", "")).strip()

    # Models from config.
    image_fallback_gen = get_model("image_fallback", cfg)
    try:
        image_gen = get_model("image", cfg)
    except Exception as e:
        # Covers missing optional deps at adapter init time (e.g. replicate/openai packages).
        print(f"[WARN] image provider init failed, fallback to image_fallback: {e}")
        image_gen = image_fallback_gen
    tts = get_model("tts", cfg)
    video_gen = get_model("video", cfg)

    # Prompt injection.
    global_cfg = _cfg_get("global", {})
    prompt_obj = inject_prompt(global_cfg, {"prompt": shot.get("image_prompt", "")})
    positive_prompt = prompt_obj.get("positive", "")
    # Prompt enrichment: keeps ComfyUI generations cinematic and detailed.
    enrich_style = "cinematic, ultra realistic, 4k, detailed lighting"
    if enrich_style not in positive_prompt:
        positive_prompt = f"{positive_prompt}, {enrich_style}".strip(", ")
    prompt_obj["positive"] = positive_prompt

    # 1) image — three-level fallback chain: configured provider, then the
    # configured fallback provider, then the in-process mock as a last resort.
    try:
        image_path = image_gen.generate(prompt_obj, out_dir)
    except Exception as e:
        # Config-driven fallback; keeps provider switching non-invasive.
        print(f"[WARN] Image generation failed, fallback to image_fallback: {e}")
        try:
            image_path = image_fallback_gen.generate(prompt_obj, out_dir)
        except Exception as e2:
            print(f"[WARN] image_fallback also failed, hard fallback to mock: {e2}")
            image_path = MockImageGen().generate(prompt_obj, out_dir)

    scene_label = str(shot.get("scene_id") or shot.get("shot_id") or "scene_unknown")
    print(f"[SHOT_RENDER] {scene_label} -> image generated: {image_path}")

    # 2) audio (optional)
    audio_path = None
    if narration:
        # Use a stable per-shot audio filename.
        ap = audio_dir / f"shot_{shot_id}.mp3"
        try:
            audio_path = tts.generate(narration, ap)
        except Exception as e:
            # Don't fail the whole render due to TTS issues.
            print(f"[WARN] TTS failed, continue without audio: {e}")
            audio_path = None

    # 3) clip
    clip_out = clips_dir / f"shot_{shot_id}.mp4"
    prompt = {
        "duration_s": duration_s,
        "fps": int(_cfg_get("video.mock_fps", 24)),
        "audio_path": audio_path,
        "size": _cfg_get("video.mock_size", None),
    }
    clip_path = video_gen.generate(image_path, prompt, clip_out)
    return clip_path