# Source file: AiVideo/engine/render_pipeline.py
# Snapshot: 2026-03-25 19:35:37 +08:00 — 81 lines, 3.1 KiB, Python

from __future__ import annotations
from pathlib import Path
from typing import Any
from engine.model_factory import get_model
from engine.prompt_injector import inject_prompt
from engine.adapters.image.mock_adapter import MockImageGen
def render_shot(shot: dict[str, Any], cfg, out_dir: str | Path, *, mock: bool = False) -> str:
    """Render one shot end-to-end: still image, optional TTS audio, video clip.

    Args:
        shot: Shot description dict; reads ``shot_id``, ``duration``, ``tts``,
            ``image_prompt`` and ``scene_id`` — all optional, with fallbacks.
        cfg: Project config object exposing ``get(key, default)``; dotted keys
            such as ``"video.mock_fps"`` are passed through as-is (resolution is
            the config wrapper's responsibility — TODO confirm against its impl).
        out_dir: Output root; ``clips/`` and ``audio/`` subdirectories are
            created beneath it if missing.
        mock: Accepted for interface compatibility; not consulted here — mock
            behavior is driven by the configured providers.

    Returns:
        Filesystem path of the rendered clip, as a string.
    """
    out_dir = Path(out_dir)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    clips_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    duration_s = float(shot.get("duration", 3))
    narration = str(shot.get("tts", "")).strip()

    # Models from config. The fallback image provider is resolved eagerly so it
    # is available for both init-time and generation-time failures below.
    image_fallback_gen = get_model("image_fallback", cfg)
    try:
        image_gen = get_model("image", cfg)
    except Exception as e:
        # Covers missing optional deps at adapter init time (e.g. replicate/openai packages).
        print(f"[WARN] image provider init failed, fallback to image_fallback: {e}")
        image_gen = image_fallback_gen
    tts = get_model("tts", cfg)
    video_gen = get_model("video", cfg)

    # Prompt injection.
    global_cfg = cfg.get("global", {}) if hasattr(cfg, "get") else {}
    prompt_obj = inject_prompt(global_cfg, {"prompt": shot.get("image_prompt", "")})
    positive_prompt = prompt_obj.get("positive", "")

    # Prompt enrichment: keeps ComfyUI generations cinematic and detailed.
    # strip(", ") covers the empty-prompt case so we never emit a leading comma.
    enrich_style = "cinematic, ultra realistic, 4k, detailed lighting"
    if enrich_style not in positive_prompt:
        positive_prompt = f"{positive_prompt}, {enrich_style}".strip(", ")
    prompt_obj["positive"] = positive_prompt

    # 1) image
    try:
        image_path = image_gen.generate(prompt_obj, out_dir)
    except Exception as e:
        # Config-driven fallback; keeps provider switching non-invasive.
        print(f"[WARN] Image generation failed, fallback to image_fallback: {e}")
        try:
            image_path = image_fallback_gen.generate(prompt_obj, out_dir)
        except Exception as e2:
            # Last-resort mock so a single bad provider never kills the render.
            print(f"[WARN] image_fallback also failed, hard fallback to mock: {e2}")
            image_path = MockImageGen().generate(prompt_obj, out_dir)
    scene_label = str(shot.get("scene_id") or shot.get("shot_id") or "scene_unknown")
    print(f"[SHOT_RENDER] {scene_label} -> image generated: {image_path}")

    # 2) audio (optional)
    audio_path = None
    if narration:
        # Use a stable per-shot audio filename.
        ap = audio_dir / f"shot_{shot_id}.mp3"
        try:
            audio_path = tts.generate(narration, ap)
        except Exception as e:
            # Don't fail the whole render due to TTS issues.
            print(f"[WARN] TTS failed, continue without audio: {e}")
            audio_path = None

    # 3) clip
    clip_out = clips_dir / f"shot_{shot_id}.mp4"
    prompt = {
        "duration_s": duration_s,
        "fps": int(cfg.get("video.mock_fps", 24)),
        "audio_path": audio_path,
        "size": cfg.get("video.mock_size", None),
    }
    clip_path = video_gen.generate(image_path, prompt, clip_out)
    # Coerce to str: adapters may hand back a pathlib.Path, but the declared
    # return type (and downstream callers) expect a plain string.
    return str(clip_path)