"""Render a single shot into a video clip with TTS audio and a subtitle overlay."""

from __future__ import annotations

import asyncio
import random
from pathlib import Path
from typing import Any

from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, vfx

from .audio_gen import synthesize_one
from .comfy_client import ComfyClient
from .config import AppConfig


def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
    """Return *video* with *audio* attached, adjusted to the audio's duration.

    When both durations are known, a video shorter than the audio is looped
    up to the audio duration and a longer video is trimmed down to it. When
    either duration is ``None`` the audio is attached without any adjustment.

    Args:
        video: The source video clip.
        audio: The audio track to attach.

    Returns:
        A clip carrying *audio* as its audio track.
    """
    if audio.duration is None or video.duration is None:
        return video.with_audio(audio)

    if audio.duration > video.duration:
        video = video.with_effects([vfx.Loop(duration=audio.duration)])
    elif video.duration > audio.duration:
        video = video.subclipped(0, audio.duration)
    return video.with_audio(audio)


def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
    """Build a bottom-centered caption clip for *text*.

    Args:
        text: The caption text.
        size: ``(width, height)`` of the frame the caption is laid over; only
            the width is used, and the caption box takes 92% of it.
        duration: How long the caption stays on screen.

    Returns:
        A white, black-stroked ``TextClip`` positioned at the bottom center.
    """
    caption = TextClip(
        text=text,
        font_size=44,
        color="white",
        stroke_color="black",
        stroke_width=2,
        # Fixed width, height left as None so it is derived from the text.
        size=(int(size[0] * 0.92), None),
        method="caption",
    )
    return (
        caption.with_position(("center", "bottom"))
        .with_duration(duration)
        .with_opacity(0.95)
    )


async def _render_shot_async(
    shot: dict[str, Any],
    output_dir: str | Path,
    cfg: AppConfig,
    *,
    mock: bool = False,
) -> str:
    """Render one shot to ``<output_dir>/clips/shot_<shot_id>.mp4``.

    The narration is synthesized first, then a raw video is obtained (a mock
    video when *mock* is true, otherwise the output of a ComfyUI workflow),
    fitted to the narration length, overlaid with a subtitle and encoded.

    Args:
        shot: Shot description. The keys read are ``shot_id``,
            ``image_prompt``, ``motion``, ``tts`` and ``duration``.
        output_dir: Directory that receives the ``clips`` and ``audio``
            sub-directories (created if missing).
        cfg: Application configuration, queried through ``cfg.get``.
        mock: When true, skip ComfyUI and build a placeholder video instead.

    Returns:
        The path of the written clip, as a string.

    Raises:
        IndexError: If the ComfyUI workflow result contains no output files.
    """
    out_dir = Path(output_dir)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    for directory in (out_dir, clips_dir, audio_dir):
        directory.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    image_prompt = str(shot.get("image_prompt", "")).strip()
    motion = str(shot.get("motion", "")).strip()
    tts_text = str(shot.get("tts", "")).strip()
    # Durations below one second are raised to one second.
    duration_s = max(1.0, float(shot.get("duration", 3)))

    voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
    rate = str(cfg.get("tts.rate", "+0%"))
    volume = str(cfg.get("tts.volume", "+0%"))

    audio_path = audio_dir / f"shot_{shot_id}.mp3"
    # A single space is sent when there is no narration text, so the
    # synthesizer is never called with an empty string.
    audio_asset = await synthesize_one(tts_text or " ", audio_path, voice, rate, volume)

    if mock:
        # Imported locally to avoid a circular import at module import time.
        from engine.main import _ensure_mock_image, _make_mock_video

        mock_size = cfg.get("video.mock_size", [1024, 576])
        w, h = int(mock_size[0]), int(mock_size[1])
        mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h))
        fps = int(cfg.get("video.mock_fps", 24))
        raw_video_path = out_dir / f"shot_raw_{shot_id}.mp4"
        _make_mock_video(
            raw_video_path,
            mock_image,
            max(duration_s, audio_asset.duration_s),
            fps=fps,
        )
    else:
        comfy = ComfyClient(cfg)
        wf = comfy.load_workflow()
        seed = random.randint(1, 2_147_483_647)
        wf_i = comfy.inject_params(
            wf,
            image_prompt=image_prompt,
            seed=seed,
            motion_prompt=motion or None,
        )
        result = await comfy.run_workflow(wf_i)
        # Prefer an actual video output; otherwise fall back to the first file.
        candidates = [
            p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}
        ]
        raw_video_path = candidates[0] if candidates else result.output_files[0]

    clip_out = clips_dir / f"shot_{shot_id}.mp4"

    # Each clip is opened right before the try block that closes it, so a
    # failure while opening the audio cannot leave the video reader open.
    video = VideoFileClip(str(raw_video_path))
    try:
        audio = AudioFileClip(str(audio_asset.path))
        try:
            fitted = _fit_video_to_audio(video, audio)
            layers = [fitted]
            # An empty caption has nothing to draw, so the overlay is skipped
            # rather than asking TextClip to lay out an empty string.
            if tts_text:
                frame_w, frame_h = fitted.size
                subtitle_duration = fitted.duration or audio.duration or duration_s
                layers.append(_subtitle_clip(tts_text, (frame_w, frame_h), subtitle_duration))

            comp = CompositeVideoClip(layers)
            try:
                comp.write_videofile(
                    str(clip_out),
                    codec="libx264",
                    audio_codec="aac",
                    fps=fitted.fps or 24,
                    preset="veryfast",
                )
            finally:
                comp.close()
        finally:
            audio.close()
    finally:
        video.close()

    return str(clip_out)


def render_shot(
    shot: dict[str, Any],
    output_dir: str | Path,
    cfg: AppConfig | None = None,
    *,
    mock: bool = False,
) -> str:
    """Synchronous entry point that renders one shot and returns the clip path.

    Args:
        shot: Shot description passed through to the async renderer.
        output_dir: Directory that receives the rendered files.
        cfg: Application configuration; when omitted (or falsy) it is loaded
            from ``./configs/config.yaml``.
        mock: When true, render a placeholder video instead of calling ComfyUI.

    Returns:
        The path of the written clip, as a string.
    """
    cfg2 = cfg or AppConfig.load("./configs/config.yaml")
    # asyncio.run starts its own event loop for the duration of this call.
    return asyncio.run(_render_shot_async(shot, output_dir, cfg2, mock=mock))