from __future__ import annotations
|
|
|
|
import asyncio
|
|
import random
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, vfx
|
|
|
|
from .audio_gen import synthesize_one
|
|
from .comfy_client import ComfyClient
|
|
from .config import AppConfig
|
|
|
|
|
|
def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
    """Match the video's length to the audio track and attach the audio.

    Loops the footage when it is shorter than the narration, trims it when
    it is longer, and leaves it untouched when either duration is unknown.
    """
    a_dur, v_dur = audio.duration, video.duration
    if a_dur is not None and v_dur is not None:
        if a_dur > v_dur:
            # Too short: repeat the footage until it covers the narration.
            video = video.with_effects([vfx.Loop(duration=a_dur)])
        elif v_dur > a_dur:
            # Too long: cut the tail so the clip ends with the narration.
            video = video.subclipped(0, a_dur)
    return video.with_audio(audio)
|
|
|
|
|
|
def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
    """Build a bottom-anchored, semi-transparent subtitle overlay.

    The caption wraps at ~92% of the frame width (small side margins) and
    is shown for the whole clip *duration*.
    """
    caption_width = int(size[0] * 0.92)  # leave narrow margins on both sides
    clip = TextClip(
        text=text,
        font_size=44,
        color="white",
        stroke_color="black",
        stroke_width=2,
        size=(caption_width, None),  # height auto-computed by "caption" mode
        method="caption",
    )
    clip = clip.with_position(("center", "bottom"))
    clip = clip.with_duration(duration)
    return clip.with_opacity(0.95)
|
|
|
|
|
|
async def _render_shot_async(
    shot: dict[str, Any],
    output_dir: str | Path,
    cfg: AppConfig,
    *,
    mock: bool = False,
) -> str:
    """Render one storyboard shot to an .mp4 clip with narration and subtitles.

    Pipeline: synthesize TTS audio, obtain raw footage (a generated mock
    video or a ComfyUI workflow run), fit the footage to the narration
    length, burn in a subtitle overlay, and encode the result.

    Args:
        shot: Shot description dict; reads ``shot_id``, ``image_prompt``,
            ``motion``, ``tts`` and ``duration`` keys (all optional).
        output_dir: Directory that receives ``clips/`` and ``audio/`` subdirs.
        cfg: Application config (TTS voice/rate/volume, video settings).
        mock: When True, skip ComfyUI and render a placeholder video.

    Returns:
        Path (as str) of the written clip file.

    Raises:
        RuntimeError: If the ComfyUI workflow produced no output files.
    """
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    clips_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    image_prompt = str(shot.get("image_prompt", "")).strip()
    motion = str(shot.get("motion", "")).strip()
    tts_text = str(shot.get("tts", "")).strip()
    # Clamp to at least one second so a degenerate shot still renders.
    duration_s = max(1.0, float(shot.get("duration", 3)))

    voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
    rate = str(cfg.get("tts.rate", "+0%"))
    volume = str(cfg.get("tts.volume", "+0%"))
    audio_path = audio_dir / f"shot_{shot_id}.mp3"
    # A single space stands in for empty text so TTS synthesis never fails
    # on a shot without narration.
    audio_asset = await synthesize_one(tts_text or " ", audio_path, voice, rate, volume)

    if mock:
        from engine.main import _ensure_mock_image, _make_mock_video  # local import to avoid circular at module import

        mock_size = cfg.get("video.mock_size", [1024, 576])
        w, h = int(mock_size[0]), int(mock_size[1])
        mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h))
        fps = int(cfg.get("video.mock_fps", 24))
        raw_video_path = out_dir / f"shot_raw_{shot_id}.mp4"
        # Make the placeholder long enough for either the planned duration
        # or the actual narration, whichever is longer.
        _make_mock_video(raw_video_path, mock_image, max(duration_s, audio_asset.duration_s), fps=fps)
    else:
        comfy = ComfyClient(cfg)
        wf = comfy.load_workflow()
        seed = random.randint(1, 2_147_483_647)
        wf_i = comfy.inject_params(wf, image_prompt=image_prompt, seed=seed, motion_prompt=motion or None)
        result = await comfy.run_workflow(wf_i)
        if not result.output_files:
            # Fail loudly with context instead of a cryptic IndexError below.
            raise RuntimeError(f"ComfyUI workflow produced no output files for shot {shot_id}")
        # Prefer a recognizable video container; otherwise take whatever
        # the workflow emitted first.
        candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}]
        raw_video_path = candidates[0] if candidates else result.output_files[0]

    clip_out = clips_dir / f"shot_{shot_id}.mp4"
    v = VideoFileClip(str(raw_video_path))
    a = AudioFileClip(str(audio_asset.path))
    try:
        v2 = _fit_video_to_audio(v, a)
        w2, h2 = v2.size
        subtitle = _subtitle_clip(tts_text, (w2, h2), v2.duration or a.duration or duration_s)
        comp = CompositeVideoClip([v2, subtitle])
        try:
            comp.write_videofile(str(clip_out), codec="libx264", audio_codec="aac", fps=v2.fps or 24, preset="veryfast")
        finally:
            comp.close()
    finally:
        # Always release the ffmpeg readers, even if compositing failed.
        v.close()
        a.close()
    return str(clip_out)
|
|
|
|
|
|
def render_shot(
    shot: dict[str, Any],
    output_dir: str | Path,
    cfg: AppConfig | None = None,
    *,
    mock: bool = False,
) -> str:
    """Synchronous entry point: render one shot and return the clip path.

    Loads the default config from ``./configs/config.yaml`` when *cfg* is
    None, then drives the async renderer via ``asyncio.run``.
    """
    if cfg is None:
        cfg = AppConfig.load("./configs/config.yaml")
    return asyncio.run(_render_shot_async(shot, output_dir, cfg, mock=mock))
|
|
|