fix: 优化架构
This commit is contained in:
@@ -1,42 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, vfx
|
||||
import numpy as np
|
||||
from moviepy import AudioFileClip, VideoClip
|
||||
from PIL import Image
|
||||
from urllib.request import urlopen
|
||||
|
||||
from .audio_gen import synthesize_one
|
||||
from .comfy_client import ComfyClient
|
||||
from .comfy_client import generate_image as comfy_generate_image
|
||||
from .config import AppConfig
|
||||
from .render_pipeline import render_shot as render_shot_pipeline
|
||||
|
||||
|
||||
def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
    """Match *video*'s length to *audio* and attach the audio track.

    Loops the footage when the narration is longer, trims it when shorter,
    and leaves it untouched when either duration is unknown.
    """
    v_dur, a_dur = video.duration, audio.duration
    if a_dur is None or v_dur is None:
        # Cannot compare unknown durations — just attach the audio as-is.
        return video.with_audio(audio)

    if a_dur > v_dur:
        # Loop the footage so it lasts as long as the narration.
        video = video.with_effects([vfx.Loop(duration=a_dur)])
    elif v_dur > a_dur:
        # Trim trailing footage past the end of the narration.
        video = video.subclipped(0, a_dur)

    return video.with_audio(audio)
|
||||
# Directory holding locally cached placeholder assets.
ASSETS_DIR = "assets"
# Fallback demo image used when real image generation is unavailable.
DEMO_IMAGE = os.path.join(ASSETS_DIR, "demo.jpg")
|
||||
|
||||
|
||||
def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
    """Build a bottom-centered, slightly translucent caption for *text*.

    *size* is the (width, height) of the target frame; the caption is
    wrapped to just under the frame width so the stroke is not clipped.
    """
    # 92% of the frame width leaves a margin on both sides.
    caption_width = int(size[0] * 0.92)
    clip = TextClip(
        text=text,
        font_size=44,
        color="white",
        stroke_color="black",
        stroke_width=2,
        size=(caption_width, None),
        method="caption",
    )
    clip = clip.with_position(("center", "bottom"))
    clip = clip.with_duration(duration)
    return clip.with_opacity(0.95)
|
||||
def ensure_demo_image() -> None:
    """Download and cache a placeholder demo image if it is missing.

    No-op when the cached file already exists; otherwise fetches a random
    placeholder photo and writes it to ``DEMO_IMAGE``.
    """
    os.makedirs(ASSETS_DIR, exist_ok=True)
    if os.path.exists(DEMO_IMAGE):
        return  # already cached — nothing to do

    # Simple placeholder image source.
    url = "https://picsum.photos/1280/720"
    with urlopen(url, timeout=30) as resp:
        payload = resp.read()

    Path(DEMO_IMAGE).write_bytes(payload)
|
||||
|
||||
|
||||
def generate_image_mock(prompt: str) -> str:
    """Return the path of a local placeholder image, ignoring *prompt*.

    Mirrors the signature of the real image generator so callers can
    swap it in without changes.
    """
    del prompt  # unused; kept only for interface compatibility
    ensure_demo_image()
    return DEMO_IMAGE
|
||||
|
||||
|
||||
def enrich_prompt(prompt_text: str) -> str:
    """Append a fixed cinematic style suffix to *prompt_text*.

    Blank or falsy input yields just the style string; otherwise the
    stripped prompt is returned with the suffix appended.
    """
    style = "cinematic, ultra realistic, 4k, detailed lighting"
    cleaned = (prompt_text or "").strip()
    return f"{cleaned}, {style}" if cleaned else style
|
||||
|
||||
|
||||
async def _render_shot_async(
|
||||
@@ -55,49 +66,102 @@ async def _render_shot_async(
|
||||
|
||||
shot_id = str(shot.get("shot_id", "unknown"))
|
||||
image_prompt = str(shot.get("image_prompt", "")).strip()
|
||||
motion = str(shot.get("motion", "")).strip()
|
||||
prompt_text = str(shot.get("prompt", image_prompt) or image_prompt).strip()
|
||||
tts_text = str(shot.get("tts", "")).strip()
|
||||
duration_s = max(1.0, float(shot.get("duration", 3)))
|
||||
|
||||
voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
|
||||
rate = str(cfg.get("tts.rate", "+0%"))
|
||||
volume = str(cfg.get("tts.volume", "+0%"))
|
||||
audio_path = audio_dir / f"shot_{shot_id}.mp3"
|
||||
audio_asset = await synthesize_one(tts_text or " ", audio_path, voice, rate, volume)
|
||||
audio_asset: Any | None = None
|
||||
if tts_text:
|
||||
audio_path = audio_dir / f"shot_{shot_id}.mp3"
|
||||
audio_asset = await synthesize_one(tts_text, audio_path, voice, rate, volume)
|
||||
|
||||
# Use config-defined output resolution for stable concatenation.
|
||||
mock_size = cfg.get("video.mock_size", [1024, 576])
|
||||
w, h = int(mock_size[0]), int(mock_size[1])
|
||||
fps = int(cfg.get("video.mock_fps", 24))
|
||||
|
||||
if audio_asset and audio_asset.duration_s:
|
||||
duration_s = max(duration_s, float(audio_asset.duration_s))
|
||||
|
||||
# shot -> image (ComfyUI first; fallback to demo.jpg)
|
||||
image_path: str
|
||||
if mock:
|
||||
from engine.main import _ensure_mock_image, _make_mock_video # local import to avoid circular at module import
|
||||
|
||||
mock_size = cfg.get("video.mock_size", [1024, 576])
|
||||
w, h = int(mock_size[0]), int(mock_size[1])
|
||||
mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h))
|
||||
fps = int(cfg.get("video.mock_fps", 24))
|
||||
raw_video_path = out_dir / f"shot_raw_{shot_id}.mp4"
|
||||
_make_mock_video(raw_video_path, mock_image, max(duration_s, audio_asset.duration_s), fps=fps)
|
||||
image_path = generate_image_mock(prompt_text)
|
||||
else:
|
||||
comfy = ComfyClient(cfg)
|
||||
wf = comfy.load_workflow()
|
||||
seed = random.randint(1, 2_147_483_647)
|
||||
wf_i = comfy.inject_params(wf, image_prompt=image_prompt, seed=seed, motion_prompt=motion or None)
|
||||
result = await comfy.run_workflow(wf_i)
|
||||
candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}]
|
||||
raw_video_path = candidates[0] if candidates else result.output_files[0]
|
||||
|
||||
clip_out = clips_dir / f"shot_{shot_id}.mp4"
|
||||
v = VideoFileClip(str(raw_video_path))
|
||||
a = AudioFileClip(str(audio_asset.path))
|
||||
try:
|
||||
v2 = _fit_video_to_audio(v, a)
|
||||
w2, h2 = v2.size
|
||||
subtitle = _subtitle_clip(tts_text, (w2, h2), v2.duration or a.duration or duration_s)
|
||||
comp = CompositeVideoClip([v2, subtitle])
|
||||
try:
|
||||
comp.write_videofile(str(clip_out), codec="libx264", audio_codec="aac", fps=v2.fps or 24, preset="veryfast")
|
||||
finally:
|
||||
comp.close()
|
||||
enriched = enrich_prompt(prompt_text)
|
||||
# Store generated images directly under outputs/{task_id}
|
||||
# (as required by verification: outputs/{task_id}/*.png).
|
||||
image_path = str(
|
||||
comfy_generate_image(
|
||||
enriched,
|
||||
out_dir,
|
||||
cfg=cfg,
|
||||
timeout_s=60,
|
||||
retry=2,
|
||||
filename_prefix=f"shot_{shot_id}",
|
||||
)
|
||||
)
|
||||
print(f"[SHOT_RENDER] {shot_id} -> image generated: {image_path}")
|
||||
except Exception as e:
|
||||
print(f"[WARN] Comfy failed, fallback to demo: {e}")
|
||||
image_path = generate_image_mock(prompt_text)
|
||||
|
||||
# Ensure image exists before rendering.
|
||||
if not image_path or not os.path.exists(image_path):
|
||||
image_path = generate_image_mock(prompt_text)
|
||||
base_img = Image.open(image_path).convert("RGB")
|
||||
|
||||
def make_frame(t: float):
|
||||
# Subtle zoom-in from 1.00 to ~1.03 over the clip duration.
|
||||
progress = float(t) / max(duration_s, 1e-6)
|
||||
progress = max(0.0, min(1.0, progress))
|
||||
scale = 1.0 + 0.03 * progress
|
||||
|
||||
new_w = max(w, int(w * scale))
|
||||
new_h = max(h, int(h * scale))
|
||||
|
||||
frame = base_img.resize((new_w, new_h), Image.LANCZOS)
|
||||
left = (new_w - w) // 2
|
||||
top = (new_h - h) // 2
|
||||
frame = frame.crop((left, top, left + w, top + h))
|
||||
return np.array(frame)
|
||||
|
||||
# image -> video
|
||||
video = VideoClip(make_frame, duration=duration_s, has_constant_size=True)
|
||||
|
||||
# optional audio -> clip
|
||||
audio_clip: AudioFileClip | None = None
|
||||
if audio_asset and os.path.exists(str(audio_asset.path)):
|
||||
audio_clip = AudioFileClip(str(audio_asset.path))
|
||||
video = video.with_audio(audio_clip)
|
||||
|
||||
# output
|
||||
clip_out = clips_dir / f"shot_{shot_id}.mp4"
|
||||
print(f"[SHOT_RENDER] {shot_id} -> {clip_out}")
|
||||
try:
|
||||
video.write_videofile(
|
||||
str(clip_out),
|
||||
fps=fps,
|
||||
codec="libx264",
|
||||
audio_codec="aac",
|
||||
preset="veryfast",
|
||||
threads=2,
|
||||
)
|
||||
finally:
|
||||
v.close()
|
||||
a.close()
|
||||
try:
|
||||
video.close()
|
||||
except Exception:
|
||||
pass
|
||||
if audio_clip is not None:
|
||||
try:
|
||||
audio_clip.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return str(clip_out)
|
||||
|
||||
|
||||
@@ -109,5 +173,5 @@ def render_shot(
|
||||
mock: bool = False,
|
||||
) -> str:
|
||||
cfg2 = cfg or AppConfig.load("./configs/config.yaml")
|
||||
return asyncio.run(_render_shot_async(shot, output_dir, cfg2, mock=mock))
|
||||
return render_shot_pipeline(shot, cfg2, output_dir, mock=mock)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user