fix: 优化架构
BIN
assets/demo.jpg
Normal file
|
After Width: | Height: | Size: 150 KiB |
@@ -4,6 +4,38 @@ app:
|
|||||||
# ComfyUI output directory on the same machine running this code
|
# ComfyUI output directory on the same machine running this code
|
||||||
comfy_output_dir: "./ComfyUI/output"
|
comfy_output_dir: "./ComfyUI/output"
|
||||||
|
|
||||||
|
global:
|
||||||
|
# Used by prompt_injector + adapters.
|
||||||
|
style: ""
|
||||||
|
character: ""
|
||||||
|
negative_prompt: ""
|
||||||
|
|
||||||
|
llm:
|
||||||
|
# Controls /script + /refine generation.
|
||||||
|
provider: "mock" # "openai" to enable OpenAI/DashScope calls
|
||||||
|
|
||||||
|
image:
|
||||||
|
provider: "mock" # "mock" | "comfy" | "replicate" | "openai"
|
||||||
|
# Generic model name (used by some providers as fallback).
|
||||||
|
model: ""
|
||||||
|
|
||||||
|
replicate:
|
||||||
|
# Example: "stability-ai/sdxl"
|
||||||
|
model: "stability-ai/sdxl"
|
||||||
|
|
||||||
|
openai:
|
||||||
|
# Example: "gpt-image-1"
|
||||||
|
model: "gpt-image-1"
|
||||||
|
|
||||||
|
image_fallback:
|
||||||
|
provider: "mock"
|
||||||
|
|
||||||
|
video:
|
||||||
|
provider: "moviepy"
|
||||||
|
|
||||||
|
tts:
|
||||||
|
provider: "edge"
|
||||||
|
|
||||||
openai:
|
openai:
|
||||||
# Prefer environment variables in real deployments.
|
# Prefer environment variables in real deployments.
|
||||||
# OPENAI_API_KEY must be set; OPENAI_BASE_URL optional (for DeepSeek / other gateways).
|
# OPENAI_API_KEY must be set; OPENAI_BASE_URL optional (for DeepSeek / other gateways).
|
||||||
|
|||||||
1
engine/adapters/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
1
engine/adapters/image/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
9
engine/adapters/image/base.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class BaseImageGen:
    """Common interface implemented by every image-generation adapter."""

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Generate one image for ``prompt`` inside ``output_dir``.

        Concrete adapters override this method; the base implementation
        always raises.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError()
|
||||||
|
|
||||||
36
engine/adapters/image/comfy_adapter.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from engine.comfy_client import generate_image as comfy_generate_image
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
from .base import BaseImageGen
|
||||||
|
from .mock_adapter import MockImageGen
|
||||||
|
|
||||||
|
|
||||||
|
class ComfyAdapter(BaseImageGen):
    """Image adapter that delegates generation to ``engine.comfy_client``."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg
        # Kept as an attribute for backward compatibility; generate() itself
        # never consults it.
        self.fallback = MockImageGen()

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Render ``prompt`` through ComfyUI and return the image path as a string.

        Args:
            prompt: mapping read for the ``"positive"`` and ``"negative"`` keys;
                missing or falsy values become empty strings.
            output_dir: directory handed to the ComfyUI client for the result.

        Raises:
            Exception: whatever the ComfyUI client raises. Errors are propagated
                unchanged on purpose so that render_pipeline can apply the
                configured fallback provider.
        """
        positive = str(prompt.get("positive", "") or "")
        negative = str(prompt.get("negative", "") or "")
        image_path = comfy_generate_image(
            positive,
            output_dir,
            # None lets the client substitute its own default negative prompt.
            negative_text=negative or None,
            cfg=self.cfg,
            timeout_s=60,
            retry=2,
            filename_prefix="shot",
        )
        return str(image_path)
|
||||||
|
|
||||||
45
engine/adapters/image/mock_adapter.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from .base import BaseImageGen
|
||||||
|
|
||||||
|
|
||||||
|
ASSETS_DIR = "assets"
DEMO_IMAGE = os.path.join(ASSETS_DIR, "demo.jpg")


def ensure_demo_image(url: str = "https://picsum.photos/1280/720") -> None:
    """Make sure ``DEMO_IMAGE`` exists, downloading it from ``url`` if missing.

    The payload is written to a temporary sibling file and then moved into
    place with ``os.replace``. An interrupted or failed write therefore never
    leaves a truncated ``demo.jpg`` behind that later calls would mistake for
    a valid image.

    Args:
        url: location the demo image is fetched from when it is not on disk.

    Raises:
        OSError: if the download or the file write fails (``urlopen`` errors
            are ``OSError`` subclasses).
    """
    os.makedirs(ASSETS_DIR, exist_ok=True)
    if os.path.exists(DEMO_IMAGE):
        return

    with urlopen(url, timeout=30) as resp:
        data = resp.read()

    # Per-process temp name so concurrent callers do not share a partial file.
    tmp_path = f"{DEMO_IMAGE}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            f.write(data)
        os.replace(tmp_path, DEMO_IMAGE)
    finally:
        # Only still present when the write or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
||||||
|
|
||||||
|
|
||||||
|
class MockImageGen(BaseImageGen):
    """Offline image provider that always emits a copy of the demo image."""

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Write the demo image into ``output_dir`` and return its path.

        Args:
            prompt: accepted for interface consistency only; never read.
            output_dir: directory (created if missing) receiving the file.

        Returns:
            Path of the written ``shot_<hex>.png`` file, as a string.
        """
        _ = prompt
        ensure_demo_image()
        out_dir = Path(output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        out_path = out_dir / f"shot_{uuid.uuid4().hex}.png"
        try:
            # Convert to PNG so verification criteria can match *.png.
            # The context manager releases the source file handle promptly.
            with Image.open(DEMO_IMAGE) as src:
                src.convert("RGB").save(str(out_path), format="PNG")
        except Exception:
            # Deliberate best effort: if PNG conversion fails, still write a
            # raw byte copy (the content is then not PNG despite the suffix).
            out_path.write_bytes(Path(DEMO_IMAGE).read_bytes())
        return str(out_path)
|
||||||
|
|
||||||
83
engine/adapters/image/openai_image_adapter.py
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
from .base import BaseImageGen
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAIImageAdapter(BaseImageGen):
    """
    Optional image provider adapter using OpenAI Images API (or OpenAI-compatible gateways).
    Requires `openai` python package and a configured API key via environment variables.
    """

    _DEFAULT_BASE_URL = "https://api.openai.com/v1"

    def __init__(self, cfg: AppConfig):
        """Resolve model, API key and base URL from ``cfg`` and build the client.

        Config keys read:
            - ``image.openai.model``, falling back to ``image.model`` when the
              former is missing or empty.
            - ``openai.api_key_env``: an env var name, or a literal ``sk-...`` key.
            - ``openai.base_url_env``: a literal ``http(s)://`` URL, or the name
              of an env var holding one; the public OpenAI endpoint is used when
              neither yields a value.

        Raises:
            ValueError: no model name is configured.
            RuntimeError: no API key could be resolved.
        """
        self.cfg = cfg
        # `or` chaining so an empty/None provider-specific model still falls
        # back to the generic one instead of becoming "" or "None".
        self.model = str(cfg.get("image.openai.model", "") or cfg.get("image.model", "") or "").strip()
        if not self.model:
            raise ValueError("OpenAIImageAdapter requires `image.openai.model` (or `image.model`).")

        api_key_env_or_literal = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "OPENAI_API_KEY").strip()
        # Support both:
        # - env var name (e.g. OPENAI_API_KEY)
        # - literal API key (e.g. starts with `sk-...`) for quick local POCs.
        if api_key_env_or_literal.startswith("sk-"):
            api_key = api_key_env_or_literal
        else:
            api_key = os.environ.get(api_key_env_or_literal)
        if not api_key:
            raise RuntimeError(f"OpenAIImageAdapter missing API key: `{api_key_env_or_literal}`")
        self.api_key = api_key

        # Mirror the API-key handling: the value may be a URL or an env var name.
        base_url_env_or_literal = str(cfg.get("openai.base_url_env", "") or "").strip()
        if base_url_env_or_literal.lower().startswith(("http://", "https://")):
            base_url = base_url_env_or_literal
        elif base_url_env_or_literal:
            base_url = (os.environ.get(base_url_env_or_literal) or "").strip()
        else:
            base_url = ""
        self.base_url = (base_url or self._DEFAULT_BASE_URL).rstrip("/")

        # Lazy import to avoid hard dependency for mock/comfy users.
        from openai import OpenAI  # type: ignore

        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Generate one image and store it as PNG under ``output_dir``.

        Args:
            prompt: mapping read for ``"positive"`` and ``"negative"``.
            output_dir: directory (created if missing) receiving the file.

        Returns:
            Path of the written ``shot_<hex>.png`` file, as a string.

        Raises:
            RuntimeError: the response carries neither a URL nor base64 data.
            requests.HTTPError: downloading a returned URL failed.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        positive = prompt.get("positive", "")
        negative = prompt.get("negative", "")
        # OpenAI Images API generally doesn't expose a dedicated negative_prompt field.
        # To keep interface consistency, embed negative hints into the prompt text.
        if negative:
            prompt_text = f"{positive}\nNegative prompt: {negative}"
        else:
            prompt_text = positive

        result = self.client.images.generate(model=self.model, prompt=prompt_text)

        # Depending on the model the SDK returns either `url` or `b64_json`
        # on the first data item; accept both.
        items = getattr(result, "data", None) or []
        first = items[0] if items else None
        url = getattr(first, "url", None)
        b64_payload = getattr(first, "b64_json", None)

        if url:
            r = requests.get(url, timeout=60)
            r.raise_for_status()
            raw = r.content
        elif b64_payload:
            import base64

            raw = base64.b64decode(b64_payload)
        else:
            raise RuntimeError("OpenAIImageAdapter unexpected response: missing image url or b64_json")

        out_path = output_dir / f"shot_{uuid.uuid4().hex}.png"
        with Image.open(BytesIO(raw)) as src:
            src.convert("RGB").save(str(out_path), format="PNG")
        return str(out_path)
|
||||||
|
|
||||||
60
engine/adapters/image/replicate_adapter.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
from .base import BaseImageGen
|
||||||
|
|
||||||
|
|
||||||
|
class ReplicateAdapter(BaseImageGen):
    """Image adapter that runs a model through the ``replicate`` client."""

    def __init__(self, cfg: AppConfig):
        """Read the model name from ``cfg`` and import the client lazily.

        Config keys read: ``image.replicate.model``, falling back to
        ``image.model`` when the former is missing or empty.

        Raises:
            ValueError: no model name is configured.
        """
        self.cfg = cfg
        # `or` chaining so an empty/None provider-specific model still falls
        # back to the generic one instead of becoming "" or "None".
        self.model = str(cfg.get("image.replicate.model", "") or cfg.get("image.model", "") or "").strip()
        if not self.model:
            raise ValueError("ReplicateAdapter requires `image.replicate.model` (or `image.model`).")

        # Import lazily so that environments without replicate installed can still run with mock/comfy.
        import replicate  # type: ignore

        self.replicate = replicate

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Run the model, download the first result and store it as PNG.

        Args:
            prompt: mapping read for ``"positive"`` and ``"negative"``.
            output_dir: directory (created if missing) receiving the file.

        Returns:
            Path of the written ``shot_<hex>.png`` file, as a string.

        Raises:
            RuntimeError: no image URL could be derived from the model output.
            requests.HTTPError: downloading the image failed.
        """
        from io import BytesIO

        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        input_payload: dict[str, Any] = {
            "prompt": prompt.get("positive", ""),
            "negative_prompt": prompt.get("negative", ""),
        }

        output = self.replicate.run(self.model, input=input_payload)

        # Accepted shapes: a sequence (first element wins), a dict, or a
        # single item. An item is either a URL string or an object exposing
        # a string `url` attribute.
        if isinstance(output, (list, tuple)):
            first = output[0] if output else None
        elif isinstance(output, dict):
            first = output.get("image") or output.get("output") or output.get("url")
        else:
            first = output
        image_url = first if isinstance(first, str) else getattr(first, "url", None)
        if not isinstance(image_url, str) or not image_url:
            raise RuntimeError(f"Unexpected Replicate output shape: {type(output)}")

        r = requests.get(image_url, timeout=60)
        r.raise_for_status()

        # Always output PNG to satisfy downstream validation `outputs/{task_id}/*.png`.
        out_path = output_dir / f"shot_{uuid.uuid4().hex}.png"
        with Image.open(BytesIO(r.content)) as src:
            src.convert("RGB").save(str(out_path), format="PNG")
        return str(out_path)
|
||||||
|
|
||||||
21
engine/adapters/image/stability_adapter.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
from .base import BaseImageGen
|
||||||
|
|
||||||
|
|
||||||
|
class StabilityAdapter(BaseImageGen):
    """Stub reserved for Stability AI image generation.

    Only stores the configuration; the implementation and its dependencies
    are to be added when needed.
    """

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Always raise ``NotImplementedError``; the adapter is a placeholder."""
        message = "StabilityAdapter not implemented yet"
        raise NotImplementedError(message)
|
||||||
|
|
||||||
1
engine/adapters/llm/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
12
engine/adapters/llm/base.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class BaseLLM:
    """Common interface implemented by every script-writing LLM adapter."""

    def generate_script(self, prompt: str, context: dict[str, Any] | None = None) -> Any:
        """Turn ``prompt`` into a script; must be overridden by adapters.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError()

    def refine_scene(self, scene: Any, context: dict[str, Any] | None = None) -> Any:
        """Return a refined version of ``scene``; must be overridden by adapters.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError()
|
||||||
|
|
||||||
25
engine/adapters/llm/mock_adapter.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from engine.types import Scene
|
||||||
|
|
||||||
|
from .base import BaseLLM
|
||||||
|
|
||||||
|
|
||||||
|
class MockLLM(BaseLLM):
    """Deterministic LLM stand-in for offline development."""

    def generate_script(self, prompt: str, context: dict[str, Any] | None = None) -> list[Scene]:
        """Return three fixed scenes whose image prompts are prefixed by ``prompt``.

        A blank or missing prompt is replaced by ``"a warm city night"``.
        ``context`` is not used.
        """
        topic = (prompt or "").strip() or "a warm city night"
        opening = Scene(
            image_prompt=f"{topic},城市夜景,霓虹灯,电影感",
            video_motion="缓慢推进镜头,轻微摇镜",
            narration="夜色温柔落在街灯上",
        )
        middle = Scene(
            image_prompt=f"{topic},咖啡店窗边,暖光,细雨",
            video_motion="侧向平移,人物轻轻抬头",
            narration="雨声里藏着一段回忆",
        )
        closing = Scene(
            image_prompt=f"{topic},桥上远景,车流光轨,温暖",
            video_motion="拉远全景,光轨流动",
            narration="我们在光里学会告别",
        )
        return [opening, middle, closing]

    def refine_scene(self, scene: Scene, context: dict[str, Any] | None = None) -> Scene:
        """Return a copy of ``scene`` with a hint appended to the narration.

        The resulting narration is cut to at most 30 characters; the other
        fields are carried over unchanged. ``context`` is not used.
        """
        polished = scene.narration + "(更凝练)"
        return Scene(
            image_prompt=scene.image_prompt,
            video_motion=scene.video_motion,
            narration=polished[:30],
        )
|
||||||
|
|
||||||
29
engine/adapters/llm/openai_adapter.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from engine.config import AppConfig
|
||||||
|
from engine.script_gen import generate_scenes, refine_scene
|
||||||
|
|
||||||
|
from .base import BaseLLM
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAIAdapter(BaseLLM):
    """LLM adapter that forwards to the functions in ``engine.script_gen``."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate_script(self, prompt: str, context: dict[str, Any] | None = None):
        """Delegate to ``generate_scenes`` with the stored config.

        ``context`` is accepted for interface compatibility and not used.
        """
        return generate_scenes(prompt, self.cfg)

    def refine_scene(self, scene: Any, context: dict[str, Any] | None = None):
        """Delegate to ``script_gen.refine_scene`` using values from ``context``.

        ``context`` must provide non-None ``"scenes"``, ``"prompt"`` and
        ``"target_index"`` entries; the ``scene`` argument itself is not read.

        Raises:
            ValueError: one of the three context entries is missing or None.
        """
        ctx = {} if context is None else context
        required = ("scenes", "prompt", "target_index")
        scenes, prompt2, target_index = (ctx.get(key) for key in required)
        if any(value is None for value in (scenes, prompt2, target_index)):
            raise ValueError("OpenAIAdapter.refine_scene missing context: scenes/prompt/target_index")
        return refine_scene(
            prompt=prompt2,
            scenes=scenes,
            target_index=int(target_index),
            cfg=self.cfg,
        )
|
||||||
|
|
||||||
1
engine/adapters/tts/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
9
engine/adapters/tts/base.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTTS:
    """Common interface implemented by every text-to-speech adapter."""

    def generate(self, text: str, output_path: str | Path) -> str:
        """Synthesize ``text`` to ``output_path``; must be overridden by adapters.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError()
|
||||||
|
|
||||||
28
engine/adapters/tts/edge_adapter.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from engine.audio_gen import synthesize_one
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
from .base import BaseTTS
|
||||||
|
|
||||||
|
|
||||||
|
class EdgeTTS(BaseTTS):
    """TTS adapter backed by ``engine.audio_gen.synthesize_one``."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate(self, text: str, output_path: str | Path) -> str:
        """Synthesize ``text`` and return the resulting asset path as a string.

        Voice, rate and volume come from the ``tts.voice``, ``tts.rate`` and
        ``tts.volume`` config keys. A falsy ``text`` is replaced by a single
        space. The coroutine is driven with ``asyncio.run``.
        """
        spoken = text or " "
        target = Path(output_path)
        read = self.cfg.get
        voice = str(read("tts.voice", "zh-CN-XiaoxiaoNeural"))
        rate = str(read("tts.rate", "+0%"))
        volume = str(read("tts.volume", "+0%"))

        async def _synthesize() -> str:
            result = await synthesize_one(spoken, target, voice, rate, volume)
            return str(result.path)

        return asyncio.run(_synthesize())
|
||||||
|
|
||||||
15
engine/adapters/tts/mock_adapter.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .base import BaseTTS
|
||||||
|
|
||||||
|
|
||||||
|
class MockTTS(BaseTTS):
    """Offline TTS stand-in that produces a zero-byte placeholder file."""

    def generate(self, text: str, output_path: str | Path) -> str:
        """Create an empty file at ``output_path`` and return that path.

        ``text`` is ignored. Note that the returned path points at an existing
        but empty file, not at an empty path.
        """
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(bytes())
        return str(target)
|
||||||
|
|
||||||
1
engine/adapters/video/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
9
engine/adapters/video/base.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class BaseVideoGen:
    """Common interface implemented by every clip-generation adapter."""

    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
        """Render a clip from ``image_path``; must be overridden by adapters.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError()
|
||||||
|
|
||||||
18
engine/adapters/video/ltx_adapter.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
from .base import BaseVideoGen
|
||||||
|
|
||||||
|
|
||||||
|
class LTXVideoGen(BaseVideoGen):
    """Stub reserved for direct image-to-video generation (LTX / diffusion video)."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
        """Always raise ``NotImplementedError``.

        The project currently keeps clip generation on MoviePy for stability.
        """
        message = "LTXVideoGen is not implemented yet"
        raise NotImplementedError(message)
|
||||||
|
|
||||||
81
engine/adapters/video/moviepy_adapter.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from moviepy import AudioFileClip, VideoClip
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
from .base import BaseVideoGen
|
||||||
|
|
||||||
|
|
||||||
|
class MoviePyVideoGen(BaseVideoGen):
    """Clip generator that animates a still image with a slow zoom via MoviePy."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
        """Render ``image_path`` into an H.264 clip at ``output_path``.

        Args:
            image_path: still image used for every frame.
            prompt: render options. Keys read: ``duration_s`` (default 3),
                ``fps`` (default ``video.mock_fps`` from config, else 24),
                ``audio_path`` (optional) and ``size`` (``[w, h]``; falls back
                to ``video.mock_size`` from config, else 1024x576).
            output_path: destination file; parent directories are created.

        Returns:
            ``output_path`` as a string.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        duration_s = float(prompt.get("duration_s", 3))
        fps = int(prompt.get("fps", self.cfg.get("video.mock_fps", 24)))
        audio_path = prompt.get("audio_path")

        # Clip resolution.
        size = prompt.get("size")
        if isinstance(size, (list, tuple)) and len(size) == 2:
            w, h = int(size[0]), int(size[1])
        else:
            mock_size = self.cfg.get("video.mock_size", [1024, 576])
            w, h = int(mock_size[0]), int(mock_size[1])

        # convert() yields an in-memory image, so the file handle can be
        # released right away.
        with Image.open(image_path) as src:
            base_img = src.convert("RGB")

        def make_frame(t: float):
            # Zoom from 100% to 103% over the clip, then centre-crop to w x h.
            progress = float(t) / max(duration_s, 1e-6)
            progress = max(0.0, min(1.0, progress))
            scale = 1.0 + 0.03 * progress
            new_w = max(w, int(w * scale))
            new_h = max(h, int(h * scale))
            frame = base_img.resize((new_w, new_h), Image.LANCZOS)
            left = (new_w - w) // 2
            top = (new_h - h) // 2
            frame = frame.crop((left, top, left + w, top + h))
            return np.array(frame)

        video = VideoClip(make_frame, duration=duration_s, has_constant_size=True)
        audio = None
        try:
            # Optional audio. A zero-byte file (e.g. the placeholder written
            # by MockTTS) is treated as "no audio" rather than decoded.
            if audio_path:
                audio_file = str(audio_path)
                if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
                    audio = AudioFileClip(audio_file)
                    video = video.with_audio(audio)

            video.write_videofile(
                str(output_path),
                fps=fps,
                codec="libx264",
                audio_codec="aac",
                preset="veryfast",
                threads=2,
            )
        finally:
            # Cleanup is best effort; a close() failure must not mask the
            # outcome of the render itself.
            try:
                video.close()
            except Exception:
                pass
            if audio is not None:
                try:
                    audio.close()
                except Exception:
                    pass

        return str(output_path)
|
||||||
|
|
||||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -186,3 +187,215 @@ class ComfyClient:
|
|||||||
|
|
||||||
# unreachable
|
# unreachable
|
||||||
# return ComfyResult(prompt_id=prompt_id, output_files=last_files)
|
# return ComfyResult(prompt_id=prompt_id, output_files=last_files)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Minimal "text->image" helpers (used by shot rendering)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _build_simple_workflow(
|
||||||
|
prompt_text: str,
|
||||||
|
*,
|
||||||
|
seed: int,
|
||||||
|
ckpt_name: str,
|
||||||
|
width: int,
|
||||||
|
height: int,
|
||||||
|
steps: int = 20,
|
||||||
|
cfg: float = 8.0,
|
||||||
|
sampler_name: str = "euler",
|
||||||
|
scheduler: str = "normal",
|
||||||
|
denoise: float = 1.0,
|
||||||
|
filename_prefix: str = "shot",
|
||||||
|
negative_text: str = "low quality, blurry",
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
# Best-effort workflow. If your ComfyUI nodes/models differ, generation must fallback.
|
||||||
|
return {
|
||||||
|
"3": {
|
||||||
|
"class_type": "KSampler",
|
||||||
|
"inputs": {
|
||||||
|
"seed": int(seed),
|
||||||
|
"steps": int(steps),
|
||||||
|
"cfg": float(cfg),
|
||||||
|
"sampler_name": sampler_name,
|
||||||
|
"scheduler": scheduler,
|
||||||
|
"denoise": float(denoise),
|
||||||
|
"model": ["4", 0],
|
||||||
|
"positive": ["6", 0],
|
||||||
|
"negative": ["7", 0],
|
||||||
|
"latent_image": ["5", 0],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"4": {
|
||||||
|
"class_type": "CheckpointLoaderSimple",
|
||||||
|
"inputs": {
|
||||||
|
"ckpt_name": ckpt_name,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"5": {
|
||||||
|
"class_type": "EmptyLatentImage",
|
||||||
|
"inputs": {
|
||||||
|
"width": int(width),
|
||||||
|
"height": int(height),
|
||||||
|
"batch_size": 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"6": {
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"inputs": {
|
||||||
|
"text": prompt_text,
|
||||||
|
"clip": ["4", 1],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"7": {
|
||||||
|
"class_type": "CLIPTextEncode",
|
||||||
|
"inputs": {
|
||||||
|
"text": negative_text,
|
||||||
|
"clip": ["4", 1],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"8": {
|
||||||
|
"class_type": "VAEDecode",
|
||||||
|
"inputs": {
|
||||||
|
"samples": ["3", 0],
|
||||||
|
"vae": ["4", 2],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"9": {
|
||||||
|
"class_type": "SaveImage",
|
||||||
|
"inputs": {
|
||||||
|
"images": ["8", 0],
|
||||||
|
"filename_prefix": filename_prefix,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _queue_prompt(base_url: str, workflow: dict[str, Any], client_id: str) -> str:
    """POST ``workflow`` to ``<base_url>/prompt`` and return the prompt id.

    Raises:
        httpx.HTTPStatusError: the server answered with an error status.
        RuntimeError: the JSON reply has no non-empty string ``prompt_id``.
    """
    endpoint = base_url.rstrip("/") + "/prompt"
    body = {"prompt": workflow, "client_id": client_id}
    response = httpx.post(endpoint, json=body, timeout=30.0)
    response.raise_for_status()
    data = response.json()
    prompt_id = data.get("prompt_id")
    if isinstance(prompt_id, str) and prompt_id:
        return prompt_id
    raise RuntimeError(f"Unexpected /prompt response: {data}")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_history_item(base_url: str, prompt_id: str) -> dict[str, Any] | None:
    """Fetch the history record for ``prompt_id``, or None when unavailable.

    Tries ``/history/<prompt_id>`` first and ``/history`` second. A 404 or any
    exception moves on to the next URL. For a successful JSON reply:
    the dict stored under ``prompt_id`` is returned when present; otherwise
    the whole dict is returned for the per-prompt URL; otherwise None.
    """
    root = base_url.rstrip("/")
    candidates = (f"{root}/history/{prompt_id}", f"{root}/history")
    for url in candidates:
        try:
            resp = httpx.get(url, timeout=30.0)
            if resp.status_code == 404:
                continue
            resp.raise_for_status()
            data = resp.json()
            if not isinstance(data, dict):
                return None
            entry = data.get(prompt_id)
            if isinstance(entry, dict):
                return entry
            if url.endswith(f"/{prompt_id}"):
                return data
            return None
        except Exception:
            # Polling helper: transient failures are retried by the caller.
            continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_first_image_view_target(history_item: dict[str, Any]) -> tuple[str, str] | None:
|
||||||
|
outputs = history_item.get("outputs")
|
||||||
|
if not isinstance(outputs, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def walk(v: Any) -> list[dict[str, Any]]:
|
||||||
|
found: list[dict[str, Any]] = []
|
||||||
|
if isinstance(v, dict):
|
||||||
|
if isinstance(v.get("filename"), str) and v.get("filename").strip():
|
||||||
|
found.append(v)
|
||||||
|
for vv in v.values():
|
||||||
|
found.extend(walk(vv))
|
||||||
|
elif isinstance(v, list):
|
||||||
|
for vv in v:
|
||||||
|
found.extend(walk(vv))
|
||||||
|
return found
|
||||||
|
|
||||||
|
candidates = walk(outputs)
|
||||||
|
for c in candidates:
|
||||||
|
fn = str(c.get("filename", "")).strip()
|
||||||
|
sf = str(c.get("subfolder", "") or "").strip()
|
||||||
|
if fn:
|
||||||
|
return fn, sf
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def generate_image(
    prompt_text: str,
    output_dir: str | Path,
    *,
    cfg: AppConfig | None = None,
    timeout_s: int = 60,
    retry: int = 2,
    width: int | None = None,
    height: int | None = None,
    filename_prefix: str = "shot",
    ckpt_candidates: list[str] | None = None,
    negative_text: str | None = None,
) -> Path:
    """Generate one image through ComfyUI and save it under ``output_dir``.

    For each attempt (at least one, ``retry`` in total) every checkpoint in
    ``ckpt_candidates`` is tried in order: a workflow is queued, the history
    endpoint is polled once per second for up to ``timeout_s`` seconds, and
    the first referenced file is downloaded from ``/view``.

    Args:
        prompt_text: positive prompt.
        output_dir: directory (created if missing) receiving the image.
        cfg: configuration; loaded from ``./configs/config.yaml`` when omitted.
        timeout_s: polling budget per queued prompt, in seconds.
        retry: number of passes over the checkpoint list.
        width, height: image size; a missing value falls back to the
            ``video.mock_size`` config entry (default 1024x576).
        filename_prefix: prefix handed to the SaveImage node.
        ckpt_candidates: checkpoint file names to try; a built-in list of
            SD 1.5 file names is used when omitted.
        negative_text: negative prompt; ``"low quality, blurry"`` when None.

    Returns:
        Path of the downloaded image.

    Raises:
        RuntimeError: every attempt failed; the last error is included in the
            message and attached as ``__cause__``.
    """
    cfg2 = cfg or AppConfig.load("./configs/config.yaml")
    base_url = str(cfg2.get("app.comfy_base_url", "http://comfyui:8188")).rstrip("/")

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    if width is None or height is None:
        mock_size = cfg2.get("video.mock_size", [1024, 576])
        width = int(width or mock_size[0])
        height = int(height or mock_size[1])

    if negative_text is None:
        negative_text = "low quality, blurry"

    if ckpt_candidates is None:
        ckpt_candidates = [
            "v1-5-pruned-emaonly.ckpt",
            "v1-5-pruned-emaonly.safetensors",
            "sd-v1-5-tiny.safetensors",
        ]

    last_err: Exception | None = None
    for _attempt in range(max(1, retry)):
        for ckpt_name in ckpt_candidates:
            client_id = str(uuid.uuid4())
            seed = int(uuid.uuid4().int % 2_147_483_647)
            workflow = _build_simple_workflow(
                prompt_text,
                seed=seed,
                ckpt_name=ckpt_name,
                width=width,
                height=height,
                filename_prefix=filename_prefix,
                negative_text=negative_text,
            )
            try:
                prompt_id = _queue_prompt(base_url, workflow, client_id)
                deadline = time.monotonic() + timeout_s
                while time.monotonic() < deadline:
                    item = _get_history_item(base_url, prompt_id)
                    img_target = _extract_first_image_view_target(item) if isinstance(item, dict) else None
                    if img_target:
                        filename, subfolder = img_target
                        # `params` URL-encodes the values, so names with
                        # spaces, '&' or non-ASCII characters survive.
                        img_resp = httpx.get(
                            f"{base_url}/view",
                            params={"filename": filename, "subfolder": subfolder},
                            timeout=60.0,
                        )
                        img_resp.raise_for_status()
                        # Keep only the final path component so a
                        # server-supplied name cannot escape out_dir.
                        image_path = out_dir / Path(filename).name
                        image_path.write_bytes(img_resp.content)
                        return image_path
                    time.sleep(1.0)
                # Record the timeout so the final error is not "…: None".
                last_err = TimeoutError(
                    f"no image for prompt {prompt_id} (checkpoint {ckpt_name!r}) within {timeout_s}s"
                )
            except Exception as e:
                # Best effort across checkpoints/attempts: remember and move on.
                last_err = e
                continue

    raise RuntimeError(f"ComfyUI image generation failed after retries: {last_err}") from last_err
|
||||||
|
|||||||
@@ -12,13 +12,14 @@ from typing import Any
|
|||||||
from moviepy import ImageClip
|
from moviepy import ImageClip
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
from engine.audio_gen import synthesize_scenes
|
from engine.model_factory import get_model
|
||||||
|
from engine.prompt_injector import inject_prompt
|
||||||
|
from engine.adapters.image.mock_adapter import MockImageGen
|
||||||
from engine.assembler import assemble_clips
|
from engine.assembler import assemble_clips
|
||||||
from engine.comfy_client import ComfyClient
|
from engine.comfy_client import ComfyClient
|
||||||
from engine.config import AppConfig
|
from engine.config import AppConfig
|
||||||
from engine.director import scenes_to_shots
|
from engine.director import scenes_to_shots
|
||||||
from engine.shot_executor import render_shot
|
from engine.shot_executor import render_shot
|
||||||
from engine.script_gen import generate_scenes, refine_scene
|
|
||||||
from engine.task_store import create_task, update_shot_status, update_task_status
|
from engine.task_store import create_task, update_shot_status, update_task_status
|
||||||
from engine.types import Scene
|
from engine.types import Scene
|
||||||
from engine.video_editor import Segment, render_final
|
from engine.video_editor import Segment, render_final
|
||||||
@@ -28,13 +29,15 @@ def _emit(line: str) -> None:
|
|||||||
print(line, flush=True)
|
print(line, flush=True)
|
||||||
|
|
||||||
|
|
||||||
def _emit_scene(scene_idx: int, scene: Scene, extra: dict[str, Any] | None = None) -> None:
    """Emit one ``SCENE_JSON`` line describing ``scene``.

    The JSON payload holds ``index``, ``image_prompt``, ``video_motion`` and
    ``narration``; entries from a non-empty ``extra`` are merged on top.
    Non-ASCII text is written unescaped.
    """
    payload: dict[str, Any] = dict(
        index=scene_idx,
        image_prompt=scene.image_prompt,
        video_motion=scene.video_motion,
        narration=scene.narration,
    )
    if extra:
        payload.update(extra)
    encoded = json.dumps(payload, ensure_ascii=False)
    _emit("SCENE_JSON " + encoded)
|
||||||
|
|
||||||
|
|
||||||
@@ -136,9 +139,50 @@ def _fallback_scenes(prompt: str) -> list[Scene]:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_scene_preview(
    *,
    cfg: AppConfig,
    out_dir: Path,
    image_prompt: str,
    style: str | None,
    character: str | None,
) -> str | None:
    """Generate a preview image for one scene and return its static URL.

    Image generators are tried in order: the configured ``image`` model, the
    configured ``image_fallback`` model, then ``MockImageGen``. Preview
    generation is best-effort: if every generator fails, ``None`` is returned
    instead of raising, so the script stage is never blocked by a preview.

    Args:
        cfg: Application config; its ``global`` section seeds the prompt globals.
        out_dir: Directory the image is written to (created if missing).
        image_prompt: Scene image prompt passed to ``inject_prompt``.
        style: Optional override for the global ``style``.
        character: Optional override for the global ``character``.

    Returns:
        ``/api/static/<out_dir name>/<image file name>`` when the generated
        file exists on disk, otherwise ``None``.
    """
    # Callers may request a preview before they create the task directory.
    out_dir.mkdir(parents=True, exist_ok=True)

    global_cfg = dict(cfg.get("global", {}) or {})
    if style:
        global_cfg["style"] = style
    if character:
        global_cfg["character"] = character
    prompt_obj = inject_prompt(global_cfg, {"prompt": image_prompt})

    # Ordered from preferred to last resort. Each entry is a factory so that
    # adapter construction failures (e.g. missing optional packages) are
    # handled exactly like generation failures.
    generator_factories = (
        lambda: get_model("image", cfg),
        lambda: get_model("image_fallback", cfg),
        MockImageGen,
    )

    image_path = None
    for make_generator in generator_factories:
        try:
            image_path = make_generator().generate(prompt_obj, out_dir)
        except Exception:
            # Deliberate best-effort: move on to the next generator.
            continue
        break

    if image_path is None:
        return None

    p = Path(str(image_path))
    if not p.exists():
        return None
    return f"/api/static/{out_dir.name}/{p.name}"
|
||||||
|
|
||||||
|
|
||||||
def _has_llm_key(cfg: AppConfig) -> bool:
|
def _has_llm_key(cfg: AppConfig) -> bool:
|
||||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
|
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "OPENAI_API_KEY").strip()
|
||||||
return bool(os.environ.get(api_key_env))
|
# Env var name case.
|
||||||
|
if os.environ.get(api_key_env):
|
||||||
|
return True
|
||||||
|
# Literal key case (DashScope / OpenAI-compatible).
|
||||||
|
if api_key_env.startswith("sk-"):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _parse_scenes_from_obj(obj: Any) -> list[Scene]:
|
def _parse_scenes_from_obj(obj: Any) -> list[Scene]:
|
||||||
@@ -239,7 +283,8 @@ def step_script(prompt: str, cfg: AppConfig, mock: bool, *, style: str | None, c
|
|||||||
# fallback scenes still should include global injection
|
# fallback scenes still should include global injection
|
||||||
scenes = _fallback_scenes(prompt)
|
scenes = _fallback_scenes(prompt)
|
||||||
else:
|
else:
|
||||||
scenes = generate_scenes(prompt2, cfg)
|
llm = get_model("llm", cfg)
|
||||||
|
scenes = llm.generate_script(prompt2, context=None)
|
||||||
|
|
||||||
out_dir.mkdir(parents=True, exist_ok=True)
|
out_dir.mkdir(parents=True, exist_ok=True)
|
||||||
_emit("SCRIPT_BEGIN")
|
_emit("SCRIPT_BEGIN")
|
||||||
@@ -249,7 +294,14 @@ def step_script(prompt: str, cfg: AppConfig, mock: bool, *, style: str | None, c
|
|||||||
video_motion=s.video_motion,
|
video_motion=s.video_motion,
|
||||||
narration=s.narration,
|
narration=s.narration,
|
||||||
)
|
)
|
||||||
_emit_scene(idx, s2)
|
preview_url = _generate_scene_preview(
|
||||||
|
cfg=cfg,
|
||||||
|
out_dir=out_dir,
|
||||||
|
image_prompt=s2.image_prompt,
|
||||||
|
style=style,
|
||||||
|
character=character,
|
||||||
|
)
|
||||||
|
_emit_scene(idx, s2, extra={"preview_url": preview_url or ""})
|
||||||
_emit("SCRIPT_END")
|
_emit("SCRIPT_END")
|
||||||
(out_dir / "scenes.json").write_text(
|
(out_dir / "scenes.json").write_text(
|
||||||
json.dumps(
|
json.dumps(
|
||||||
@@ -292,8 +344,9 @@ def step_refine(
|
|||||||
narration=(s.narration + "(更凝练)")[:30],
|
narration=(s.narration + "(更凝练)")[:30],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Ensure globals are visible to LLM, and inject to output image prompt.
|
llm = get_model("llm", cfg)
|
||||||
refined0 = refine_scene(prompt=prompt2, scenes=scenes, target_index=target_index, cfg=cfg)
|
# Context carries prompt + scenes for consistent refinement.
|
||||||
|
refined0 = llm.refine_scene(scenes[target_index - 1], context={"prompt": prompt2, "scenes": scenes, "target_index": target_index})
|
||||||
refined = Scene(
|
refined = Scene(
|
||||||
image_prompt=_decorate_image_prompt(refined0.image_prompt, style=style, character=character),
|
image_prompt=_decorate_image_prompt(refined0.image_prompt, style=style, character=character),
|
||||||
video_motion=refined0.video_motion,
|
video_motion=refined0.video_motion,
|
||||||
@@ -301,7 +354,14 @@ def step_refine(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Keep the original index for frontend replacement.
|
# Keep the original index for frontend replacement.
|
||||||
_emit_scene(scene_index, refined)
|
preview_url = _generate_scene_preview(
|
||||||
|
cfg=cfg,
|
||||||
|
out_dir=out_dir,
|
||||||
|
image_prompt=refined.image_prompt,
|
||||||
|
style=style,
|
||||||
|
character=character,
|
||||||
|
)
|
||||||
|
_emit_scene(scene_index, refined, extra={"preview_url": preview_url or ""})
|
||||||
out_dir.mkdir(parents=True, exist_ok=True)
|
out_dir.mkdir(parents=True, exist_ok=True)
|
||||||
(out_dir / f"refine_scene_{scene_index}.json").write_text(
|
(out_dir / f"refine_scene_{scene_index}.json").write_text(
|
||||||
json.dumps(
|
json.dumps(
|
||||||
|
|||||||
80
engine/model_factory.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from engine.config import AppConfig
|
||||||
|
|
||||||
|
|
||||||
|
def _provider(cfg: AppConfig, path: str, default: str) -> str:
|
||||||
|
env_map = {
|
||||||
|
"llm.provider": "ENGINE_LLM_PROVIDER",
|
||||||
|
"image.provider": "ENGINE_IMAGE_PROVIDER",
|
||||||
|
"image_fallback.provider": "ENGINE_IMAGE_FALLBACK_PROVIDER",
|
||||||
|
"video.provider": "ENGINE_VIDEO_PROVIDER",
|
||||||
|
"tts.provider": "ENGINE_TTS_PROVIDER",
|
||||||
|
}
|
||||||
|
env_key = env_map.get(path)
|
||||||
|
if env_key:
|
||||||
|
env_val = str(os.environ.get(env_key, "")).strip()
|
||||||
|
if env_val:
|
||||||
|
return env_val
|
||||||
|
v = cfg.get(path, default)
|
||||||
|
return str(v or default).strip() or default
|
||||||
|
|
||||||
|
|
||||||
|
def get_model(name: str, cfg: AppConfig) -> Any:
    """Build the adapter for a model slot according to the configured provider.

    Supported slots are ``"llm"``, ``"image"``, ``"image_fallback"``,
    ``"video"`` and ``"tts"``. Adapter modules are imported only when they are
    selected, so the dependencies of unused providers are never loaded.
    Provider names that are not recognised fall through to the slot's default
    adapter (OpenAI for llm, mock for images, MoviePy for video, Edge for tts).

    Raises:
        ValueError: If ``name`` is not one of the supported slots.
    """
    if name == "llm":
        if _provider(cfg, "llm.provider", "openai") == "mock":
            from engine.adapters.llm.mock_adapter import MockLLM

            return MockLLM()

        from engine.adapters.llm.openai_adapter import OpenAIAdapter

        return OpenAIAdapter(cfg)

    if name == "image" or name == "image_fallback":
        if name == "image_fallback":
            # Important: fallback must default to mock, not follow primary image provider.
            fallback_to = "mock"
            config_path = "image_fallback.provider"
        else:
            fallback_to = _provider(cfg, "image.provider", "mock")
            config_path = "image.provider"
        chosen = _provider(cfg, config_path, fallback_to)

        if chosen == "comfy":
            from engine.adapters.image.comfy_adapter import ComfyAdapter

            return ComfyAdapter(cfg)
        elif chosen == "replicate":
            from engine.adapters.image.replicate_adapter import ReplicateAdapter

            return ReplicateAdapter(cfg)
        elif chosen == "openai":
            from engine.adapters.image.openai_image_adapter import OpenAIImageAdapter

            return OpenAIImageAdapter(cfg)

        from engine.adapters.image.mock_adapter import MockImageGen

        return MockImageGen()

    if name == "video":
        if _provider(cfg, "video.provider", "moviepy") == "ltx":
            from engine.adapters.video.ltx_adapter import LTXVideoGen

            return LTXVideoGen(cfg)

        from engine.adapters.video.moviepy_adapter import MoviePyVideoGen

        return MoviePyVideoGen(cfg)

    if name == "tts":
        if _provider(cfg, "tts.provider", "edge") == "mock":
            from engine.adapters.tts.mock_adapter import MockTTS

            return MockTTS()

        from engine.adapters.tts.edge_adapter import EdgeTTS

        return EdgeTTS(cfg)

    raise ValueError(f"Unknown model adapter name: {name}")
|
||||||
|
|
||||||
23
engine/prompt_injector.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
def inject_prompt(global_cfg: dict[str, Any] | None, scene: dict[str, Any]) -> dict[str, str]:
    """Build the unified positive/negative prompt for one scene.

    The positive prompt is ``character, style, base`` with empty parts
    omitted. ``base`` is the scene's ``prompt``, or its ``image_prompt`` when
    ``prompt`` is missing or blank. The negative prompt is the global
    ``negative_prompt``. Every part is whitespace-stripped before joining.

    Note: current pipeline already injects some globals into
    ``scene["image_prompt"]``, so callers may see them repeated.

    Args:
        global_cfg: Mapping with optional ``character``, ``style`` and
            ``negative_prompt`` entries; ``None`` is treated as empty.
        scene: Mapping with a ``prompt`` and/or ``image_prompt`` entry.

    Returns:
        ``{"positive": ..., "negative": ...}``.
    """

    def _clean(value: Any) -> str:
        # Treat None / falsy values as empty and normalise surrounding whitespace.
        return str(value or "").strip()

    global_cfg = global_cfg or {}
    character = _clean(global_cfg.get("character"))
    style = _clean(global_cfg.get("style"))
    negative = _clean(global_cfg.get("negative_prompt"))

    # Strip the fallback as well, so a blank "prompt" next to a padded
    # "image_prompt" cannot leak stray whitespace into the joined prompt.
    base = _clean(scene.get("prompt")) or _clean(scene.get("image_prompt"))

    positive_parts = [part for part in (character, style, base) if part]
    positive = ", ".join(positive_parts).strip(", ")
    return {"positive": positive, "negative": negative}
|
||||||
|
|
||||||
80
engine/render_pipeline.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from engine.model_factory import get_model
|
||||||
|
from engine.prompt_injector import inject_prompt
|
||||||
|
from engine.adapters.image.mock_adapter import MockImageGen
|
||||||
|
|
||||||
|
|
||||||
|
def render_shot(shot: dict[str, Any], cfg, out_dir: str | Path, *, mock: bool = False) -> str:
    """Render one shot into a clip: image -> optional narration audio -> video.

    Args:
        shot: Shot description. Reads ``shot_id``, ``duration``, ``tts``
            (narration text), ``image_prompt`` and ``scene_id``.
        cfg: Application config, passed to ``get_model`` and queried with
            dotted keys via ``cfg.get``.
        out_dir: Task output directory; ``clips/`` and ``audio/`` are created
            beneath it.
        mock: When true, the image is produced by ``MockImageGen`` and the
            configured image providers are not used. TTS and video models
            still come from config.

    Returns:
        Whatever the video adapter's ``generate`` returns for the rendered clip
        (presumably the clip path; confirm against the adapters).
    """
    out_dir = Path(out_dir)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    clips_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    duration_s = float(shot.get("duration", 3))
    narration = str(shot.get("tts", "")).strip()

    # Models from config.
    if mock:
        # Honour the caller's mock flag: never touch real image providers.
        image_gen = image_fallback_gen = MockImageGen()
    else:
        image_fallback_gen = get_model("image_fallback", cfg)
        try:
            image_gen = get_model("image", cfg)
        except Exception as e:
            # Covers missing optional deps at adapter init time (e.g. replicate/openai packages).
            print(f"[WARN] image provider init failed, fallback to image_fallback: {e}")
            image_gen = image_fallback_gen
    tts = get_model("tts", cfg)
    video_gen = get_model("video", cfg)

    # Prompt injection.
    global_cfg = cfg.get("global", {}) if hasattr(cfg, "get") else {}
    prompt_obj = inject_prompt(global_cfg, {"prompt": shot.get("image_prompt", "")})
    positive_prompt = prompt_obj.get("positive", "")
    # Prompt enrichment: keeps ComfyUI generations cinematic and detailed.
    enrich_style = "cinematic, ultra realistic, 4k, detailed lighting"
    if enrich_style not in positive_prompt:
        positive_prompt = f"{positive_prompt}, {enrich_style}".strip(", ")
    prompt_obj["positive"] = positive_prompt

    # 1) image
    try:
        image_path = image_gen.generate(prompt_obj, out_dir)
    except Exception as e:
        # Config-driven fallback; keeps provider switching non-invasive.
        print(f"[WARN] Image generation failed, fallback to image_fallback: {e}")
        try:
            image_path = image_fallback_gen.generate(prompt_obj, out_dir)
        except Exception as e2:
            print(f"[WARN] image_fallback also failed, hard fallback to mock: {e2}")
            image_path = MockImageGen().generate(prompt_obj, out_dir)

    scene_label = str(shot.get("scene_id") or shot.get("shot_id") or "scene_unknown")
    print(f"[SHOT_RENDER] {scene_label} -> image generated: {image_path}")

    # 2) audio (optional)
    audio_path = None
    if narration:
        # Use a stable per-shot audio filename.
        ap = audio_dir / f"shot_{shot_id}.mp3"
        try:
            audio_path = tts.generate(narration, ap)
        except Exception as e:
            # Don't fail the whole render due to TTS issues.
            print(f"[WARN] TTS failed, continue without audio: {e}")
            audio_path = None

    # 3) clip
    clip_out = clips_dir / f"shot_{shot_id}.mp4"
    prompt = {
        "duration_s": duration_s,
        "fps": int(cfg.get("video.mock_fps", 24)),
        "audio_path": audio_path,
        "size": cfg.get("video.mock_size", None),
    }
    clip_path = video_gen.generate(image_path, prompt, clip_out)
    return clip_path
|
||||||
|
|
||||||
@@ -10,6 +10,38 @@ from .config import AppConfig
|
|||||||
from .types import Scene
|
from .types import Scene
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_api_key(v: str) -> bool:
|
||||||
|
vv = (v or "").strip()
|
||||||
|
# Common prefixes: DashScope uses "sk-..."; we keep it minimal and permissive.
|
||||||
|
return bool(vv) and vv.startswith("sk-")
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_url(v: str) -> bool:
|
||||||
|
vv = (v or "").strip()
|
||||||
|
return vv.startswith("http://") or vv.startswith("https://")
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_openai_credentials(cfg: AppConfig) -> tuple[str, str | None]:
|
||||||
|
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "").strip()
|
||||||
|
base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL") or "").strip()
|
||||||
|
|
||||||
|
# 1) Resolve api_key: allow both "env var name" and "literal key" for safety.
|
||||||
|
api_key = os.environ.get(api_key_env) if api_key_env else None
|
||||||
|
if not api_key and api_key_env and _looks_like_api_key(api_key_env):
|
||||||
|
api_key = api_key_env
|
||||||
|
|
||||||
|
if not api_key:
|
||||||
|
raise RuntimeError(f"Missing OpenAI compatible API key (env={api_key_env})")
|
||||||
|
|
||||||
|
# 2) Resolve base_url: allow both "env var name" and "literal URL".
|
||||||
|
base_url = os.environ.get(base_url_env) if base_url_env else None
|
||||||
|
if not base_url and base_url_env and _looks_like_url(base_url_env):
|
||||||
|
base_url = base_url_env
|
||||||
|
if base_url:
|
||||||
|
base_url = str(base_url).strip() or None
|
||||||
|
return str(api_key), base_url
|
||||||
|
|
||||||
|
|
||||||
def _system_prompt(scene_count: int, min_chars: int, max_chars: int) -> str:
|
def _system_prompt(scene_count: int, min_chars: int, max_chars: int) -> str:
|
||||||
return f"""你是一个专业短视频编剧与分镜师。
|
return f"""你是一个专业短视频编剧与分镜师。
|
||||||
请把用户的创意扩展为 {scene_count} 个分镜(Scene) 的 JSON。
|
请把用户的创意扩展为 {scene_count} 个分镜(Scene) 的 JSON。
|
||||||
@@ -56,17 +88,13 @@ def generate_scenes(user_prompt: str, cfg: AppConfig) -> list[Scene]:
|
|||||||
min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
|
min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
|
||||||
max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
|
max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
|
||||||
|
|
||||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
|
|
||||||
base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
|
|
||||||
model = str(cfg.get("openai.model", "gpt-4o-mini"))
|
model = str(cfg.get("openai.model", "gpt-4o-mini"))
|
||||||
|
|
||||||
api_key = os.environ.get(api_key_env)
|
api_key, base_url = _resolve_openai_credentials(cfg)
|
||||||
if not api_key:
|
|
||||||
raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")
|
|
||||||
|
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=os.environ.get(base_url_env) or None,
|
base_url=base_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
resp = client.chat.completions.create(
|
resp = client.chat.completions.create(
|
||||||
@@ -105,17 +133,13 @@ def refine_scene(*, prompt: str, scenes: list[Scene], target_index: int, cfg: Ap
|
|||||||
min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
|
min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
|
||||||
max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
|
max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
|
||||||
|
|
||||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
|
|
||||||
base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
|
|
||||||
model = str(cfg.get("openai.model", "gpt-4o-mini"))
|
model = str(cfg.get("openai.model", "gpt-4o-mini"))
|
||||||
|
|
||||||
api_key = os.environ.get(api_key_env)
|
api_key, base_url = _resolve_openai_credentials(cfg)
|
||||||
if not api_key:
|
|
||||||
raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")
|
|
||||||
|
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=os.environ.get(base_url_env) or None,
|
base_url=base_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
scenes_payload = [
|
scenes_payload = [
|
||||||
|
|||||||
@@ -1,42 +1,53 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import os
|
||||||
import random
|
import random
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, vfx
|
import numpy as np
|
||||||
|
from moviepy import AudioFileClip, VideoClip
|
||||||
|
from PIL import Image
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
from .audio_gen import synthesize_one
|
from .audio_gen import synthesize_one
|
||||||
from .comfy_client import ComfyClient
|
from .comfy_client import generate_image as comfy_generate_image
|
||||||
from .config import AppConfig
|
from .config import AppConfig
|
||||||
|
from .render_pipeline import render_shot as render_shot_pipeline
|
||||||
|
|
||||||
|
|
||||||
def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
|
ASSETS_DIR = "assets"
|
||||||
if audio.duration is None or video.duration is None:
|
DEMO_IMAGE = os.path.join(ASSETS_DIR, "demo.jpg")
|
||||||
return video.with_audio(audio)
|
|
||||||
if audio.duration > video.duration:
|
|
||||||
video = video.with_effects([vfx.Loop(duration=audio.duration)])
|
|
||||||
elif video.duration > audio.duration:
|
|
||||||
video = video.subclipped(0, audio.duration)
|
|
||||||
return video.with_audio(audio)
|
|
||||||
|
|
||||||
|
|
||||||
def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
|
def ensure_demo_image() -> None:
|
||||||
return (
|
os.makedirs(ASSETS_DIR, exist_ok=True)
|
||||||
TextClip(
|
if os.path.exists(DEMO_IMAGE):
|
||||||
text=text,
|
return
|
||||||
font_size=44,
|
|
||||||
color="white",
|
# Simple placeholder image source.
|
||||||
stroke_color="black",
|
url = "https://picsum.photos/1280/720"
|
||||||
stroke_width=2,
|
with urlopen(url, timeout=30) as resp:
|
||||||
size=(int(size[0] * 0.92), None),
|
data = resp.read()
|
||||||
method="caption",
|
|
||||||
)
|
with open(DEMO_IMAGE, "wb") as f:
|
||||||
.with_position(("center", "bottom"))
|
f.write(data)
|
||||||
.with_duration(duration)
|
|
||||||
.with_opacity(0.95)
|
|
||||||
)
|
def generate_image_mock(prompt: str) -> str:
    """Return the path of the shared demo image, ensuring it exists first.

    ``prompt`` is accepted only so the signature matches the other image
    generators; its value is not used.
    """
    del prompt  # intentionally unused
    ensure_demo_image()
    return DEMO_IMAGE
|
||||||
|
|
||||||
|
|
||||||
|
def enrich_prompt(prompt_text: str) -> str:
    """Append the fixed cinematic style suffix to a prompt.

    The prompt is whitespace-stripped first; an empty or ``None`` prompt
    yields the style suffix alone.
    """
    suffix = "cinematic, ultra realistic, 4k, detailed lighting"
    subject = (prompt_text or "").strip()
    return f"{subject}, {suffix}" if subject else suffix
|
||||||
|
|
||||||
|
|
||||||
async def _render_shot_async(
|
async def _render_shot_async(
|
||||||
@@ -55,49 +66,102 @@ async def _render_shot_async(
|
|||||||
|
|
||||||
shot_id = str(shot.get("shot_id", "unknown"))
|
shot_id = str(shot.get("shot_id", "unknown"))
|
||||||
image_prompt = str(shot.get("image_prompt", "")).strip()
|
image_prompt = str(shot.get("image_prompt", "")).strip()
|
||||||
motion = str(shot.get("motion", "")).strip()
|
prompt_text = str(shot.get("prompt", image_prompt) or image_prompt).strip()
|
||||||
tts_text = str(shot.get("tts", "")).strip()
|
tts_text = str(shot.get("tts", "")).strip()
|
||||||
duration_s = max(1.0, float(shot.get("duration", 3)))
|
duration_s = max(1.0, float(shot.get("duration", 3)))
|
||||||
|
|
||||||
voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
|
voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
|
||||||
rate = str(cfg.get("tts.rate", "+0%"))
|
rate = str(cfg.get("tts.rate", "+0%"))
|
||||||
volume = str(cfg.get("tts.volume", "+0%"))
|
volume = str(cfg.get("tts.volume", "+0%"))
|
||||||
audio_path = audio_dir / f"shot_{shot_id}.mp3"
|
audio_asset: Any | None = None
|
||||||
audio_asset = await synthesize_one(tts_text or " ", audio_path, voice, rate, volume)
|
if tts_text:
|
||||||
|
audio_path = audio_dir / f"shot_{shot_id}.mp3"
|
||||||
|
audio_asset = await synthesize_one(tts_text, audio_path, voice, rate, volume)
|
||||||
|
|
||||||
|
# Use config-defined output resolution for stable concatenation.
|
||||||
|
mock_size = cfg.get("video.mock_size", [1024, 576])
|
||||||
|
w, h = int(mock_size[0]), int(mock_size[1])
|
||||||
|
fps = int(cfg.get("video.mock_fps", 24))
|
||||||
|
|
||||||
|
if audio_asset and audio_asset.duration_s:
|
||||||
|
duration_s = max(duration_s, float(audio_asset.duration_s))
|
||||||
|
|
||||||
|
# shot -> image (ComfyUI first; fallback to demo.jpg)
|
||||||
|
image_path: str
|
||||||
if mock:
|
if mock:
|
||||||
from engine.main import _ensure_mock_image, _make_mock_video # local import to avoid circular at module import
|
image_path = generate_image_mock(prompt_text)
|
||||||
|
|
||||||
mock_size = cfg.get("video.mock_size", [1024, 576])
|
|
||||||
w, h = int(mock_size[0]), int(mock_size[1])
|
|
||||||
mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h))
|
|
||||||
fps = int(cfg.get("video.mock_fps", 24))
|
|
||||||
raw_video_path = out_dir / f"shot_raw_{shot_id}.mp4"
|
|
||||||
_make_mock_video(raw_video_path, mock_image, max(duration_s, audio_asset.duration_s), fps=fps)
|
|
||||||
else:
|
else:
|
||||||
comfy = ComfyClient(cfg)
|
|
||||||
wf = comfy.load_workflow()
|
|
||||||
seed = random.randint(1, 2_147_483_647)
|
|
||||||
wf_i = comfy.inject_params(wf, image_prompt=image_prompt, seed=seed, motion_prompt=motion or None)
|
|
||||||
result = await comfy.run_workflow(wf_i)
|
|
||||||
candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}]
|
|
||||||
raw_video_path = candidates[0] if candidates else result.output_files[0]
|
|
||||||
|
|
||||||
clip_out = clips_dir / f"shot_{shot_id}.mp4"
|
|
||||||
v = VideoFileClip(str(raw_video_path))
|
|
||||||
a = AudioFileClip(str(audio_asset.path))
|
|
||||||
try:
|
|
||||||
v2 = _fit_video_to_audio(v, a)
|
|
||||||
w2, h2 = v2.size
|
|
||||||
subtitle = _subtitle_clip(tts_text, (w2, h2), v2.duration or a.duration or duration_s)
|
|
||||||
comp = CompositeVideoClip([v2, subtitle])
|
|
||||||
try:
|
try:
|
||||||
comp.write_videofile(str(clip_out), codec="libx264", audio_codec="aac", fps=v2.fps or 24, preset="veryfast")
|
enriched = enrich_prompt(prompt_text)
|
||||||
finally:
|
# Store generated images directly under outputs/{task_id}
|
||||||
comp.close()
|
# (as required by verification: outputs/{task_id}/*.png).
|
||||||
|
image_path = str(
|
||||||
|
comfy_generate_image(
|
||||||
|
enriched,
|
||||||
|
out_dir,
|
||||||
|
cfg=cfg,
|
||||||
|
timeout_s=60,
|
||||||
|
retry=2,
|
||||||
|
filename_prefix=f"shot_{shot_id}",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
print(f"[SHOT_RENDER] {shot_id} -> image generated: {image_path}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WARN] Comfy failed, fallback to demo: {e}")
|
||||||
|
image_path = generate_image_mock(prompt_text)
|
||||||
|
|
||||||
|
# Ensure image exists before rendering.
|
||||||
|
if not image_path or not os.path.exists(image_path):
|
||||||
|
image_path = generate_image_mock(prompt_text)
|
||||||
|
base_img = Image.open(image_path).convert("RGB")
|
||||||
|
|
||||||
|
def make_frame(t: float):
|
||||||
|
# Subtle zoom-in from 1.00 to ~1.03 over the clip duration.
|
||||||
|
progress = float(t) / max(duration_s, 1e-6)
|
||||||
|
progress = max(0.0, min(1.0, progress))
|
||||||
|
scale = 1.0 + 0.03 * progress
|
||||||
|
|
||||||
|
new_w = max(w, int(w * scale))
|
||||||
|
new_h = max(h, int(h * scale))
|
||||||
|
|
||||||
|
frame = base_img.resize((new_w, new_h), Image.LANCZOS)
|
||||||
|
left = (new_w - w) // 2
|
||||||
|
top = (new_h - h) // 2
|
||||||
|
frame = frame.crop((left, top, left + w, top + h))
|
||||||
|
return np.array(frame)
|
||||||
|
|
||||||
|
# image -> video
|
||||||
|
video = VideoClip(make_frame, duration=duration_s, has_constant_size=True)
|
||||||
|
|
||||||
|
# optional audio -> clip
|
||||||
|
audio_clip: AudioFileClip | None = None
|
||||||
|
if audio_asset and os.path.exists(str(audio_asset.path)):
|
||||||
|
audio_clip = AudioFileClip(str(audio_asset.path))
|
||||||
|
video = video.with_audio(audio_clip)
|
||||||
|
|
||||||
|
# output
|
||||||
|
clip_out = clips_dir / f"shot_{shot_id}.mp4"
|
||||||
|
print(f"[SHOT_RENDER] {shot_id} -> {clip_out}")
|
||||||
|
try:
|
||||||
|
video.write_videofile(
|
||||||
|
str(clip_out),
|
||||||
|
fps=fps,
|
||||||
|
codec="libx264",
|
||||||
|
audio_codec="aac",
|
||||||
|
preset="veryfast",
|
||||||
|
threads=2,
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
v.close()
|
try:
|
||||||
a.close()
|
video.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if audio_clip is not None:
|
||||||
|
try:
|
||||||
|
audio_clip.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return str(clip_out)
|
return str(clip_out)
|
||||||
|
|
||||||
|
|
||||||
@@ -109,5 +173,5 @@ def render_shot(
|
|||||||
mock: bool = False,
|
mock: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
cfg2 = cfg or AppConfig.load("./configs/config.yaml")
|
cfg2 = cfg or AppConfig.load("./configs/config.yaml")
|
||||||
return asyncio.run(_render_shot_async(shot, output_dir, cfg2, mock=mock))
|
return render_shot_pipeline(shot, cfg2, output_dir, mock=mock)
|
||||||
|
|
||||||
|
|||||||
18
outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/task.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"task_id": "'06b0a90f-c964-4a88-8e80-6ff668e031b3'",
|
||||||
|
"status": "failed",
|
||||||
|
"shots": [
|
||||||
|
{
|
||||||
|
"shot_id": "scene_01_01",
|
||||||
|
"status": "running"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_02_01",
|
||||||
|
"status": "pending"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_03_01",
|
||||||
|
"status": "pending"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/final.mp4
Normal file
18
outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/task.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"task_id": "'13c9b724-77e3-4553-aebf-dfc845dd17c1'",
|
||||||
|
"status": "done",
|
||||||
|
"shots": [
|
||||||
|
{
|
||||||
|
"shot_id": "scene_01_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_02_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_03_01",
|
||||||
|
"status": "done"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
19
outputs/0d546f5e-0274-4372-b91d-fb64ace85d49/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,城市夜景,霓虹灯,电影感",
|
||||||
|
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||||
|
"narration": "夜色温柔落在街灯上"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,咖啡店窗边,暖光,细雨",
|
||||||
|
"video_motion": "侧向平移,人物轻轻抬头",
|
||||||
|
"narration": "雨声里藏着一段回忆"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,桥上远景,车流光轨,温暖",
|
||||||
|
"video_motion": "拉远全景,光轨流动",
|
||||||
|
"narration": "我们在光里学会告别"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/final.mp4
Normal file
19
outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,城市夜景,霓虹灯,电影感",
|
||||||
|
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||||
|
"narration": "夜色温柔落在街灯上"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,咖啡店窗边,暖光,细雨",
|
||||||
|
"video_motion": "侧向平移,人物轻轻抬头",
|
||||||
|
"narration": "雨声里藏着一段回忆"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,桥上远景,车流光轨,温暖",
|
||||||
|
"video_motion": "拉远全景,光轨流动",
|
||||||
|
"narration": "我们在光里学会告别"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
18
outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/task.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"task_id": "3ef0c0b8-c90f-49a8-88e4-e8ca735312f0",
|
||||||
|
"status": "done",
|
||||||
|
"shots": [
|
||||||
|
{
|
||||||
|
"shot_id": "scene_01_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_02_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_03_01",
|
||||||
|
"status": "done"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/final.mp4
Normal file
10
outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/task.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"task_id": "3f82b1ce-da18-4f82-9147-25eb0abeaf2c",
|
||||||
|
"status": "done",
|
||||||
|
"shots": [
|
||||||
|
{
|
||||||
|
"shot_id": "scene_01_01",
|
||||||
|
"status": "done"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
outputs/62da5541-43d2-4ead-a243-e68345877dff/final.mp4
Normal file
18
outputs/62da5541-43d2-4ead-a243-e68345877dff/task.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"task_id": "62da5541-43d2-4ead-a243-e68345877dff",
|
||||||
|
"status": "done",
|
||||||
|
"shots": [
|
||||||
|
{
|
||||||
|
"shot_id": "scene_01_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_02_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_03_01",
|
||||||
|
"status": "done"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/final.mp4
Normal file
19
outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||||
|
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||||
|
"narration": "夜色温柔落在街灯上"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||||
|
"video_motion": "侧向平移,人物轻轻抬头",
|
||||||
|
"narration": "雨声里藏着一段回忆"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||||
|
"video_motion": "拉远全景,光轨流动",
|
||||||
|
"narration": "我们在光里学会告别"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
18
outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/task.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"task_id": "7b8255ea-ed2f-4356-8a57-d5c77e351351",
|
||||||
|
"status": "done",
|
||||||
|
"shots": [
|
||||||
|
{
|
||||||
|
"shot_id": "scene_01_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_02_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_03_01",
|
||||||
|
"status": "done"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
19
outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||||
|
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||||
|
"narration": "夜色温柔落在街灯上"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||||
|
"video_motion": "侧向平移,人物轻轻抬头",
|
||||||
|
"narration": "雨声里藏着一段回忆"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||||
|
"video_motion": "拉远全景,光轨流动",
|
||||||
|
"narration": "我们在光里学会告别"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
19
outputs/ab4bb47e-cf35-4a99-977f-63097bdac9ed/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,城市夜景,霓虹灯,电影感",
|
||||||
|
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||||
|
"narration": "夜色温柔落在街灯上"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,咖啡店窗边,暖光,细雨",
|
||||||
|
"video_motion": "侧向平移,人物轻轻抬头",
|
||||||
|
"narration": "雨声里藏着一段回忆"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事,桥上远景,车流光轨,温暖",
|
||||||
|
"video_motion": "拉远全景,光轨流动",
|
||||||
|
"narration": "我们在光里学会告别"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/final.mp4
Normal file
19
outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||||
|
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||||
|
"narration": "夜色温柔落在街灯上"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||||
|
"video_motion": "侧向平移,人物轻轻抬头",
|
||||||
|
"narration": "雨声里藏着一段回忆"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||||
|
"video_motion": "拉远全景,光轨流动",
|
||||||
|
"narration": "我们在光里学会告别"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
18
outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/task.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"task_id": "ab68ccf6-0de0-4465-b4d7-1843f88d0201",
|
||||||
|
"status": "done",
|
||||||
|
"shots": [
|
||||||
|
{
|
||||||
|
"shot_id": "scene_01_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_02_01",
|
||||||
|
"status": "done"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"shot_id": "scene_03_01",
|
||||||
|
"status": "done"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
19
outputs/b4d67faf-fabe-4ead-8803-7bfbb7ee4ad2/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "Cinematic night shot, wet street reflecting neon lights, Xiao Lin walking away, beige trench coat, white scarf, cold tone background, bokeh.",
|
||||||
|
"video_motion": "镜头缓慢跟随背影移动,雨丝飘落。",
|
||||||
|
"narration": "霓虹灯下城市结束喧嚣,夜色格外温柔。"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "Medium shot inside convenience store, warm yellow lighting, Xiao Lin holding hot coffee, steam rising, soft facial lighting, cinematic depth of field.",
|
||||||
|
"video_motion": "镜头缓缓推进,捕捉蒸汽升腾动态。",
|
||||||
|
"narration": "街角便利店的灯光,是深夜里最暖的守候。"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "Close-up of Xiao Lin smiling slightly, blurred city light bokeh background, beige coat collar visible, warm atmosphere, high quality portrait.",
|
||||||
|
"video_motion": "固定镜头微距拍摄,眼神自然眨动。",
|
||||||
|
"narration": "捧一杯热茶,原来幸福就藏在平凡夜晚里。"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
19
outputs/b77f2668-6451-47ff-81da-48b498ecb436/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"scenes": [
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||||
|
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||||
|
"narration": "夜色温柔落在街灯上"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||||
|
"video_motion": "侧向平移,人物轻轻抬头",
|
||||||
|
"narration": "雨声里藏着一段回忆"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||||
|
"video_motion": "拉远全景,光轨流动",
|
||||||
|
"narration": "我们在光里学会告别"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |