fix: 优化架构
BIN
assets/demo.jpg
Normal file
|
After Width: | Height: | Size: 150 KiB |
@@ -4,6 +4,38 @@ app:
|
||||
# ComfyUI output directory on the same machine running this code
|
||||
comfy_output_dir: "./ComfyUI/output"
|
||||
|
||||
global:
|
||||
# Used by prompt_injector + adapters.
|
||||
style: ""
|
||||
character: ""
|
||||
negative_prompt: ""
|
||||
|
||||
llm:
|
||||
# Controls /script + /refine generation.
|
||||
provider: "mock" # "openai" to enable OpenAI/DashScope calls
|
||||
|
||||
image:
|
||||
provider: "mock" # "mock" | "comfy" | "replicate" | "openai"
|
||||
# Generic model name (used by some providers as fallback).
|
||||
model: ""
|
||||
|
||||
replicate:
|
||||
# Example: "stability-ai/sdxl"
|
||||
model: "stability-ai/sdxl"
|
||||
|
||||
openai:
|
||||
# Example: "gpt-image-1"
|
||||
model: "gpt-image-1"
|
||||
|
||||
image_fallback:
|
||||
provider: "mock"
|
||||
|
||||
video:
|
||||
provider: "moviepy"
|
||||
|
||||
tts:
|
||||
provider: "edge"
|
||||
|
||||
openai:
|
||||
# Prefer environment variables in real deployments.
|
||||
# OPENAI_API_KEY must be set; OPENAI_BASE_URL optional (for DeepSeek / other gateways).
|
||||
|
||||
1
engine/adapters/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
engine/adapters/image/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
9
engine/adapters/image/base.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class BaseImageGen:
    """Abstract interface for image generation backends."""

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Render an image for *prompt* into *output_dir* and return the file path."""
        raise NotImplementedError
|
||||
|
||||
36
engine/adapters/image/comfy_adapter.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from engine.comfy_client import generate_image as comfy_generate_image
|
||||
from engine.config import AppConfig
|
||||
|
||||
from .base import BaseImageGen
|
||||
from .mock_adapter import MockImageGen
|
||||
|
||||
|
||||
class ComfyAdapter(BaseImageGen):
    """Image adapter backed by a running ComfyUI instance.

    Errors are deliberately propagated so that render_pipeline can apply the
    configured `image_fallback` provider instead of degrading silently here.
    """

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg
        # Kept for interface parity; actual fallback selection happens upstream.
        self.fallback = MockImageGen()

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Generate one image via ComfyUI; returns the saved file path.

        Raises whatever the ComfyUI client raises — the caller owns fallback.
        """
        positive = str(prompt.get("positive", "") or "")
        negative = str(prompt.get("negative", "") or "")
        # Fix: the original wrapped this in `except Exception as e: raise`,
        # a no-op handler with an unused binding. Let exceptions bubble up.
        return str(
            comfy_generate_image(
                positive,
                output_dir,
                negative_text=negative or None,
                cfg=self.cfg,
                timeout_s=60,
                retry=2,
                filename_prefix="shot",
            )
        )
|
||||
|
||||
45
engine/adapters/image/mock_adapter.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from urllib.request import urlopen
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from .base import BaseImageGen
|
||||
|
||||
|
||||
ASSETS_DIR = "assets"
DEMO_IMAGE = os.path.join(ASSETS_DIR, "demo.jpg")


def ensure_demo_image() -> None:
    """Download a placeholder photo once; subsequent calls are no-ops."""
    os.makedirs(ASSETS_DIR, exist_ok=True)
    if os.path.exists(DEMO_IMAGE):
        return

    # One-time fetch of a random placeholder image for offline mock output.
    url = "https://picsum.photos/1280/720"
    with urlopen(url, timeout=30) as resp:
        payload = resp.read()
    with open(DEMO_IMAGE, "wb") as f:
        f.write(payload)
|
||||
|
||||
|
||||
class MockImageGen(BaseImageGen):
    """Offline image provider that copies the bundled demo picture."""

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Write a copy of demo.jpg (re-encoded as PNG) into *output_dir*."""
        # The prompt is ignored on purpose: mock output is always demo.jpg.
        del prompt
        ensure_demo_image()
        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / f"shot_{uuid.uuid4().hex}.png"
        try:
            # Re-encode as PNG so downstream *.png globs keep matching.
            Image.open(DEMO_IMAGE).convert("RGB").save(str(target), format="PNG")
        except Exception:
            # Best-effort raw copy if Pillow cannot process the source file.
            target.write_bytes(Path(DEMO_IMAGE).read_bytes())
        return str(target)
|
||||
|
||||
83
engine/adapters/image/openai_image_adapter.py
Normal file
@@ -0,0 +1,83 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
from engine.config import AppConfig
|
||||
|
||||
from .base import BaseImageGen
|
||||
|
||||
|
||||
class OpenAIImageAdapter(BaseImageGen):
    """
    Optional image provider adapter using OpenAI Images API (or OpenAI-compatible gateways).
    Requires `openai` python package and a configured API key via environment variables.
    """

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg
        # Config keys:
        # - image.openai.model (preferred), image.model (fallback)
        # - openai.api_key_env / openai.base_url_env (shared with engine/script_gen)
        self.model = str(cfg.get("image.openai.model", cfg.get("image.model", ""))).strip()
        if not self.model:
            raise ValueError("OpenAIImageAdapter requires `image.openai.model` (or `image.model`).")

        api_key_env_or_literal = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "OPENAI_API_KEY").strip()
        # Accept either an env var NAME (e.g. OPENAI_API_KEY) or a literal
        # key (starts with `sk-`) for quick local POCs.
        if api_key_env_or_literal.startswith("sk-"):
            api_key = api_key_env_or_literal
        else:
            api_key = os.environ.get(api_key_env_or_literal)
        if not api_key:
            raise RuntimeError(f"OpenAIImageAdapter missing API key: `{api_key_env_or_literal}`")
        self.api_key = api_key

        # NOTE(review): despite the `_env` suffix this config value is used as
        # a literal base URL, never resolved from the environment — confirm intent.
        base_url = str(cfg.get("openai.base_url_env", "https://api.openai.com/v1")).strip()
        self.base_url = base_url.rstrip("/") if base_url else "https://api.openai.com/v1"

        # Lazy import to avoid hard dependency for mock/comfy users.
        from openai import OpenAI  # type: ignore

        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Generate one image, download it, and save it as PNG under *output_dir*."""
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        positive = prompt.get("positive", "")
        negative = prompt.get("negative", "")
        # The Images API has no dedicated negative_prompt field; embed the
        # negative hints into the prompt text to keep the interface uniform.
        prompt_text = f"{positive}\nNegative prompt: {negative}" if negative else positive

        result = self.client.images.generate(model=self.model, prompt=prompt_text)

        # Fix: extract the URL with getattr instead of a blanket try/except-pass,
        # so any shape mismatch surfaces via the explicit RuntimeError below.
        data = getattr(result, "data", None) or []
        url: str | None = getattr(data[0], "url", None) if data else None
        if not url:
            raise RuntimeError("OpenAIImageAdapter unexpected response: missing image url")

        r = requests.get(url, timeout=60)
        r.raise_for_status()

        out_path = output_dir / f"shot_{uuid.uuid4().hex}.png"
        img = Image.open(BytesIO(r.content)).convert("RGB")
        img.save(str(out_path), format="PNG")
        return str(out_path)
|
||||
|
||||
60
engine/adapters/image/replicate_adapter.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
from engine.config import AppConfig
|
||||
|
||||
from .base import BaseImageGen
|
||||
|
||||
|
||||
class ReplicateAdapter(BaseImageGen):
    """Image provider backed by the Replicate API."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg
        # Model id comes from image.replicate.model, falling back to image.model.
        self.model = str(cfg.get("image.replicate.model", cfg.get("image.model", ""))).strip()
        if not self.model:
            raise ValueError("ReplicateAdapter requires `image.replicate.model` (or `image.model`).")

        # Import lazily so that environments without replicate installed can still run with mock/comfy.
        import replicate  # type: ignore

        self.replicate = replicate

    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
        """Run the model, download the first returned image, and save it as PNG."""
        from io import BytesIO

        out_dir = Path(output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)

        payload: dict[str, Any] = {
            "prompt": prompt.get("positive", ""),
            "negative_prompt": prompt.get("negative", ""),
        }

        # replicate.run blocks until completion on current SDK versions.
        result = self.replicate.run(self.model, input=payload)

        # Accept the two common output shapes: a list of URLs or a dict.
        image_url = None
        if isinstance(result, list) and result:
            image_url = result[0]
        elif isinstance(result, dict):
            image_url = result.get("image") or result.get("output") or result.get("url")
        if not isinstance(image_url, str) or not image_url:
            raise RuntimeError(f"Unexpected Replicate output shape: {type(result)}")

        resp = requests.get(image_url, timeout=60)
        resp.raise_for_status()

        # Always emit PNG so downstream `outputs/{task_id}/*.png` checks match.
        out_path = out_dir / f"shot_{uuid.uuid4().hex}.png"
        Image.open(BytesIO(resp.content)).convert("RGB").save(str(out_path), format="PNG")
        return str(out_path)
|
||||
|
||||
21
engine/adapters/image/stability_adapter.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from engine.config import AppConfig
|
||||
|
||||
from .base import BaseImageGen
|
||||
|
||||
|
||||
class StabilityAdapter(BaseImageGen):
|
||||
"""
|
||||
Placeholder for Stability AI image generation.
|
||||
Add implementation + dependencies when needed.
|
||||
"""
|
||||
|
||||
def __init__(self, cfg: AppConfig):
|
||||
self.cfg = cfg
|
||||
|
||||
def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
|
||||
raise NotImplementedError("StabilityAdapter not implemented yet")
|
||||
|
||||
1
engine/adapters/llm/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
12
engine/adapters/llm/base.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
class BaseLLM:
    """Abstract interface for script-generation LLM backends."""

    def generate_script(self, prompt: str, context: dict[str, Any] | None = None) -> Any:
        """Produce a list of scenes from a free-form *prompt*."""
        raise NotImplementedError

    def refine_scene(self, scene: Any, context: dict[str, Any] | None = None) -> Any:
        """Return a refined version of a single *scene*."""
        raise NotImplementedError
|
||||
|
||||
25
engine/adapters/llm/mock_adapter.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from engine.types import Scene
|
||||
|
||||
from .base import BaseLLM
|
||||
|
||||
|
||||
class MockLLM(BaseLLM):
    """Deterministic offline stand-in for the real LLM."""

    def generate_script(self, prompt: str, context: dict[str, Any] | None = None) -> list[Scene]:
        """Return three fixed scenes themed on *prompt* (or a default theme)."""
        theme = (prompt or "").strip() or "a warm city night"
        return [
            Scene(image_prompt=f"{theme},城市夜景,霓虹灯,电影感", video_motion="缓慢推进镜头,轻微摇镜", narration="夜色温柔落在街灯上"),
            Scene(image_prompt=f"{theme},咖啡店窗边,暖光,细雨", video_motion="侧向平移,人物轻轻抬头", narration="雨声里藏着一段回忆"),
            Scene(image_prompt=f"{theme},桥上远景,车流光轨,温暖", video_motion="拉远全景,光轨流动", narration="我们在光里学会告别"),
        ]

    def refine_scene(self, scene: Scene, context: dict[str, Any] | None = None) -> Scene:
        """Lightly 'polish' a scene by tagging the narration, capped at 30 chars."""
        polished = (scene.narration + "(更凝练)")[:30]
        return Scene(image_prompt=scene.image_prompt, video_motion=scene.video_motion, narration=polished)
|
||||
|
||||
29
engine/adapters/llm/openai_adapter.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from engine.config import AppConfig
|
||||
from engine.script_gen import generate_scenes, refine_scene
|
||||
|
||||
from .base import BaseLLM
|
||||
|
||||
|
||||
class OpenAIAdapter(BaseLLM):
    """LLM adapter that delegates to the existing engine.script_gen helpers."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate_script(self, prompt: str, context: dict[str, Any] | None = None):
        """Generate scenes; script_gen already enforces JSON schema and length limits."""
        return generate_scenes(prompt, self.cfg)

    def refine_scene(self, scene: Any, context: dict[str, Any] | None = None):
        """Refine one scene; *context* must carry scenes/prompt/target_index."""
        ctx = context or {}
        all_scenes = ctx.get("scenes")
        original_prompt = ctx.get("prompt")
        index = ctx.get("target_index")
        if all_scenes is None or original_prompt is None or index is None:
            raise ValueError("OpenAIAdapter.refine_scene missing context: scenes/prompt/target_index")
        return refine_scene(prompt=original_prompt, scenes=all_scenes, target_index=int(index), cfg=self.cfg)
|
||||
|
||||
1
engine/adapters/tts/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
9
engine/adapters/tts/base.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class BaseTTS:
    """Abstract interface for text-to-speech backends."""

    def generate(self, text: str, output_path: str | Path) -> str:
        """Synthesize *text* into *output_path*; return the written file path."""
        raise NotImplementedError
|
||||
|
||||
28
engine/adapters/tts/edge_adapter.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from engine.audio_gen import synthesize_one
|
||||
from engine.config import AppConfig
|
||||
|
||||
from .base import BaseTTS
|
||||
|
||||
|
||||
class EdgeTTS(BaseTTS):
    """TTS adapter delegating to engine.audio_gen's edge-tts synthesis."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate(self, text: str, output_path: str | Path) -> str:
        """Synchronously synthesize *text* to *output_path* via asyncio.run."""
        # Empty input is replaced by a single space so synthesis still succeeds.
        speak_text = text or " "
        target = Path(output_path)
        voice = str(self.cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
        rate = str(self.cfg.get("tts.rate", "+0%"))
        volume = str(self.cfg.get("tts.volume", "+0%"))

        async def _synthesize() -> str:
            asset = await synthesize_one(speak_text, target, voice, rate, volume)
            return str(asset.path)

        return asyncio.run(_synthesize())
|
||||
|
||||
15
engine/adapters/tts/mock_adapter.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .base import BaseTTS
|
||||
|
||||
|
||||
class MockTTS(BaseTTS):
|
||||
def generate(self, text: str, output_path: str | Path) -> str:
|
||||
# No-op for offline tests: return empty path so video adapter skips audio.
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_path.write_bytes(b"")
|
||||
return str(output_path)
|
||||
|
||||
1
engine/adapters/video/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
9
engine/adapters/video/base.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class BaseVideoGen:
    """Abstract interface for image-to-video clip backends."""

    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
        """Render a clip from *image_path* into *output_path*; return the path."""
        raise NotImplementedError
|
||||
|
||||
18
engine/adapters/video/ltx_adapter.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from engine.config import AppConfig
|
||||
|
||||
from .base import BaseVideoGen
|
||||
|
||||
|
||||
class LTXVideoGen(BaseVideoGen):
|
||||
def __init__(self, cfg: AppConfig):
|
||||
self.cfg = cfg
|
||||
|
||||
def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
|
||||
# Reserved for future: direct image->video generation (LTX / diffusion video).
|
||||
# Current project keeps clip generation via MoviePy for stability.
|
||||
raise NotImplementedError("LTXVideoGen is not implemented yet")
|
||||
|
||||
81
engine/adapters/video/moviepy_adapter.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from moviepy import AudioFileClip, VideoClip
|
||||
from PIL import Image
|
||||
|
||||
from engine.config import AppConfig
|
||||
|
||||
from .base import BaseVideoGen
|
||||
|
||||
|
||||
class MoviePyVideoGen(BaseVideoGen):
    """Render a shot clip from a still image with MoviePy (slow zoom-in)."""

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg

    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
        """Write an H.264 mp4 at *output_path* built from *image_path*.

        Reads duration_s / fps / audio_path / size from *prompt*, falling back
        to video.mock_fps and video.mock_size in config.
        """
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)

        clip_seconds = float(prompt.get("duration_s", 3))
        frame_rate = int(prompt.get("fps", self.cfg.get("video.mock_fps", 24)))
        audio_path = prompt.get("audio_path")

        requested = prompt.get("size")
        if isinstance(requested, (list, tuple)) and len(requested) == 2:
            width, height = int(requested[0]), int(requested[1])
        else:
            default_size = self.cfg.get("video.mock_size", [1024, 576])
            width, height = int(default_size[0]), int(default_size[1])

        still = Image.open(image_path).convert("RGB")

        def render_frame(t: float):
            # Linear 3% zoom over the clip, center-cropped back to target size.
            fraction = float(t) / max(clip_seconds, 1e-6)
            fraction = max(0.0, min(1.0, fraction))
            zoom = 1.0 + 0.03 * fraction
            zoom_w = max(width, int(width * zoom))
            zoom_h = max(height, int(height * zoom))
            scaled = still.resize((zoom_w, zoom_h), Image.LANCZOS)
            x0 = (zoom_w - width) // 2
            y0 = (zoom_h - height) // 2
            return np.array(scaled.crop((x0, y0, x0 + width, y0 + height)))

        clip = VideoClip(render_frame, duration=clip_seconds, has_constant_size=True)

        # Attach narration audio only when the file actually exists.
        audio_clip = None
        if audio_path and os.path.exists(str(audio_path)):
            audio_clip = AudioFileClip(str(audio_path))
            clip = clip.with_audio(audio_clip)

        try:
            clip.write_videofile(
                str(target),
                fps=frame_rate,
                codec="libx264",
                audio_codec="aac",
                preset="veryfast",
                threads=2,
            )
        finally:
            # Release MoviePy resources even if encoding fails.
            for resource in (clip, audio_clip):
                if resource is not None:
                    try:
                        resource.close()
                    except Exception:
                        pass

        return str(target)
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
@@ -186,3 +187,215 @@ class ComfyClient:
|
||||
|
||||
# unreachable
|
||||
# return ComfyResult(prompt_id=prompt_id, output_files=last_files)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Minimal "text->image" helpers (used by shot rendering)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_simple_workflow(
|
||||
prompt_text: str,
|
||||
*,
|
||||
seed: int,
|
||||
ckpt_name: str,
|
||||
width: int,
|
||||
height: int,
|
||||
steps: int = 20,
|
||||
cfg: float = 8.0,
|
||||
sampler_name: str = "euler",
|
||||
scheduler: str = "normal",
|
||||
denoise: float = 1.0,
|
||||
filename_prefix: str = "shot",
|
||||
negative_text: str = "low quality, blurry",
|
||||
) -> dict[str, Any]:
|
||||
# Best-effort workflow. If your ComfyUI nodes/models differ, generation must fallback.
|
||||
return {
|
||||
"3": {
|
||||
"class_type": "KSampler",
|
||||
"inputs": {
|
||||
"seed": int(seed),
|
||||
"steps": int(steps),
|
||||
"cfg": float(cfg),
|
||||
"sampler_name": sampler_name,
|
||||
"scheduler": scheduler,
|
||||
"denoise": float(denoise),
|
||||
"model": ["4", 0],
|
||||
"positive": ["6", 0],
|
||||
"negative": ["7", 0],
|
||||
"latent_image": ["5", 0],
|
||||
},
|
||||
},
|
||||
"4": {
|
||||
"class_type": "CheckpointLoaderSimple",
|
||||
"inputs": {
|
||||
"ckpt_name": ckpt_name,
|
||||
},
|
||||
},
|
||||
"5": {
|
||||
"class_type": "EmptyLatentImage",
|
||||
"inputs": {
|
||||
"width": int(width),
|
||||
"height": int(height),
|
||||
"batch_size": 1,
|
||||
},
|
||||
},
|
||||
"6": {
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": {
|
||||
"text": prompt_text,
|
||||
"clip": ["4", 1],
|
||||
},
|
||||
},
|
||||
"7": {
|
||||
"class_type": "CLIPTextEncode",
|
||||
"inputs": {
|
||||
"text": negative_text,
|
||||
"clip": ["4", 1],
|
||||
},
|
||||
},
|
||||
"8": {
|
||||
"class_type": "VAEDecode",
|
||||
"inputs": {
|
||||
"samples": ["3", 0],
|
||||
"vae": ["4", 2],
|
||||
},
|
||||
},
|
||||
"9": {
|
||||
"class_type": "SaveImage",
|
||||
"inputs": {
|
||||
"images": ["8", 0],
|
||||
"filename_prefix": filename_prefix,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _queue_prompt(base_url: str, workflow: dict[str, Any], client_id: str) -> str:
    """Submit *workflow* to ComfyUI's /prompt endpoint and return the prompt id."""
    endpoint = base_url.rstrip("/") + "/prompt"
    body = {"prompt": workflow, "client_id": client_id}
    resp = httpx.post(endpoint, json=body, timeout=30.0)
    resp.raise_for_status()
    payload = resp.json()
    prompt_id = payload.get("prompt_id")
    if not (isinstance(prompt_id, str) and prompt_id):
        raise RuntimeError(f"Unexpected /prompt response: {payload}")
    return prompt_id
|
||||
|
||||
|
||||
def _get_history_item(base_url: str, prompt_id: str) -> dict[str, Any] | None:
    """Fetch the history entry for *prompt_id*, preferring the per-id endpoint.

    Returns the history item dict, or None when the prompt is not (yet)
    recorded or the server cannot be reached. Never raises.
    """
    root = base_url.rstrip("/")
    for url in (f"{root}/history/{prompt_id}", f"{root}/history"):
        try:
            r = httpx.get(url, timeout=30.0)
            if r.status_code == 404:
                continue
            r.raise_for_status()
            data = r.json()
            if isinstance(data, dict):
                # Full-history shape: {prompt_id: {...}, ...}
                if prompt_id in data and isinstance(data[prompt_id], dict):
                    return data[prompt_id]
                # Per-id endpoint may return the item directly.
                if url.endswith(f"/{prompt_id}"):
                    return data
            # Fix: the original returned None here, which skipped the /history
            # fallback whenever /history/{id} answered 200 without the item
            # (e.g. while still pending). Try the next endpoint instead.
        except Exception:
            continue
    return None
|
||||
|
||||
|
||||
def _extract_first_image_view_target(history_item: dict[str, Any]) -> tuple[str, str] | None:
|
||||
outputs = history_item.get("outputs")
|
||||
if not isinstance(outputs, dict):
|
||||
return None
|
||||
|
||||
def walk(v: Any) -> list[dict[str, Any]]:
|
||||
found: list[dict[str, Any]] = []
|
||||
if isinstance(v, dict):
|
||||
if isinstance(v.get("filename"), str) and v.get("filename").strip():
|
||||
found.append(v)
|
||||
for vv in v.values():
|
||||
found.extend(walk(vv))
|
||||
elif isinstance(v, list):
|
||||
for vv in v:
|
||||
found.extend(walk(vv))
|
||||
return found
|
||||
|
||||
candidates = walk(outputs)
|
||||
for c in candidates:
|
||||
fn = str(c.get("filename", "")).strip()
|
||||
sf = str(c.get("subfolder", "") or "").strip()
|
||||
if fn:
|
||||
return fn, sf
|
||||
return None
|
||||
|
||||
|
||||
def generate_image(
    prompt_text: str,
    output_dir: str | Path,
    *,
    cfg: AppConfig | None = None,
    timeout_s: int = 60,
    retry: int = 2,
    width: int | None = None,
    height: int | None = None,
    filename_prefix: str = "shot",
    ckpt_candidates: list[str] | None = None,
    negative_text: str | None = None,
) -> Path:
    """Generate one image via ComfyUI and download it into *output_dir*.

    Tries each checkpoint candidate for up to *retry* rounds, polling history
    for at most *timeout_s* seconds per submission. Raises RuntimeError with
    the last underlying error if nothing succeeds.
    """
    cfg2 = cfg or AppConfig.load("./configs/config.yaml")
    base_url = str(cfg2.get("app.comfy_base_url", "http://comfyui:8188")).rstrip("/")

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    if width is None or height is None:
        mock_size = cfg2.get("video.mock_size", [1024, 576])
        width = int(width or mock_size[0])
        height = int(height or mock_size[1])

    if negative_text is None:
        negative_text = "low quality, blurry"

    if ckpt_candidates is None:
        ckpt_candidates = [
            "v1-5-pruned-emaonly.ckpt",
            "v1-5-pruned-emaonly.safetensors",
            "sd-v1-5-tiny.safetensors",
        ]

    last_err: Exception | None = None
    for _attempt in range(max(1, retry)):
        for ckpt_name in ckpt_candidates:
            client_id = str(uuid.uuid4())
            # Random but bounded seed (ComfyUI expects a signed 32-bit int).
            seed = int(uuid.uuid4().int % 2_147_483_647)
            workflow = _build_simple_workflow(
                prompt_text,
                seed=seed,
                ckpt_name=ckpt_name,
                width=width,
                height=height,
                filename_prefix=filename_prefix,
                negative_text=negative_text,
            )
            try:
                prompt_id = _queue_prompt(base_url, workflow, client_id)
                start = time.time()
                while time.time() - start < timeout_s:
                    item = _get_history_item(base_url, prompt_id)
                    if isinstance(item, dict):
                        img_target = _extract_first_image_view_target(item)
                        if img_target:
                            filename, subfolder = img_target
                            # Bug fix: the filename query parameter previously
                            # contained the literal text "(unknown)" instead of
                            # the interpolated filename, so /view could never
                            # resolve the produced image.
                            view_url = f"{base_url}/view?filename={filename}&subfolder={subfolder}"
                            img_resp = httpx.get(view_url, timeout=60.0)
                            img_resp.raise_for_status()
                            image_path = out_dir / filename
                            image_path.write_bytes(img_resp.content)
                            return image_path
                    time.sleep(1.0)
            except Exception as e:
                last_err = e
                continue

    raise RuntimeError(f"ComfyUI image generation failed after retries: {last_err}")
|
||||
|
||||
@@ -12,13 +12,14 @@ from typing import Any
|
||||
from moviepy import ImageClip
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
from engine.audio_gen import synthesize_scenes
|
||||
from engine.model_factory import get_model
|
||||
from engine.prompt_injector import inject_prompt
|
||||
from engine.adapters.image.mock_adapter import MockImageGen
|
||||
from engine.assembler import assemble_clips
|
||||
from engine.comfy_client import ComfyClient
|
||||
from engine.config import AppConfig
|
||||
from engine.director import scenes_to_shots
|
||||
from engine.shot_executor import render_shot
|
||||
from engine.script_gen import generate_scenes, refine_scene
|
||||
from engine.task_store import create_task, update_shot_status, update_task_status
|
||||
from engine.types import Scene
|
||||
from engine.video_editor import Segment, render_final
|
||||
@@ -28,13 +29,15 @@ def _emit(line: str) -> None:
|
||||
print(line, flush=True)
|
||||
|
||||
|
||||
def _emit_scene(scene_idx: int, scene: Scene) -> None:
|
||||
def _emit_scene(scene_idx: int, scene: Scene, extra: dict[str, Any] | None = None) -> None:
|
||||
payload = {
|
||||
"index": scene_idx,
|
||||
"image_prompt": scene.image_prompt,
|
||||
"video_motion": scene.video_motion,
|
||||
"narration": scene.narration,
|
||||
}
|
||||
if extra:
|
||||
payload.update(extra)
|
||||
_emit("SCENE_JSON " + json.dumps(payload, ensure_ascii=False))
|
||||
|
||||
|
||||
@@ -136,9 +139,50 @@ def _fallback_scenes(prompt: str) -> list[Scene]:
|
||||
]
|
||||
|
||||
|
||||
def _generate_scene_preview(
    *,
    cfg: AppConfig,
    out_dir: Path,
    image_prompt: str,
    style: str | None,
    character: str | None,
) -> str | None:
    """Best-effort preview image URL for one scene; returns None on failure.

    Falls back image -> image_fallback -> MockImageGen so that the script
    stage is never blocked by preview generation problems.
    """
    try:
        image_gen = get_model("image", cfg)
    except Exception:
        image_gen = get_model("image_fallback", cfg)

    merged_globals = dict(cfg.get("global", {}) or {})
    if style:
        merged_globals["style"] = style
    if character:
        merged_globals["character"] = character

    prompt_obj = inject_prompt(merged_globals, {"prompt": image_prompt})
    try:
        image_path = image_gen.generate(prompt_obj, out_dir)
    except Exception:
        try:
            image_path = get_model("image_fallback", cfg).generate(prompt_obj, out_dir)
        except Exception:
            # Hard last resort: mock generation should always work offline.
            image_path = MockImageGen().generate(prompt_obj, out_dir)

    produced = Path(str(image_path))
    if not produced.exists():
        return None
    return f"/api/static/{out_dir.name}/{produced.name}"
|
||||
|
||||
|
||||
def _has_llm_key(cfg: AppConfig) -> bool:
|
||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
|
||||
return bool(os.environ.get(api_key_env))
|
||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "OPENAI_API_KEY").strip()
|
||||
# Env var name case.
|
||||
if os.environ.get(api_key_env):
|
||||
return True
|
||||
# Literal key case (DashScope / OpenAI-compatible).
|
||||
if api_key_env.startswith("sk-"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _parse_scenes_from_obj(obj: Any) -> list[Scene]:
|
||||
@@ -239,7 +283,8 @@ def step_script(prompt: str, cfg: AppConfig, mock: bool, *, style: str | None, c
|
||||
# fallback scenes still should include global injection
|
||||
scenes = _fallback_scenes(prompt)
|
||||
else:
|
||||
scenes = generate_scenes(prompt2, cfg)
|
||||
llm = get_model("llm", cfg)
|
||||
scenes = llm.generate_script(prompt2, context=None)
|
||||
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
_emit("SCRIPT_BEGIN")
|
||||
@@ -249,7 +294,14 @@ def step_script(prompt: str, cfg: AppConfig, mock: bool, *, style: str | None, c
|
||||
video_motion=s.video_motion,
|
||||
narration=s.narration,
|
||||
)
|
||||
_emit_scene(idx, s2)
|
||||
preview_url = _generate_scene_preview(
|
||||
cfg=cfg,
|
||||
out_dir=out_dir,
|
||||
image_prompt=s2.image_prompt,
|
||||
style=style,
|
||||
character=character,
|
||||
)
|
||||
_emit_scene(idx, s2, extra={"preview_url": preview_url or ""})
|
||||
_emit("SCRIPT_END")
|
||||
(out_dir / "scenes.json").write_text(
|
||||
json.dumps(
|
||||
@@ -292,8 +344,9 @@ def step_refine(
|
||||
narration=(s.narration + "(更凝练)")[:30],
|
||||
)
|
||||
else:
|
||||
# Ensure globals are visible to LLM, and inject to output image prompt.
|
||||
refined0 = refine_scene(prompt=prompt2, scenes=scenes, target_index=target_index, cfg=cfg)
|
||||
llm = get_model("llm", cfg)
|
||||
# Context carries prompt + scenes for consistent refinement.
|
||||
refined0 = llm.refine_scene(scenes[target_index - 1], context={"prompt": prompt2, "scenes": scenes, "target_index": target_index})
|
||||
refined = Scene(
|
||||
image_prompt=_decorate_image_prompt(refined0.image_prompt, style=style, character=character),
|
||||
video_motion=refined0.video_motion,
|
||||
@@ -301,7 +354,14 @@ def step_refine(
|
||||
)
|
||||
|
||||
# Keep the original index for frontend replacement.
|
||||
_emit_scene(scene_index, refined)
|
||||
preview_url = _generate_scene_preview(
|
||||
cfg=cfg,
|
||||
out_dir=out_dir,
|
||||
image_prompt=refined.image_prompt,
|
||||
style=style,
|
||||
character=character,
|
||||
)
|
||||
_emit_scene(scene_index, refined, extra={"preview_url": preview_url or ""})
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
(out_dir / f"refine_scene_{scene_index}.json").write_text(
|
||||
json.dumps(
|
||||
|
||||
80
engine/model_factory.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from engine.config import AppConfig
|
||||
|
||||
|
||||
def _provider(cfg: AppConfig, path: str, default: str) -> str:
|
||||
env_map = {
|
||||
"llm.provider": "ENGINE_LLM_PROVIDER",
|
||||
"image.provider": "ENGINE_IMAGE_PROVIDER",
|
||||
"image_fallback.provider": "ENGINE_IMAGE_FALLBACK_PROVIDER",
|
||||
"video.provider": "ENGINE_VIDEO_PROVIDER",
|
||||
"tts.provider": "ENGINE_TTS_PROVIDER",
|
||||
}
|
||||
env_key = env_map.get(path)
|
||||
if env_key:
|
||||
env_val = str(os.environ.get(env_key, "")).strip()
|
||||
if env_val:
|
||||
return env_val
|
||||
v = cfg.get(path, default)
|
||||
return str(v or default).strip() or default
|
||||
|
||||
|
||||
def get_model(name: str, cfg: AppConfig) -> Any:
    """Instantiate the adapter selected by config/env for *name*.

    Supported names: "llm", "image", "image_fallback", "video", "tts".
    Adapter modules are imported lazily so optional third-party packages
    are only required for the providers that are actually selected.

    Raises:
        ValueError: for an unknown adapter *name*.
    """
    if name == "llm":
        if _provider(cfg, "llm.provider", "openai") == "mock":
            from engine.adapters.llm.mock_adapter import MockLLM

            return MockLLM()
        from engine.adapters.llm.openai_adapter import OpenAIAdapter

        return OpenAIAdapter(cfg)

    if name in ("image", "image_fallback"):
        # Important: the fallback slot defaults to mock instead of mirroring
        # the primary image provider, so a broken primary cannot also break
        # its own fallback.
        if name == "image_fallback":
            chosen = _provider(cfg, "image_fallback.provider", "mock")
        else:
            chosen = _provider(cfg, "image.provider", "mock")
        if chosen == "comfy":
            from engine.adapters.image.comfy_adapter import ComfyAdapter

            return ComfyAdapter(cfg)
        if chosen == "replicate":
            from engine.adapters.image.replicate_adapter import ReplicateAdapter

            return ReplicateAdapter(cfg)
        if chosen == "openai":
            from engine.adapters.image.openai_image_adapter import OpenAIImageAdapter

            return OpenAIImageAdapter(cfg)
        from engine.adapters.image.mock_adapter import MockImageGen

        return MockImageGen()

    if name == "video":
        if _provider(cfg, "video.provider", "moviepy") == "ltx":
            from engine.adapters.video.ltx_adapter import LTXVideoGen

            return LTXVideoGen(cfg)
        from engine.adapters.video.moviepy_adapter import MoviePyVideoGen

        return MoviePyVideoGen(cfg)

    if name == "tts":
        if _provider(cfg, "tts.provider", "edge") == "mock":
            from engine.adapters.tts.mock_adapter import MockTTS

            return MockTTS()
        from engine.adapters.tts.edge_adapter import EdgeTTS

        return EdgeTTS(cfg)

    raise ValueError(f"Unknown model adapter name: {name}")
|
||||
|
||||
23
engine/prompt_injector.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
def inject_prompt(global_cfg: dict[str, Any] | None, scene: dict[str, Any]) -> dict[str, str]:
    """Build the unified positive/negative prompt pair for one scene.

    The positive prompt is "<character>, <style>, <base>" with empty parts
    omitted; the base text comes from scene["prompt"], falling back to
    scene["image_prompt"].  The negative prompt is taken from the global
    config only.

    Note: current pipeline already injects some globals into
    scene["image_prompt"]; this builder only concatenates and does not
    deduplicate.

    Args:
        global_cfg: Global settings dict (keys "character", "style",
            "negative_prompt"); None is treated as empty.
        scene: Scene/shot dict providing "prompt" and/or "image_prompt".

    Returns:
        {"positive": <joined prompt>, "negative": <negative prompt>}.
    """
    global_cfg = global_cfg or {}
    character = str(global_cfg.get("character", "") or "").strip()
    style = str(global_cfg.get("style", "") or "").strip()
    negative = str(global_cfg.get("negative_prompt", "") or "").strip()

    # Fix: the old unstripped re-read of "image_prompt" when the stripped
    # base was empty could leak a whitespace-only segment into the joined
    # positive prompt.  One stripped read covers both keys.
    base = str(scene.get("prompt") or scene.get("image_prompt") or "").strip()

    positive_parts = [p for p in (character, style, base) if p]
    positive = ", ".join(positive_parts).strip(", ")
    return {"positive": positive, "negative": negative}
|
||||
|
||||
80
engine/render_pipeline.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from engine.model_factory import get_model
|
||||
from engine.prompt_injector import inject_prompt
|
||||
from engine.adapters.image.mock_adapter import MockImageGen
|
||||
|
||||
|
||||
def render_shot(shot: dict[str, Any], cfg, out_dir: str | Path, *, mock: bool = False) -> str:
    """Render one shot end-to-end (image -> optional audio -> video clip).

    Args:
        shot: Shot description dict; reads "shot_id", "duration", "tts"
            (narration text), "image_prompt", and optionally "scene_id".
        cfg: Config object; only its ``get(path, default)`` accessor is used.
        out_dir: Task output directory; "clips/" and "audio/" subdirs are
            created inside it.
        mock: NOTE(review): accepted for interface compatibility with the
            legacy renderer but not referenced in this body — confirm intent.

    Returns:
        Path string of the rendered per-shot clip (clips/shot_<id>.mp4).
    """
    out_dir = Path(out_dir)
    clips_dir = out_dir / "clips"
    audio_dir = out_dir / "audio"
    clips_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)

    shot_id = str(shot.get("shot_id", "unknown"))
    duration_s = float(shot.get("duration", 3))
    narration = str(shot.get("tts", "")).strip()

    # Models from config.
    # The fallback generator is built first so provider-init failures of the
    # primary can reuse it immediately.
    image_fallback_gen = get_model("image_fallback", cfg)
    try:
        image_gen = get_model("image", cfg)
    except Exception as e:
        # Covers missing optional deps at adapter init time (e.g. replicate/openai packages).
        print(f"[WARN] image provider init failed, fallback to image_fallback: {e}")
        image_gen = image_fallback_gen
    tts = get_model("tts", cfg)
    video_gen = get_model("video", cfg)

    # Prompt injection.
    # hasattr guard lets plain dict-like or attribute-only cfg objects pass.
    global_cfg = cfg.get("global", {}) if hasattr(cfg, "get") else {}
    prompt_obj = inject_prompt(global_cfg, {"prompt": shot.get("image_prompt", "")})
    positive_prompt = prompt_obj.get("positive", "")
    # Prompt enrichment: keeps ComfyUI generations cinematic and detailed.
    # Substring check avoids appending the suffix twice on re-renders.
    enrich_style = "cinematic, ultra realistic, 4k, detailed lighting"
    if enrich_style not in positive_prompt:
        positive_prompt = f"{positive_prompt}, {enrich_style}".strip(", ")
    prompt_obj["positive"] = positive_prompt

    # 1) image
    # Three-level degradation: primary -> configured fallback -> hard mock.
    try:
        image_path = image_gen.generate(prompt_obj, out_dir)
    except Exception as e:
        # Config-driven fallback; keeps provider switching non-invasive.
        print(f"[WARN] Image generation failed, fallback to image_fallback: {e}")
        try:
            image_path = image_fallback_gen.generate(prompt_obj, out_dir)
        except Exception as e2:
            print(f"[WARN] image_fallback also failed, hard fallback to mock: {e2}")
            image_path = MockImageGen().generate(prompt_obj, out_dir)

    scene_label = str(shot.get("scene_id") or shot.get("shot_id") or "scene_unknown")
    print(f"[SHOT_RENDER] {scene_label} -> image generated: {image_path}")

    # 2) audio (optional)
    audio_path = None
    if narration:
        # Use a stable per-shot audio filename.
        ap = audio_dir / f"shot_{shot_id}.mp3"
        try:
            audio_path = tts.generate(narration, ap)
        except Exception as e:
            # Don't fail the whole render due to TTS issues.
            print(f"[WARN] TTS failed, continue without audio: {e}")
            audio_path = None

    # 3) clip
    clip_out = clips_dir / f"shot_{shot_id}.mp4"
    # Video-adapter contract here: generate(image_path, params_dict, out_path).
    prompt = {
        "duration_s": duration_s,
        "fps": int(cfg.get("video.mock_fps", 24)),
        "audio_path": audio_path,
        "size": cfg.get("video.mock_size", None),
    }
    clip_path = video_gen.generate(image_path, prompt, clip_out)
    return clip_path
|
||||
|
||||
@@ -10,6 +10,38 @@ from .config import AppConfig
|
||||
from .types import Scene
|
||||
|
||||
|
||||
def _looks_like_api_key(v: str) -> bool:
|
||||
vv = (v or "").strip()
|
||||
# Common prefixes: DashScope uses "sk-..."; we keep it minimal and permissive.
|
||||
return bool(vv) and vv.startswith("sk-")
|
||||
|
||||
|
||||
def _looks_like_url(v: str) -> bool:
|
||||
vv = (v or "").strip()
|
||||
return vv.startswith("http://") or vv.startswith("https://")
|
||||
|
||||
|
||||
def _resolve_openai_credentials(cfg: AppConfig) -> tuple[str, str | None]:
    """Resolve (api_key, base_url) for OpenAI-compatible gateways.

    Each config entry may hold either an environment-variable *name*
    (preferred) or, for convenience, the literal value itself ("sk-..."
    keys / "http(s)://..." URLs).

    Raises:
        RuntimeError: when no API key can be resolved.
    """
    key_setting = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "").strip()
    url_setting = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL") or "").strip()

    # API key: environment lookup first, then accept the setting itself
    # when it already looks like a literal key.
    api_key = os.environ.get(key_setting) if key_setting else None
    if not api_key and key_setting and _looks_like_api_key(key_setting):
        api_key = key_setting
    if not api_key:
        raise RuntimeError(f"Missing OpenAI compatible API key (env={key_setting})")

    # Base URL: same dual resolution; blank results are normalized to None.
    base_url = os.environ.get(url_setting) if url_setting else None
    if not base_url and url_setting and _looks_like_url(url_setting):
        base_url = url_setting
    if base_url:
        base_url = str(base_url).strip() or None
    return str(api_key), base_url
|
||||
|
||||
|
||||
def _system_prompt(scene_count: int, min_chars: int, max_chars: int) -> str:
|
||||
return f"""你是一个专业短视频编剧与分镜师。
|
||||
请把用户的创意扩展为 {scene_count} 个分镜(Scene) 的 JSON。
|
||||
@@ -56,17 +88,13 @@ def generate_scenes(user_prompt: str, cfg: AppConfig) -> list[Scene]:
|
||||
min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
|
||||
max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
|
||||
|
||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
|
||||
base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
|
||||
model = str(cfg.get("openai.model", "gpt-4o-mini"))
|
||||
|
||||
api_key = os.environ.get(api_key_env)
|
||||
if not api_key:
|
||||
raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")
|
||||
api_key, base_url = _resolve_openai_credentials(cfg)
|
||||
|
||||
client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=os.environ.get(base_url_env) or None,
|
||||
base_url=base_url,
|
||||
)
|
||||
|
||||
resp = client.chat.completions.create(
|
||||
@@ -105,17 +133,13 @@ def refine_scene(*, prompt: str, scenes: list[Scene], target_index: int, cfg: Ap
|
||||
min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
|
||||
max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
|
||||
|
||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
|
||||
base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
|
||||
model = str(cfg.get("openai.model", "gpt-4o-mini"))
|
||||
|
||||
api_key = os.environ.get(api_key_env)
|
||||
if not api_key:
|
||||
raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")
|
||||
api_key, base_url = _resolve_openai_credentials(cfg)
|
||||
|
||||
client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=os.environ.get(base_url_env) or None,
|
||||
base_url=base_url,
|
||||
)
|
||||
|
||||
scenes_payload = [
|
||||
|
||||
@@ -1,42 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, vfx
|
||||
import numpy as np
|
||||
from moviepy import AudioFileClip, VideoClip
|
||||
from PIL import Image
|
||||
from urllib.request import urlopen
|
||||
|
||||
from .audio_gen import synthesize_one
|
||||
from .comfy_client import ComfyClient
|
||||
from .comfy_client import generate_image as comfy_generate_image
|
||||
from .config import AppConfig
|
||||
from .render_pipeline import render_shot as render_shot_pipeline
|
||||
|
||||
|
||||
def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
    """Attach *audio* to *video*, matching the video length to the audio.

    Longer audio loops the video; longer video is trimmed to the audio.
    When either duration is unknown (None), the clips are combined as-is.
    """
    if audio.duration is None or video.duration is None:
        # Can't compare unknown durations; just attach the audio track.
        return video.with_audio(audio)
    if audio.duration > video.duration:
        # Loop the visuals so narration is never cut off.
        video = video.with_effects([vfx.Loop(duration=audio.duration)])
    elif video.duration > audio.duration:
        # Trim trailing silent video.
        video = video.subclipped(0, audio.duration)
    return video.with_audio(audio)
|
||||
ASSETS_DIR = "assets"
|
||||
DEMO_IMAGE = os.path.join(ASSETS_DIR, "demo.jpg")
|
||||
|
||||
|
||||
def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
    """Build a bottom-centered, white-on-black-outline subtitle overlay.

    *size* is the (width, height) of the target frame; the caption wraps
    at 92% of the frame width and stays visible for *duration* seconds.
    """
    return (
        TextClip(
            text=text,
            font_size=44,
            color="white",
            stroke_color="black",
            stroke_width=2,
            # "caption" method wraps text to the given width automatically.
            size=(int(size[0] * 0.92), None),
            method="caption",
        )
        .with_position(("center", "bottom"))
        .with_duration(duration)
        .with_opacity(0.95)
    )
|
||||
def ensure_demo_image() -> None:
    """Download a placeholder demo image once; later calls are no-ops."""
    os.makedirs(ASSETS_DIR, exist_ok=True)
    if os.path.exists(DEMO_IMAGE):
        return

    # Simple placeholder image source.
    placeholder_url = "https://picsum.photos/1280/720"
    with urlopen(placeholder_url, timeout=30) as resp:
        payload = resp.read()

    with open(DEMO_IMAGE, "wb") as fh:
        fh.write(payload)
|
||||
|
||||
|
||||
def generate_image_mock(prompt: str) -> str:
    """Return the path of the bundled demo image, ignoring *prompt*.

    Keeps the same call signature as the real image generators.
    """
    del prompt  # interface compatibility only
    ensure_demo_image()
    return DEMO_IMAGE
|
||||
|
||||
|
||||
def enrich_prompt(prompt_text: str) -> str:
    """Append a fixed cinematic style suffix to *prompt_text*.

    Blank or None input yields just the style suffix.
    """
    suffix = "cinematic, ultra realistic, 4k, detailed lighting"
    cleaned = (prompt_text or "").strip()
    return f"{cleaned}, {suffix}" if cleaned else suffix
|
||||
|
||||
|
||||
async def _render_shot_async(
|
||||
@@ -55,49 +66,102 @@ async def _render_shot_async(
|
||||
|
||||
shot_id = str(shot.get("shot_id", "unknown"))
|
||||
image_prompt = str(shot.get("image_prompt", "")).strip()
|
||||
motion = str(shot.get("motion", "")).strip()
|
||||
prompt_text = str(shot.get("prompt", image_prompt) or image_prompt).strip()
|
||||
tts_text = str(shot.get("tts", "")).strip()
|
||||
duration_s = max(1.0, float(shot.get("duration", 3)))
|
||||
|
||||
voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
|
||||
rate = str(cfg.get("tts.rate", "+0%"))
|
||||
volume = str(cfg.get("tts.volume", "+0%"))
|
||||
audio_path = audio_dir / f"shot_{shot_id}.mp3"
|
||||
audio_asset = await synthesize_one(tts_text or " ", audio_path, voice, rate, volume)
|
||||
audio_asset: Any | None = None
|
||||
if tts_text:
|
||||
audio_path = audio_dir / f"shot_{shot_id}.mp3"
|
||||
audio_asset = await synthesize_one(tts_text, audio_path, voice, rate, volume)
|
||||
|
||||
# Use config-defined output resolution for stable concatenation.
|
||||
mock_size = cfg.get("video.mock_size", [1024, 576])
|
||||
w, h = int(mock_size[0]), int(mock_size[1])
|
||||
fps = int(cfg.get("video.mock_fps", 24))
|
||||
|
||||
if audio_asset and audio_asset.duration_s:
|
||||
duration_s = max(duration_s, float(audio_asset.duration_s))
|
||||
|
||||
# shot -> image (ComfyUI first; fallback to demo.jpg)
|
||||
image_path: str
|
||||
if mock:
|
||||
from engine.main import _ensure_mock_image, _make_mock_video # local import to avoid circular at module import
|
||||
|
||||
mock_size = cfg.get("video.mock_size", [1024, 576])
|
||||
w, h = int(mock_size[0]), int(mock_size[1])
|
||||
mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h))
|
||||
fps = int(cfg.get("video.mock_fps", 24))
|
||||
raw_video_path = out_dir / f"shot_raw_{shot_id}.mp4"
|
||||
_make_mock_video(raw_video_path, mock_image, max(duration_s, audio_asset.duration_s), fps=fps)
|
||||
image_path = generate_image_mock(prompt_text)
|
||||
else:
|
||||
comfy = ComfyClient(cfg)
|
||||
wf = comfy.load_workflow()
|
||||
seed = random.randint(1, 2_147_483_647)
|
||||
wf_i = comfy.inject_params(wf, image_prompt=image_prompt, seed=seed, motion_prompt=motion or None)
|
||||
result = await comfy.run_workflow(wf_i)
|
||||
candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}]
|
||||
raw_video_path = candidates[0] if candidates else result.output_files[0]
|
||||
|
||||
clip_out = clips_dir / f"shot_{shot_id}.mp4"
|
||||
v = VideoFileClip(str(raw_video_path))
|
||||
a = AudioFileClip(str(audio_asset.path))
|
||||
try:
|
||||
v2 = _fit_video_to_audio(v, a)
|
||||
w2, h2 = v2.size
|
||||
subtitle = _subtitle_clip(tts_text, (w2, h2), v2.duration or a.duration or duration_s)
|
||||
comp = CompositeVideoClip([v2, subtitle])
|
||||
try:
|
||||
comp.write_videofile(str(clip_out), codec="libx264", audio_codec="aac", fps=v2.fps or 24, preset="veryfast")
|
||||
finally:
|
||||
comp.close()
|
||||
enriched = enrich_prompt(prompt_text)
|
||||
# Store generated images directly under outputs/{task_id}
|
||||
# (as required by verification: outputs/{task_id}/*.png).
|
||||
image_path = str(
|
||||
comfy_generate_image(
|
||||
enriched,
|
||||
out_dir,
|
||||
cfg=cfg,
|
||||
timeout_s=60,
|
||||
retry=2,
|
||||
filename_prefix=f"shot_{shot_id}",
|
||||
)
|
||||
)
|
||||
print(f"[SHOT_RENDER] {shot_id} -> image generated: {image_path}")
|
||||
except Exception as e:
|
||||
print(f"[WARN] Comfy failed, fallback to demo: {e}")
|
||||
image_path = generate_image_mock(prompt_text)
|
||||
|
||||
# Ensure image exists before rendering.
|
||||
if not image_path or not os.path.exists(image_path):
|
||||
image_path = generate_image_mock(prompt_text)
|
||||
base_img = Image.open(image_path).convert("RGB")
|
||||
|
||||
def make_frame(t: float):
|
||||
# Subtle zoom-in from 1.00 to ~1.03 over the clip duration.
|
||||
progress = float(t) / max(duration_s, 1e-6)
|
||||
progress = max(0.0, min(1.0, progress))
|
||||
scale = 1.0 + 0.03 * progress
|
||||
|
||||
new_w = max(w, int(w * scale))
|
||||
new_h = max(h, int(h * scale))
|
||||
|
||||
frame = base_img.resize((new_w, new_h), Image.LANCZOS)
|
||||
left = (new_w - w) // 2
|
||||
top = (new_h - h) // 2
|
||||
frame = frame.crop((left, top, left + w, top + h))
|
||||
return np.array(frame)
|
||||
|
||||
# image -> video
|
||||
video = VideoClip(make_frame, duration=duration_s, has_constant_size=True)
|
||||
|
||||
# optional audio -> clip
|
||||
audio_clip: AudioFileClip | None = None
|
||||
if audio_asset and os.path.exists(str(audio_asset.path)):
|
||||
audio_clip = AudioFileClip(str(audio_asset.path))
|
||||
video = video.with_audio(audio_clip)
|
||||
|
||||
# output
|
||||
clip_out = clips_dir / f"shot_{shot_id}.mp4"
|
||||
print(f"[SHOT_RENDER] {shot_id} -> {clip_out}")
|
||||
try:
|
||||
video.write_videofile(
|
||||
str(clip_out),
|
||||
fps=fps,
|
||||
codec="libx264",
|
||||
audio_codec="aac",
|
||||
preset="veryfast",
|
||||
threads=2,
|
||||
)
|
||||
finally:
|
||||
v.close()
|
||||
a.close()
|
||||
try:
|
||||
video.close()
|
||||
except Exception:
|
||||
pass
|
||||
if audio_clip is not None:
|
||||
try:
|
||||
audio_clip.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return str(clip_out)
|
||||
|
||||
|
||||
@@ -109,5 +173,5 @@ def render_shot(
|
||||
mock: bool = False,
|
||||
) -> str:
|
||||
cfg2 = cfg or AppConfig.load("./configs/config.yaml")
|
||||
return asyncio.run(_render_shot_async(shot, output_dir, cfg2, mock=mock))
|
||||
return render_shot_pipeline(shot, cfg2, output_dir, mock=mock)
|
||||
|
||||
|
||||
18
outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/task.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"task_id": "'06b0a90f-c964-4a88-8e80-6ff668e031b3'",
|
||||
"status": "failed",
|
||||
"shots": [
|
||||
{
|
||||
"shot_id": "scene_01_01",
|
||||
"status": "running"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_02_01",
|
||||
"status": "pending"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_03_01",
|
||||
"status": "pending"
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/final.mp4
Normal file
18
outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/task.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"task_id": "'13c9b724-77e3-4553-aebf-dfc845dd17c1'",
|
||||
"status": "done",
|
||||
"shots": [
|
||||
{
|
||||
"shot_id": "scene_01_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_02_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_03_01",
|
||||
"status": "done"
|
||||
}
|
||||
]
|
||||
}
|
||||
19
outputs/0d546f5e-0274-4372-b91d-fb64ace85d49/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,城市夜景,霓虹灯,电影感",
|
||||
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||
"narration": "夜色温柔落在街灯上"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,咖啡店窗边,暖光,细雨",
|
||||
"video_motion": "侧向平移,人物轻轻抬头",
|
||||
"narration": "雨声里藏着一段回忆"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,桥上远景,车流光轨,温暖",
|
||||
"video_motion": "拉远全景,光轨流动",
|
||||
"narration": "我们在光里学会告别"
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/final.mp4
Normal file
19
outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,城市夜景,霓虹灯,电影感",
|
||||
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||
"narration": "夜色温柔落在街灯上"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,咖啡店窗边,暖光,细雨",
|
||||
"video_motion": "侧向平移,人物轻轻抬头",
|
||||
"narration": "雨声里藏着一段回忆"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,桥上远景,车流光轨,温暖",
|
||||
"video_motion": "拉远全景,光轨流动",
|
||||
"narration": "我们在光里学会告别"
|
||||
}
|
||||
]
|
||||
}
|
||||
18
outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/task.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"task_id": "3ef0c0b8-c90f-49a8-88e4-e8ca735312f0",
|
||||
"status": "done",
|
||||
"shots": [
|
||||
{
|
||||
"shot_id": "scene_01_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_02_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_03_01",
|
||||
"status": "done"
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/final.mp4
Normal file
10
outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/task.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"task_id": "3f82b1ce-da18-4f82-9147-25eb0abeaf2c",
|
||||
"status": "done",
|
||||
"shots": [
|
||||
{
|
||||
"shot_id": "scene_01_01",
|
||||
"status": "done"
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
outputs/62da5541-43d2-4ead-a243-e68345877dff/final.mp4
Normal file
18
outputs/62da5541-43d2-4ead-a243-e68345877dff/task.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"task_id": "62da5541-43d2-4ead-a243-e68345877dff",
|
||||
"status": "done",
|
||||
"shots": [
|
||||
{
|
||||
"shot_id": "scene_01_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_02_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_03_01",
|
||||
"status": "done"
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/final.mp4
Normal file
19
outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||
"narration": "夜色温柔落在街灯上"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||
"video_motion": "侧向平移,人物轻轻抬头",
|
||||
"narration": "雨声里藏着一段回忆"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||
"video_motion": "拉远全景,光轨流动",
|
||||
"narration": "我们在光里学会告别"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
18
outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/task.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"task_id": "7b8255ea-ed2f-4356-8a57-d5c77e351351",
|
||||
"status": "done",
|
||||
"shots": [
|
||||
{
|
||||
"shot_id": "scene_01_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_02_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_03_01",
|
||||
"status": "done"
|
||||
}
|
||||
]
|
||||
}
|
||||
19
outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||
"narration": "夜色温柔落在街灯上"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||
"video_motion": "侧向平移,人物轻轻抬头",
|
||||
"narration": "雨声里藏着一段回忆"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||
"video_motion": "拉远全景,光轨流动",
|
||||
"narration": "我们在光里学会告别"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
19
outputs/ab4bb47e-cf35-4a99-977f-63097bdac9ed/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,城市夜景,霓虹灯,电影感",
|
||||
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||
"narration": "夜色温柔落在街灯上"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,咖啡店窗边,暖光,细雨",
|
||||
"video_motion": "侧向平移,人物轻轻抬头",
|
||||
"narration": "雨声里藏着一段回忆"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事,桥上远景,车流光轨,温暖",
|
||||
"video_motion": "拉远全景,光轨流动",
|
||||
"narration": "我们在光里学会告别"
|
||||
}
|
||||
]
|
||||
}
|
||||
BIN
outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/final.mp4
Normal file
19
outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||
"narration": "夜色温柔落在街灯上"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||
"video_motion": "侧向平移,人物轻轻抬头",
|
||||
"narration": "雨声里藏着一段回忆"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||
"video_motion": "拉远全景,光轨流动",
|
||||
"narration": "我们在光里学会告别"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |
18
outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/task.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"task_id": "ab68ccf6-0de0-4465-b4d7-1843f88d0201",
|
||||
"status": "done",
|
||||
"shots": [
|
||||
{
|
||||
"shot_id": "scene_01_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_02_01",
|
||||
"status": "done"
|
||||
},
|
||||
{
|
||||
"shot_id": "scene_03_01",
|
||||
"status": "done"
|
||||
}
|
||||
]
|
||||
}
|
||||
19
outputs/b4d67faf-fabe-4ead-8803-7bfbb7ee4ad2/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "Cinematic night shot, wet street reflecting neon lights, Xiao Lin walking away, beige trench coat, white scarf, cold tone background, bokeh.",
|
||||
"video_motion": "镜头缓慢跟随背影移动,雨丝飘落。",
|
||||
"narration": "霓虹灯下城市结束喧嚣,夜色格外温柔。"
|
||||
},
|
||||
{
|
||||
"image_prompt": "Medium shot inside convenience store, warm yellow lighting, Xiao Lin holding hot coffee, steam rising, soft facial lighting, cinematic depth of field.",
|
||||
"video_motion": "镜头缓缓推进,捕捉蒸汽升腾动态。",
|
||||
"narration": "街角便利店的灯光,是深夜里最暖的守候。"
|
||||
},
|
||||
{
|
||||
"image_prompt": "Close-up of Xiao Lin smiling slightly, blurred city light bokeh background, beige coat collar visible, warm atmosphere, high quality portrait.",
|
||||
"video_motion": "固定镜头微距拍摄,眼神自然眨动。",
|
||||
"narration": "捧一杯热茶,原来幸福就藏在平凡夜晚里。"
|
||||
}
|
||||
]
|
||||
}
|
||||
19
outputs/b77f2668-6451-47ff-81da-48b498ecb436/scenes.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"scenes": [
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,城市夜景,霓虹灯,电影感",
|
||||
"video_motion": "缓慢推进镜头,轻微摇镜",
|
||||
"narration": "夜色温柔落在街灯上"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,咖啡店窗边,暖光,细雨",
|
||||
"video_motion": "侧向平移,人物轻轻抬头",
|
||||
"narration": "雨声里藏着一段回忆"
|
||||
},
|
||||
{
|
||||
"image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息,并保持三分镜主角一致。,桥上远景,车流光轨,温暖",
|
||||
"video_motion": "拉远全景,光轨流动",
|
||||
"narration": "我们在光里学会告别"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
After Width: | Height: | Size: 1.1 MiB |
|
After Width: | Height: | Size: 1.1 MiB |