diff --git a/assets/demo.jpg b/assets/demo.jpg
new file mode 100644
index 0000000..3526080
Binary files /dev/null and b/assets/demo.jpg differ
diff --git a/configs/config.yaml b/configs/config.yaml
index 11ab367..e1bbc88 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -4,6 +4,38 @@ app:
   # ComfyUI output directory on the same machine running this code
   comfy_output_dir: "./ComfyUI/output"
 
+global:
+  # Used by prompt_injector + adapters.
+  style: ""
+  character: ""
+  negative_prompt: ""
+
+llm:
+  # Controls /script + /refine generation.
+  provider: "mock" # "openai" to enable OpenAI/DashScope calls
+
+image:
+  provider: "mock" # "mock" | "comfy" | "replicate" | "openai"
+  # Generic model name (used by some providers as fallback).
+  model: ""
+
+  replicate:
+    # Example: "stability-ai/sdxl"
+    model: "stability-ai/sdxl"
+
+  openai:
+    # Example: "gpt-image-1"
+    model: "gpt-image-1"
+
+image_fallback:
+  provider: "mock"
+
+video:
+  provider: "moviepy"
+
+tts:
+  provider: "edge"
+
 openai:
   # Prefer environment variables in real deployments.
   # OPENAI_API_KEY must be set; OPENAI_BASE_URL optional (for DeepSeek / other gateways).
diff --git a/engine/adapters/__init__.py b/engine/adapters/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/engine/adapters/__init__.py
@@ -0,0 +1 @@
+
diff --git a/engine/adapters/image/__init__.py b/engine/adapters/image/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/engine/adapters/image/__init__.py
@@ -0,0 +1 @@
+
diff --git a/engine/adapters/image/base.py b/engine/adapters/image/base.py
new file mode 100644
index 0000000..b29151b
--- /dev/null
+++ b/engine/adapters/image/base.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+
+class BaseImageGen:
+    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
+        raise NotImplementedError
+
diff --git a/engine/adapters/image/comfy_adapter.py b/engine/adapters/image/comfy_adapter.py
new file mode 100644
index 0000000..6a44fd6
--- /dev/null
+++ b/engine/adapters/image/comfy_adapter.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from engine.comfy_client import generate_image as comfy_generate_image
+from engine.config import AppConfig
+
+from .base import BaseImageGen
+from .mock_adapter import MockImageGen
+
+
+class ComfyAdapter(BaseImageGen):
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+        self.fallback = MockImageGen()
+
+    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
+        positive = str(prompt.get("positive", "") or "")
+        negative = str(prompt.get("negative", "") or "")
+        try:
+            return str(
+                comfy_generate_image(
+                    positive,
+                    output_dir,
+                    negative_text=negative or None,
+                    cfg=self.cfg,
+                    timeout_s=60,
+                    retry=2,
+                    filename_prefix="shot",
+                )
+            )
+        except Exception as e:
+            # Let render_pipeline do configured fallback.
+            raise
+
diff --git a/engine/adapters/image/mock_adapter.py b/engine/adapters/image/mock_adapter.py
new file mode 100644
index 0000000..7d14b49
--- /dev/null
+++ b/engine/adapters/image/mock_adapter.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+import os
+import uuid
+from pathlib import Path
+from urllib.request import urlopen
+
+from PIL import Image
+
+from .base import BaseImageGen
+
+
+ASSETS_DIR = "assets"
+DEMO_IMAGE = os.path.join(ASSETS_DIR, "demo.jpg")
+
+
+def ensure_demo_image() -> None:
+    os.makedirs(ASSETS_DIR, exist_ok=True)
+    if os.path.exists(DEMO_IMAGE):
+        return
+
+    url = "https://picsum.photos/1280/720"
+    with urlopen(url, timeout=30) as resp:
+        data = resp.read()
+    with open(DEMO_IMAGE, "wb") as f:
+        f.write(data)
+
+
+class MockImageGen(BaseImageGen):
+    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
+        # prompt is accepted for interface consistency; mock uses only demo.jpg.
+        _ = prompt
+        ensure_demo_image()
+        out_dir = Path(output_dir)
+        out_dir.mkdir(parents=True, exist_ok=True)
+        out_path = out_dir / f"shot_{uuid.uuid4().hex}.png"
+        try:
+            # Convert to PNG so verification criteria can match *.png.
+            img = Image.open(DEMO_IMAGE).convert("RGB")
+            img.save(str(out_path), format="PNG")
+        except Exception:
+            # Last-resort: if PNG conversion fails, still write a best-effort copy.
+            out_path.write_bytes(Path(DEMO_IMAGE).read_bytes())
+        return str(out_path)
+
diff --git a/engine/adapters/image/openai_image_adapter.py b/engine/adapters/image/openai_image_adapter.py
new file mode 100644
index 0000000..da23e1a
--- /dev/null
+++ b/engine/adapters/image/openai_image_adapter.py
@@ -0,0 +1,83 @@
+from __future__ import annotations
+
+import os
+import uuid
+from io import BytesIO
+from pathlib import Path
+from typing import Any
+
+import requests
+from PIL import Image
+
+from engine.config import AppConfig
+
+from .base import BaseImageGen
+
+
+class OpenAIImageAdapter(BaseImageGen):
+    """
+    Optional image provider adapter using OpenAI Images API (or OpenAI-compatible gateways).
+    Requires `openai` python package and a configured API key via environment variables.
+    """
+
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+        # Expected keys (configurable):
+        # - image.openai.model
+        # - openai.api_key_env / openai.base_url_env (reuses existing engine/script_gen config fields)
+        self.model = str(cfg.get("image.openai.model", cfg.get("image.model", ""))).strip()
+        if not self.model:
+            raise ValueError("OpenAIImageAdapter requires `image.openai.model` (or `image.model`).")
+
+        api_key_env_or_literal = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "OPENAI_API_KEY").strip()
+        # Support both:
+        # - env var name (e.g. OPENAI_API_KEY)
+        # - literal API key (e.g. starts with `sk-...`) for quick local POCs.
+        if api_key_env_or_literal.startswith("sk-"):
+            api_key = api_key_env_or_literal
+        else:
+            api_key = os.environ.get(api_key_env_or_literal)
+        if not api_key:
+            raise RuntimeError(f"OpenAIImageAdapter missing API key: `{api_key_env_or_literal}`")
+        self.api_key = api_key
+
+        base_url_env_or_literal = str(cfg.get("openai.base_url_env", "https://api.openai.com/v1")).strip()
+        self.base_url = base_url_env_or_literal.rstrip("/") if base_url_env_or_literal else "https://api.openai.com/v1"
+
+        # Lazy import to avoid hard dependency for mock/comfy users.
+        from openai import OpenAI  # type: ignore
+
+        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
+
+    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        positive = prompt.get("positive", "")
+        negative = prompt.get("negative", "")
+        # OpenAI Images API generally doesn't expose a dedicated negative_prompt field.
+        # To keep interface consistency, embed negative hints into the prompt text.
+        if negative:
+            prompt_text = f"{positive}\nNegative prompt: {negative}"
+        else:
+            prompt_text = positive
+
+        result = self.client.images.generate(model=self.model, prompt=prompt_text)
+
+        # OpenAI SDK: result.data[0].url
+        url: str | None = None
+        try:
+            url = result.data[0].url  # type: ignore[attr-defined]
+        except Exception:
+            pass
+        if not url:
+            raise RuntimeError("OpenAIImageAdapter unexpected response: missing image url")
+
+        r = requests.get(url, timeout=60)
+        r.raise_for_status()
+
+        out_path = output_dir / f"shot_{uuid.uuid4().hex}.png"
+        img = Image.open(BytesIO(r.content)).convert("RGB")
+        img.save(str(out_path), format="PNG")
+        return str(out_path)
+
diff --git a/engine/adapters/image/replicate_adapter.py b/engine/adapters/image/replicate_adapter.py
new file mode 100644
index 0000000..70c26b4
--- /dev/null
+++ b/engine/adapters/image/replicate_adapter.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import uuid
+from pathlib import Path
+from typing import Any
+
+import requests
+from PIL import Image
+
+from engine.config import AppConfig
+
+from .base import BaseImageGen
+
+
+class ReplicateAdapter(BaseImageGen):
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+        # Expected: image.replicate.model
+        self.model = str(cfg.get("image.replicate.model", cfg.get("image.model", ""))).strip()
+        if not self.model:
+            raise ValueError("ReplicateAdapter requires `image.replicate.model` (or `image.model`).")
+
+        # Import lazily so that environments without replicate installed can still run with mock/comfy.
+        import replicate  # type: ignore
+
+        self.replicate = replicate
+
+    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        input_payload: dict[str, Any] = {
+            "prompt": prompt.get("positive", ""),
+            "negative_prompt": prompt.get("negative", ""),
+        }
+
+        # replicate.run is synchronous when wait is handled by the SDK version.
+        output = self.replicate.run(self.model, input=input_payload)
+
+        # Common shapes: [url, ...] or dict-like.
+        image_url = None
+        if isinstance(output, list) and output:
+            image_url = output[0]
+        elif isinstance(output, dict):
+            image_url = output.get("image") or output.get("output") or output.get("url")
+        if not isinstance(image_url, str) or not image_url:
+            raise RuntimeError(f"Unexpected Replicate output shape: {type(output)}")
+
+        r = requests.get(image_url, timeout=60)
+        r.raise_for_status()
+
+        # Always output PNG to satisfy downstream validation `outputs/{task_id}/*.png`.
+        out_path = output_dir / f"shot_{uuid.uuid4().hex}.png"
+        # Pillow doesn't provide open_bytes; wrap content into a buffer.
+        from io import BytesIO
+
+        img = Image.open(BytesIO(r.content)).convert("RGB")
+        img.save(str(out_path), format="PNG")
+        return str(out_path)
+
diff --git a/engine/adapters/image/stability_adapter.py b/engine/adapters/image/stability_adapter.py
new file mode 100644
index 0000000..3ef1278
--- /dev/null
+++ b/engine/adapters/image/stability_adapter.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from engine.config import AppConfig
+
+from .base import BaseImageGen
+
+
+class StabilityAdapter(BaseImageGen):
+    """
+    Placeholder for Stability AI image generation.
+    Add implementation + dependencies when needed.
+    """
+
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+
+    def generate(self, prompt: dict[str, str], output_dir: str | Path) -> str:
+        raise NotImplementedError("StabilityAdapter not implemented yet")
+
diff --git a/engine/adapters/llm/__init__.py b/engine/adapters/llm/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/engine/adapters/llm/__init__.py
@@ -0,0 +1 @@
+
diff --git a/engine/adapters/llm/base.py b/engine/adapters/llm/base.py
new file mode 100644
index 0000000..f0090eb
--- /dev/null
+++ b/engine/adapters/llm/base.py
@@ -0,0 +1,12 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+class BaseLLM:
+    def generate_script(self, prompt: str, context: dict[str, Any] | None = None) -> Any:
+        raise NotImplementedError
+
+    def refine_scene(self, scene: Any, context: dict[str, Any] | None = None) -> Any:
+        raise NotImplementedError
+
diff --git a/engine/adapters/llm/mock_adapter.py b/engine/adapters/llm/mock_adapter.py
new file mode 100644
index 0000000..17fb8a7
--- /dev/null
+++ b/engine/adapters/llm/mock_adapter.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from typing import Any
+
+from engine.types import Scene
+
+from .base import BaseLLM
+
+
+class MockLLM(BaseLLM):
+    def generate_script(self, prompt: str, context: dict[str, Any] | None = None) -> list[Scene]:
+        # Simple deterministic scenes for offline development.
+        prompt = (prompt or "").strip()
+        if not prompt:
+            prompt = "a warm city night"
+        return [
+            Scene(image_prompt=f"{prompt}，城市夜景，霓虹灯，电影感", video_motion="缓慢推进镜头，轻微摇镜", narration="夜色温柔落在街灯上"),
+            Scene(image_prompt=f"{prompt}，咖啡店窗边，暖光，细雨", video_motion="侧向平移，人物轻轻抬头", narration="雨声里藏着一段回忆"),
+            Scene(image_prompt=f"{prompt}，桥上远景，车流光轨，温暖", video_motion="拉远全景，光轨流动", narration="我们在光里学会告别"),
+        ]
+
+    def refine_scene(self, scene: Scene, context: dict[str, Any] | None = None) -> Scene:
+        # Minimal polish: append a hint.
+        return Scene(image_prompt=scene.image_prompt, video_motion=scene.video_motion, narration=(scene.narration + "（更凝练）")[:30])
+
diff --git a/engine/adapters/llm/openai_adapter.py b/engine/adapters/llm/openai_adapter.py
new file mode 100644
index 0000000..5c8c25c
--- /dev/null
+++ b/engine/adapters/llm/openai_adapter.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from typing import Any
+
+from engine.config import AppConfig
+from engine.script_gen import generate_scenes, refine_scene
+
+from .base import BaseLLM
+
+
+class OpenAIAdapter(BaseLLM):
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+
+    def generate_script(self, prompt: str, context: dict[str, Any] | None = None):
+        # Existing script_gen already enforces JSON schema and length constraints.
+        return generate_scenes(prompt, self.cfg)
+
+    def refine_scene(self, scene: Any, context: dict[str, Any] | None = None):
+        if context is None:
+            context = {}
+        # Context carries needed values to call refine_scene in script_gen.
+        scenes = context.get("scenes")
+        prompt2 = context.get("prompt")
+        target_index = context.get("target_index")
+        if scenes is None or prompt2 is None or target_index is None:
+            raise ValueError("OpenAIAdapter.refine_scene missing context: scenes/prompt/target_index")
+        return refine_scene(prompt=prompt2, scenes=scenes, target_index=int(target_index), cfg=self.cfg)
+
diff --git a/engine/adapters/tts/__init__.py b/engine/adapters/tts/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/engine/adapters/tts/__init__.py
@@ -0,0 +1 @@
+
diff --git a/engine/adapters/tts/base.py b/engine/adapters/tts/base.py
new file mode 100644
index 0000000..cdbcd07
--- /dev/null
+++ b/engine/adapters/tts/base.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+
+class BaseTTS:
+    def generate(self, text: str, output_path: str | Path) -> str:
+        raise NotImplementedError
+
diff --git a/engine/adapters/tts/edge_adapter.py b/engine/adapters/tts/edge_adapter.py
new file mode 100644
index 0000000..860c58d
--- /dev/null
+++ b/engine/adapters/tts/edge_adapter.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+
+from engine.audio_gen import synthesize_one
+from engine.config import AppConfig
+
+from .base import BaseTTS
+
+
+class EdgeTTS(BaseTTS):
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+
+    def generate(self, text: str, output_path: str | Path) -> str:
+        text = text or " "
+        output_path = Path(output_path)
+        voice = str(self.cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
+        rate = str(self.cfg.get("tts.rate", "+0%"))
+        volume = str(self.cfg.get("tts.volume", "+0%"))
+
+        async def _run():
+            asset = await synthesize_one(text, output_path, voice, rate, volume)
+            return str(asset.path)
+
+        return asyncio.run(_run())
+
diff --git a/engine/adapters/tts/mock_adapter.py b/engine/adapters/tts/mock_adapter.py
new file mode 100644
index 0000000..5a3c3ed
--- /dev/null
+++ b/engine/adapters/tts/mock_adapter.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from .base import BaseTTS
+
+
+class MockTTS(BaseTTS):
+    def generate(self, text: str, output_path: str | Path) -> str:
+        # No-op for offline tests: return empty path so video adapter skips audio.
+        output_path = Path(output_path)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path.write_bytes(b"")
+        return str(output_path)
+
diff --git a/engine/adapters/video/__init__.py b/engine/adapters/video/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/engine/adapters/video/__init__.py
@@ -0,0 +1 @@
+
diff --git a/engine/adapters/video/base.py b/engine/adapters/video/base.py
new file mode 100644
index 0000000..840db8b
--- /dev/null
+++ b/engine/adapters/video/base.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+
+class BaseVideoGen:
+    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
+        raise NotImplementedError
+
diff --git a/engine/adapters/video/ltx_adapter.py b/engine/adapters/video/ltx_adapter.py
new file mode 100644
index 0000000..83b5c24
--- /dev/null
+++ b/engine/adapters/video/ltx_adapter.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from engine.config import AppConfig
+
+from .base import BaseVideoGen
+
+
+class LTXVideoGen(BaseVideoGen):
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+
+    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
+        # Reserved for future: direct image->video generation (LTX / diffusion video).
+        # Current project keeps clip generation via MoviePy for stability.
+        raise NotImplementedError("LTXVideoGen is not implemented yet")
+
diff --git a/engine/adapters/video/moviepy_adapter.py b/engine/adapters/video/moviepy_adapter.py
new file mode 100644
index 0000000..847fe2e
--- /dev/null
+++ b/engine/adapters/video/moviepy_adapter.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from moviepy import AudioFileClip, VideoClip
+from PIL import Image
+
+from engine.config import AppConfig
+
+from .base import BaseVideoGen
+
+
+class MoviePyVideoGen(BaseVideoGen):
+    def __init__(self, cfg: AppConfig):
+        self.cfg = cfg
+
+    def generate(self, image_path: str, prompt: dict, output_path: str | Path) -> str:
+        output_path = Path(output_path)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Required prompt fields for shot rendering.
+        duration_s = float(prompt.get("duration_s", 3))
+        fps = int(prompt.get("fps", self.cfg.get("video.mock_fps", 24)))
+        audio_path = prompt.get("audio_path")
+
+        # Clip resolution.
+        size = prompt.get("size")
+        if isinstance(size, (list, tuple)) and len(size) == 2:
+            w, h = int(size[0]), int(size[1])
+        else:
+            mock_size = self.cfg.get("video.mock_size", [1024, 576])
+            w, h = int(mock_size[0]), int(mock_size[1])
+
+        base_img = Image.open(image_path).convert("RGB")
+
+        def make_frame(t: float):
+            progress = float(t) / max(duration_s, 1e-6)
+            progress = max(0.0, min(1.0, progress))
+            scale = 1.0 + 0.03 * progress
+            new_w = max(w, int(w * scale))
+            new_h = max(h, int(h * scale))
+            frame = base_img.resize((new_w, new_h), Image.LANCZOS)
+            left = (new_w - w) // 2
+            top = (new_h - h) // 2
+            frame = frame.crop((left, top, left + w, top + h))
+            return np.array(frame)
+
+        video = VideoClip(make_frame, duration=duration_s, has_constant_size=True)
+
+        # Optional audio.
+        if audio_path and os.path.exists(str(audio_path)):
+            a = AudioFileClip(str(audio_path))
+            video = video.with_audio(a)
+        else:
+            a = None
+
+        try:
+            video.write_videofile(
+                str(output_path),
+                fps=fps,
+                codec="libx264",
+                audio_codec="aac",
+                preset="veryfast",
+                threads=2,
+            )
+        finally:
+            try:
+                video.close()
+            except Exception:
+                pass
+            if a is not None:
+                try:
+                    a.close()
+                except Exception:
+                    pass
+
+        return str(output_path)
+
diff --git a/engine/comfy_client.py b/engine/comfy_client.py
index bc7005c..624837c 100644
--- a/engine/comfy_client.py
+++ b/engine/comfy_client.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import asyncio
 import json
+import time
 import uuid
 from dataclasses import dataclass
 from pathlib import Path
@@ -186,3 +187,215 @@ class ComfyClient:
 
             # unreachable
             # return ComfyResult(prompt_id=prompt_id, output_files=last_files)
+
+
+# ---------------------------------------------------------------------------
+# Minimal "text->image" helpers (used by shot rendering)
+# ---------------------------------------------------------------------------
+
+
+def _build_simple_workflow(
+    prompt_text: str,
+    *,
+    seed: int,
+    ckpt_name: str,
+    width: int,
+    height: int,
+    steps: int = 20,
+    cfg: float = 8.0,
+    sampler_name: str = "euler",
+    scheduler: str = "normal",
+    denoise: float = 1.0,
+    filename_prefix: str = "shot",
+    negative_text: str = "low quality, blurry",
+) -> dict[str, Any]:
+    # Best-effort workflow. If your ComfyUI nodes/models differ, generation must fallback.
+    return {
+        "3": {
+            "class_type": "KSampler",
+            "inputs": {
+                "seed": int(seed),
+                "steps": int(steps),
+                "cfg": float(cfg),
+                "sampler_name": sampler_name,
+                "scheduler": scheduler,
+                "denoise": float(denoise),
+                "model": ["4", 0],
+                "positive": ["6", 0],
+                "negative": ["7", 0],
+                "latent_image": ["5", 0],
+            },
+        },
+        "4": {
+            "class_type": "CheckpointLoaderSimple",
+            "inputs": {
+                "ckpt_name": ckpt_name,
+            },
+        },
+        "5": {
+            "class_type": "EmptyLatentImage",
+            "inputs": {
+                "width": int(width),
+                "height": int(height),
+                "batch_size": 1,
+            },
+        },
+        "6": {
+            "class_type": "CLIPTextEncode",
+            "inputs": {
+                "text": prompt_text,
+                "clip": ["4", 1],
+            },
+        },
+        "7": {
+            "class_type": "CLIPTextEncode",
+            "inputs": {
+                "text": negative_text,
+                "clip": ["4", 1],
+            },
+        },
+        "8": {
+            "class_type": "VAEDecode",
+            "inputs": {
+                "samples": ["3", 0],
+                "vae": ["4", 2],
+            },
+        },
+        "9": {
+            "class_type": "SaveImage",
+            "inputs": {
+                "images": ["8", 0],
+                "filename_prefix": filename_prefix,
+            },
+        },
+    }
+
+
+def _queue_prompt(base_url: str, workflow: dict[str, Any], client_id: str) -> str:
+    r = httpx.post(
+        base_url.rstrip("/") + "/prompt",
+        json={"prompt": workflow, "client_id": client_id},
+        timeout=30.0,
+    )
+    r.raise_for_status()
+    data = r.json()
+    pid = data.get("prompt_id")
+    if not isinstance(pid, str) or not pid:
+        raise RuntimeError(f"Unexpected /prompt response: {data}")
+    return pid
+
+
+def _get_history_item(base_url: str, prompt_id: str) -> dict[str, Any] | None:
+    for url in (f"{base_url.rstrip('/')}/history/{prompt_id}", f"{base_url.rstrip('/')}/history"):
+        try:
+            r = httpx.get(url, timeout=30.0)
+            if r.status_code == 404:
+                continue
+            r.raise_for_status()
+            data = r.json()
+            if isinstance(data, dict):
+                if prompt_id in data and isinstance(data[prompt_id], dict):
+                    return data[prompt_id]
+                if url.endswith(f"/{prompt_id}") and isinstance(data, dict):
+                    return data
+            return None
+        except Exception:
+            continue
+    return None
+
+
+def _extract_first_image_view_target(history_item: dict[str, Any]) -> tuple[str, str] | None:
+    outputs = history_item.get("outputs")
+    if not isinstance(outputs, dict):
+        return None
+
+    def walk(v: Any) -> list[dict[str, Any]]:
+        found: list[dict[str, Any]] = []
+        if isinstance(v, dict):
+            if isinstance(v.get("filename"), str) and v.get("filename").strip():
+                found.append(v)
+            for vv in v.values():
+                found.extend(walk(vv))
+        elif isinstance(v, list):
+            for vv in v:
+                found.extend(walk(vv))
+        return found
+
+    candidates = walk(outputs)
+    for c in candidates:
+        fn = str(c.get("filename", "")).strip()
+        sf = str(c.get("subfolder", "") or "").strip()
+        if fn:
+            return fn, sf
+    return None
+
+
+def generate_image(
+    prompt_text: str,
+    output_dir: str | Path,
+    *,
+    cfg: AppConfig | None = None,
+    timeout_s: int = 60,
+    retry: int = 2,
+    width: int | None = None,
+    height: int | None = None,
+    filename_prefix: str = "shot",
+    ckpt_candidates: list[str] | None = None,
+    negative_text: str | None = None,
+) -> Path:
+    cfg2 = cfg or AppConfig.load("./configs/config.yaml")
+    base_url = str(cfg2.get("app.comfy_base_url", "http://comfyui:8188")).rstrip("/")
+
+    out_dir = Path(output_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    if width is None or height is None:
+        mock_size = cfg2.get("video.mock_size", [1024, 576])
+        width = int(width or mock_size[0])
+        height = int(height or mock_size[1])
+
+    if negative_text is None:
+        negative_text = "low quality, blurry"
+
+    if ckpt_candidates is None:
+        ckpt_candidates = [
+            "v1-5-pruned-emaonly.ckpt",
+            "v1-5-pruned-emaonly.safetensors",
+            "sd-v1-5-tiny.safetensors",
+        ]
+
+    last_err: Exception | None = None
+    for _attempt in range(max(1, retry)):
+        for ckpt_name in ckpt_candidates:
+            client_id = str(uuid.uuid4())
+            seed = int(uuid.uuid4().int % 2_147_483_647)
+            workflow = _build_simple_workflow(
+                prompt_text,
+                seed=seed,
+                ckpt_name=ckpt_name,
+                width=width,
+                height=height,
+                filename_prefix=filename_prefix,
+                    negative_text=negative_text,
+            )
+            try:
+                prompt_id = _queue_prompt(base_url, workflow, client_id)
+                start = time.time()
+                while time.time() - start < timeout_s:
+                    item = _get_history_item(base_url, prompt_id)
+                    if isinstance(item, dict):
+                        img_target = _extract_first_image_view_target(item)
+                        if img_target:
+                            filename, subfolder = img_target
+                            view_url = f"{base_url}/view?filename={filename}&subfolder={subfolder}"
+                            img_resp = httpx.get(view_url, timeout=60.0)
+                            img_resp.raise_for_status()
+                            image_path = out_dir / filename
+                            image_path.write_bytes(img_resp.content)
+                            return image_path
+                    time.sleep(1.0)
+            except Exception as e:
+                last_err = e
+                continue
+
+    raise RuntimeError(f"ComfyUI image generation failed after retries: {last_err}")
diff --git a/engine/main.py b/engine/main.py
index 2c9bf28..9c80e17 100644
--- a/engine/main.py
+++ b/engine/main.py
@@ -12,13 +12,14 @@ from typing import Any
 from moviepy import ImageClip
 from PIL import Image, ImageDraw, ImageFont
 
-from engine.audio_gen import synthesize_scenes
+from engine.model_factory import get_model
+from engine.prompt_injector import inject_prompt
+from engine.adapters.image.mock_adapter import MockImageGen
 from engine.assembler import assemble_clips
 from engine.comfy_client import ComfyClient
 from engine.config import AppConfig
 from engine.director import scenes_to_shots
 from engine.shot_executor import render_shot
-from engine.script_gen import generate_scenes, refine_scene
 from engine.task_store import create_task, update_shot_status, update_task_status
 from engine.types import Scene
 from engine.video_editor import Segment, render_final
@@ -28,13 +29,15 @@ def _emit(line: str) -> None:
     print(line, flush=True)
 
 
-def _emit_scene(scene_idx: int, scene: Scene) -> None:
+def _emit_scene(scene_idx: int, scene: Scene, extra: dict[str, Any] | None = None) -> None:
     payload = {
         "index": scene_idx,
         "image_prompt": scene.image_prompt,
         "video_motion": scene.video_motion,
         "narration": scene.narration,
     }
+    if extra:
+        payload.update(extra)
     _emit("SCENE_JSON " + json.dumps(payload, ensure_ascii=False))
 
 
@@ -136,9 +139,50 @@ def _fallback_scenes(prompt: str) -> list[Scene]:
     ]
 
 
+def _generate_scene_preview(
+    *,
+    cfg: AppConfig,
+    out_dir: Path,
+    image_prompt: str,
+    style: str | None,
+    character: str | None,
+) -> str | None:
+    try:
+        image_gen = get_model("image", cfg)
+    except Exception:
+        image_gen = get_model("image_fallback", cfg)
+
+    global_cfg = dict(cfg.get("global", {}) or {})
+    if style:
+        global_cfg["style"] = style
+    if character:
+        global_cfg["character"] = character
+
+    prompt_obj = inject_prompt(global_cfg, {"prompt": image_prompt})
+    try:
+        image_path = image_gen.generate(prompt_obj, out_dir)
+    except Exception:
+        try:
+            image_path = get_model("image_fallback", cfg).generate(prompt_obj, out_dir)
+        except Exception:
+            # Last-resort hard fallback: never block script stage due to preview failures.
+            image_path = MockImageGen().generate(prompt_obj, out_dir)
+
+    p = Path(str(image_path))
+    if not p.exists():
+        return None
+    return f"/api/static/{out_dir.name}/{p.name}"
+
+
 def _has_llm_key(cfg: AppConfig) -> bool:
-    api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
-    return bool(os.environ.get(api_key_env))
+    api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "OPENAI_API_KEY").strip()
+    # Env var name case.
+    if os.environ.get(api_key_env):
+        return True
+    # Literal key case (DashScope / OpenAI-compatible).
+    if api_key_env.startswith("sk-"):
+        return True
+    return False
 
 
 def _parse_scenes_from_obj(obj: Any) -> list[Scene]:
@@ -239,7 +283,8 @@ def step_script(prompt: str, cfg: AppConfig, mock: bool, *, style: str | None, c
         # fallback scenes still should include global injection
         scenes = _fallback_scenes(prompt)
     else:
-        scenes = generate_scenes(prompt2, cfg)
+        llm = get_model("llm", cfg)
+        scenes = llm.generate_script(prompt2, context=None)
 
     out_dir.mkdir(parents=True, exist_ok=True)
     _emit("SCRIPT_BEGIN")
@@ -249,7 +294,14 @@ def step_script(prompt: str, cfg: AppConfig, mock: bool, *, style: str | None, c
             video_motion=s.video_motion,
             narration=s.narration,
         )
-        _emit_scene(idx, s2)
+        preview_url = _generate_scene_preview(
+            cfg=cfg,
+            out_dir=out_dir,
+            image_prompt=s2.image_prompt,
+            style=style,
+            character=character,
+        )
+        _emit_scene(idx, s2, extra={"preview_url": preview_url or ""})
     _emit("SCRIPT_END")
     (out_dir / "scenes.json").write_text(
         json.dumps(
@@ -292,8 +344,9 @@ def step_refine(
             narration=(s.narration + "（更凝练）")[:30],
         )
     else:
-        # Ensure globals are visible to LLM, and inject to output image prompt.
-        refined0 = refine_scene(prompt=prompt2, scenes=scenes, target_index=target_index, cfg=cfg)
+        llm = get_model("llm", cfg)
+        # Context carries prompt + scenes for consistent refinement.
+        refined0 = llm.refine_scene(scenes[target_index - 1], context={"prompt": prompt2, "scenes": scenes, "target_index": target_index})
         refined = Scene(
             image_prompt=_decorate_image_prompt(refined0.image_prompt, style=style, character=character),
             video_motion=refined0.video_motion,
@@ -301,7 +354,14 @@ def step_refine(
         )
 
     # Keep the original index for frontend replacement.
-    _emit_scene(scene_index, refined)
+    preview_url = _generate_scene_preview(
+        cfg=cfg,
+        out_dir=out_dir,
+        image_prompt=refined.image_prompt,
+        style=style,
+        character=character,
+    )
+    _emit_scene(scene_index, refined, extra={"preview_url": preview_url or ""})
     out_dir.mkdir(parents=True, exist_ok=True)
     (out_dir / f"refine_scene_{scene_index}.json").write_text(
         json.dumps(
diff --git a/engine/model_factory.py b/engine/model_factory.py
new file mode 100644
index 0000000..ca9479a
--- /dev/null
+++ b/engine/model_factory.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import os
+from typing import Any
+
+from engine.config import AppConfig
+
+
+def _provider(cfg: AppConfig, path: str, default: str) -> str:
+    env_map = {
+        "llm.provider": "ENGINE_LLM_PROVIDER",
+        "image.provider": "ENGINE_IMAGE_PROVIDER",
+        "image_fallback.provider": "ENGINE_IMAGE_FALLBACK_PROVIDER",
+        "video.provider": "ENGINE_VIDEO_PROVIDER",
+        "tts.provider": "ENGINE_TTS_PROVIDER",
+    }
+    env_key = env_map.get(path)
+    if env_key:
+        env_val = str(os.environ.get(env_key, "")).strip()
+        if env_val:
+            return env_val
+    v = cfg.get(path, default)
+    return str(v or default).strip() or default
+
+
+def get_model(name: str, cfg: AppConfig) -> Any:
+    if name == "llm":
+        provider = _provider(cfg, "llm.provider", "openai")
+        if provider == "mock":
+            from engine.adapters.llm.mock_adapter import MockLLM
+
+            return MockLLM()
+        from engine.adapters.llm.openai_adapter import OpenAIAdapter
+
+        return OpenAIAdapter(cfg)
+
+    if name in ("image", "image_fallback"):
+        section = "image" if name == "image" else "image_fallback"
+        # Important: fallback must default to mock, not follow primary image provider.
+        provider_default = "mock" if name == "image_fallback" else _provider(cfg, "image.provider", "mock")
+        provider = _provider(cfg, f"{section}.provider", provider_default)
+        if provider == "comfy":
+            from engine.adapters.image.comfy_adapter import ComfyAdapter
+
+            return ComfyAdapter(cfg)
+        if provider == "replicate":
+            from engine.adapters.image.replicate_adapter import ReplicateAdapter
+
+            return ReplicateAdapter(cfg)
+        if provider == "openai":
+            from engine.adapters.image.openai_image_adapter import OpenAIImageAdapter
+
+            return OpenAIImageAdapter(cfg)
+
+        from engine.adapters.image.mock_adapter import MockImageGen
+
+        return MockImageGen()
+
+    if name == "video":
+        provider = _provider(cfg, "video.provider", "moviepy")
+        if provider == "ltx":
+            from engine.adapters.video.ltx_adapter import LTXVideoGen
+
+            return LTXVideoGen(cfg)
+        from engine.adapters.video.moviepy_adapter import MoviePyVideoGen
+
+        return MoviePyVideoGen(cfg)
+
+    if name == "tts":
+        provider = _provider(cfg, "tts.provider", "edge")
+        if provider == "mock":
+            from engine.adapters.tts.mock_adapter import MockTTS
+
+            return MockTTS()
+        from engine.adapters.tts.edge_adapter import EdgeTTS
+
+        return EdgeTTS(cfg)
+
+    raise ValueError(f"Unknown model adapter name: {name}")
+
diff --git a/engine/prompt_injector.py b/engine/prompt_injector.py
new file mode 100644
index 0000000..de4ac5e
--- /dev/null
+++ b/engine/prompt_injector.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+def inject_prompt(global_cfg: dict[str, Any] | None, scene: dict[str, Any]) -> dict[str, str]:
+    """
+    Unified positive/negative prompt builder.
+    Note: current pipeline already injects some globals into `scene["image_prompt"]`.
+    """
+    global_cfg = global_cfg or {}
+    character = str(global_cfg.get("character", "") or "").strip()
+    style = str(global_cfg.get("style", "") or "").strip()
+    negative = str(global_cfg.get("negative_prompt", "") or "").strip()
+
+    base = str(scene.get("prompt") or scene.get("image_prompt") or "").strip()
+    if not base:
+        base = str(scene.get("image_prompt") or "")
+
+    positive_parts = [p for p in [character, style, base] if p]
+    positive = ", ".join(positive_parts).strip(", ")
+    return {"positive": positive, "negative": negative}
+
diff --git a/engine/render_pipeline.py b/engine/render_pipeline.py
new file mode 100644
index 0000000..8dd62f0
--- /dev/null
+++ b/engine/render_pipeline.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from engine.model_factory import get_model
+from engine.prompt_injector import inject_prompt
+from engine.adapters.image.mock_adapter import MockImageGen
+
+
+def render_shot(shot: dict[str, Any], cfg, out_dir: str | Path, *, mock: bool = False) -> str:
+    out_dir = Path(out_dir)
+    clips_dir = out_dir / "clips"
+    audio_dir = out_dir / "audio"
+    clips_dir.mkdir(parents=True, exist_ok=True)
+    audio_dir.mkdir(parents=True, exist_ok=True)
+
+    shot_id = str(shot.get("shot_id", "unknown"))
+    duration_s = float(shot.get("duration", 3))
+    narration = str(shot.get("tts", "")).strip()
+
+    # Models from config.
+    image_fallback_gen = get_model("image_fallback", cfg)
+    try:
+        image_gen = get_model("image", cfg)
+    except Exception as e:
+        # Covers missing optional deps at adapter init time (e.g. replicate/openai packages).
+        print(f"[WARN] image provider init failed, fallback to image_fallback: {e}")
+        image_gen = image_fallback_gen
+    tts = get_model("tts", cfg)
+    video_gen = get_model("video", cfg)
+
+    # Prompt injection.
+    global_cfg = cfg.get("global", {}) if hasattr(cfg, "get") else {}
+    prompt_obj = inject_prompt(global_cfg, {"prompt": shot.get("image_prompt", "")})
+    positive_prompt = prompt_obj.get("positive", "")
+    # Prompt enrichment: keeps ComfyUI generations cinematic and detailed.
+    enrich_style = "cinematic, ultra realistic, 4k, detailed lighting"
+    if enrich_style not in positive_prompt:
+        positive_prompt = f"{positive_prompt}, {enrich_style}".strip(", ")
+        prompt_obj["positive"] = positive_prompt
+
+    # 1) image
+    try:
+        image_path = image_gen.generate(prompt_obj, out_dir)
+    except Exception as e:
+        # Config-driven fallback; keeps provider switching non-invasive.
+        print(f"[WARN] Image generation failed, fallback to image_fallback: {e}")
+        try:
+            image_path = image_fallback_gen.generate(prompt_obj, out_dir)
+        except Exception as e2:
+            print(f"[WARN] image_fallback also failed, hard fallback to mock: {e2}")
+            image_path = MockImageGen().generate(prompt_obj, out_dir)
+
+    scene_label = str(shot.get("scene_id") or shot.get("shot_id") or "scene_unknown")
+    print(f"[SHOT_RENDER] {scene_label} -> image generated: {image_path}")
+
+    # 2) audio (optional)
+    audio_path = None
+    if narration:
+        # Use a stable per-shot audio filename.
+        ap = audio_dir / f"shot_{shot_id}.mp3"
+        try:
+            audio_path = tts.generate(narration, ap)
+        except Exception as e:
+            # Don't fail the whole render due to TTS issues.
+            print(f"[WARN] TTS failed, continue without audio: {e}")
+            audio_path = None
+
+    # 3) clip
+    clip_out = clips_dir / f"shot_{shot_id}.mp4"
+    prompt = {
+        "duration_s": duration_s,
+        "fps": int(cfg.get("video.mock_fps", 24)),
+        "audio_path": audio_path,
+        "size": cfg.get("video.mock_size", None),
+    }
+    clip_path = video_gen.generate(image_path, prompt, clip_out)
+    return clip_path
+
diff --git a/engine/script_gen.py b/engine/script_gen.py
index 3238cf6..5b67b95 100644
--- a/engine/script_gen.py
+++ b/engine/script_gen.py
@@ -10,6 +10,38 @@ from .config import AppConfig
 from .types import Scene
 
 
+def _looks_like_api_key(v: str) -> bool:
+    vv = (v or "").strip()
+    # Common prefixes: DashScope uses "sk-..."; we keep it minimal and permissive.
+    return bool(vv) and vv.startswith("sk-")
+
+
+def _looks_like_url(v: str) -> bool:
+    vv = (v or "").strip()
+    return vv.startswith("http://") or vv.startswith("https://")
+
+
+def _resolve_openai_credentials(cfg: AppConfig) -> tuple[str, str | None]:
+    api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY") or "").strip()
+    base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL") or "").strip()
+
+    # 1) Resolve api_key: allow both "env var name" and "literal key" for safety.
+    api_key = os.environ.get(api_key_env) if api_key_env else None
+    if not api_key and api_key_env and _looks_like_api_key(api_key_env):
+        api_key = api_key_env
+
+    if not api_key:
+        raise RuntimeError(f"Missing OpenAI compatible API key (env={api_key_env})")
+
+    # 2) Resolve base_url: allow both "env var name" and "literal URL".
+    base_url = os.environ.get(base_url_env) if base_url_env else None
+    if not base_url and base_url_env and _looks_like_url(base_url_env):
+        base_url = base_url_env
+    if base_url:
+        base_url = str(base_url).strip() or None
+    return str(api_key), base_url
+
+
 def _system_prompt(scene_count: int, min_chars: int, max_chars: int) -> str:
     return f"""你是一个专业短视频编剧与分镜师。
 请把用户的创意扩展为 {scene_count} 个分镜(Scene) 的 JSON。
@@ -56,17 +88,13 @@ def generate_scenes(user_prompt: str, cfg: AppConfig) -> list[Scene]:
     min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
     max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
 
-    api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
-    base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
     model = str(cfg.get("openai.model", "gpt-4o-mini"))
 
-    api_key = os.environ.get(api_key_env)
-    if not api_key:
-        raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")
+    api_key, base_url = _resolve_openai_credentials(cfg)
 
     client = OpenAI(
         api_key=api_key,
-        base_url=os.environ.get(base_url_env) or None,
+        base_url=base_url,
     )
 
     resp = client.chat.completions.create(
@@ -105,17 +133,13 @@ def refine_scene(*, prompt: str, scenes: list[Scene], target_index: int, cfg: Ap
     min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
     max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
 
-    api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
-    base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
     model = str(cfg.get("openai.model", "gpt-4o-mini"))
 
-    api_key = os.environ.get(api_key_env)
-    if not api_key:
-        raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")
+    api_key, base_url = _resolve_openai_credentials(cfg)
 
     client = OpenAI(
         api_key=api_key,
-        base_url=os.environ.get(base_url_env) or None,
+        base_url=base_url,
     )
 
     scenes_payload = [
diff --git a/engine/shot_executor.py b/engine/shot_executor.py
index 99bc95c..77b0dde 100644
--- a/engine/shot_executor.py
+++ b/engine/shot_executor.py
@@ -1,42 +1,53 @@
 from __future__ import annotations
 
 import asyncio
+import os
 import random
 from pathlib import Path
 from typing import Any
 
-from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, vfx
+import numpy as np
+from moviepy import AudioFileClip, VideoClip
+from PIL import Image
+from urllib.request import urlopen
 
 from .audio_gen import synthesize_one
-from .comfy_client import ComfyClient
+from .comfy_client import generate_image as comfy_generate_image
 from .config import AppConfig
+from .render_pipeline import render_shot as render_shot_pipeline
 
 
-def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
-    if audio.duration is None or video.duration is None:
-        return video.with_audio(audio)
-    if audio.duration > video.duration:
-        video = video.with_effects([vfx.Loop(duration=audio.duration)])
-    elif video.duration > audio.duration:
-        video = video.subclipped(0, audio.duration)
-    return video.with_audio(audio)
+ASSETS_DIR = "assets"
+DEMO_IMAGE = os.path.join(ASSETS_DIR, "demo.jpg")
 
 
-def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
-    return (
-        TextClip(
-            text=text,
-            font_size=44,
-            color="white",
-            stroke_color="black",
-            stroke_width=2,
-            size=(int(size[0] * 0.92), None),
-            method="caption",
-        )
-        .with_position(("center", "bottom"))
-        .with_duration(duration)
-        .with_opacity(0.95)
-    )
+def ensure_demo_image() -> None:
+    os.makedirs(ASSETS_DIR, exist_ok=True)
+    if os.path.exists(DEMO_IMAGE):
+        return
+
+    # Simple placeholder image source.
+    url = "https://picsum.photos/1280/720"
+    with urlopen(url, timeout=30) as resp:
+        data = resp.read()
+
+    with open(DEMO_IMAGE, "wb") as f:
+        f.write(data)
+
+
+def generate_image_mock(prompt: str) -> str:
+    # Keep interface compatible with the requested interface.
+    _ = prompt
+    ensure_demo_image()
+    return DEMO_IMAGE
+
+
+def enrich_prompt(prompt_text: str) -> str:
+    style = "cinematic, ultra realistic, 4k, detailed lighting"
+    pt = (prompt_text or "").strip()
+    if not pt:
+        return style
+    return f"{pt}, {style}"
 
 
 async def _render_shot_async(
@@ -55,49 +66,102 @@ async def _render_shot_async(
 
     shot_id = str(shot.get("shot_id", "unknown"))
     image_prompt = str(shot.get("image_prompt", "")).strip()
-    motion = str(shot.get("motion", "")).strip()
+    prompt_text = str(shot.get("prompt", image_prompt) or image_prompt).strip()
     tts_text = str(shot.get("tts", "")).strip()
     duration_s = max(1.0, float(shot.get("duration", 3)))
 
     voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
     rate = str(cfg.get("tts.rate", "+0%"))
     volume = str(cfg.get("tts.volume", "+0%"))
-    audio_path = audio_dir / f"shot_{shot_id}.mp3"
-    audio_asset = await synthesize_one(tts_text or " ", audio_path, voice, rate, volume)
+    audio_asset: Any | None = None
+    if tts_text:
+        audio_path = audio_dir / f"shot_{shot_id}.mp3"
+        audio_asset = await synthesize_one(tts_text, audio_path, voice, rate, volume)
 
+    # Use config-defined output resolution for stable concatenation.
+    mock_size = cfg.get("video.mock_size", [1024, 576])
+    w, h = int(mock_size[0]), int(mock_size[1])
+    fps = int(cfg.get("video.mock_fps", 24))
+
+    if audio_asset and audio_asset.duration_s:
+        duration_s = max(duration_s, float(audio_asset.duration_s))
+
+    # shot -> image (ComfyUI first; fallback to demo.jpg)
+    image_path: str
     if mock:
-        from engine.main import _ensure_mock_image, _make_mock_video  # local import to avoid circular at module import
-
-        mock_size = cfg.get("video.mock_size", [1024, 576])
-        w, h = int(mock_size[0]), int(mock_size[1])
-        mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h))
-        fps = int(cfg.get("video.mock_fps", 24))
-        raw_video_path = out_dir / f"shot_raw_{shot_id}.mp4"
-        _make_mock_video(raw_video_path, mock_image, max(duration_s, audio_asset.duration_s), fps=fps)
+        image_path = generate_image_mock(prompt_text)
     else:
-        comfy = ComfyClient(cfg)
-        wf = comfy.load_workflow()
-        seed = random.randint(1, 2_147_483_647)
-        wf_i = comfy.inject_params(wf, image_prompt=image_prompt, seed=seed, motion_prompt=motion or None)
-        result = await comfy.run_workflow(wf_i)
-        candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}]
-        raw_video_path = candidates[0] if candidates else result.output_files[0]
-
-    clip_out = clips_dir / f"shot_{shot_id}.mp4"
-    v = VideoFileClip(str(raw_video_path))
-    a = AudioFileClip(str(audio_asset.path))
-    try:
-        v2 = _fit_video_to_audio(v, a)
-        w2, h2 = v2.size
-        subtitle = _subtitle_clip(tts_text, (w2, h2), v2.duration or a.duration or duration_s)
-        comp = CompositeVideoClip([v2, subtitle])
         try:
-            comp.write_videofile(str(clip_out), codec="libx264", audio_codec="aac", fps=v2.fps or 24, preset="veryfast")
-        finally:
-            comp.close()
+            enriched = enrich_prompt(prompt_text)
+            # Store generated images directly under outputs/{task_id}
+            # (as required by verification: outputs/{task_id}/*.png).
+            image_path = str(
+                comfy_generate_image(
+                    enriched,
+                    out_dir,
+                    cfg=cfg,
+                    timeout_s=60,
+                    retry=2,
+                    filename_prefix=f"shot_{shot_id}",
+                )
+            )
+            print(f"[SHOT_RENDER] {shot_id} -> image generated: {image_path}")
+        except Exception as e:
+            print(f"[WARN] Comfy failed, fallback to demo: {e}")
+            image_path = generate_image_mock(prompt_text)
+
+    # Ensure image exists before rendering.
+    if not image_path or not os.path.exists(image_path):
+        image_path = generate_image_mock(prompt_text)
+    base_img = Image.open(image_path).convert("RGB")
+
+    def make_frame(t: float):
+        # Subtle zoom-in from 1.00 to ~1.03 over the clip duration.
+        progress = float(t) / max(duration_s, 1e-6)
+        progress = max(0.0, min(1.0, progress))
+        scale = 1.0 + 0.03 * progress
+
+        new_w = max(w, int(w * scale))
+        new_h = max(h, int(h * scale))
+
+        frame = base_img.resize((new_w, new_h), Image.LANCZOS)
+        left = (new_w - w) // 2
+        top = (new_h - h) // 2
+        frame = frame.crop((left, top, left + w, top + h))
+        return np.array(frame)
+
+    # image -> video
+    video = VideoClip(make_frame, duration=duration_s, has_constant_size=True)
+
+    # optional audio -> clip
+    audio_clip: AudioFileClip | None = None
+    if audio_asset and os.path.exists(str(audio_asset.path)):
+        audio_clip = AudioFileClip(str(audio_asset.path))
+        video = video.with_audio(audio_clip)
+
+    # output
+    clip_out = clips_dir / f"shot_{shot_id}.mp4"
+    print(f"[SHOT_RENDER] {shot_id} -> {clip_out}")
+    try:
+        video.write_videofile(
+            str(clip_out),
+            fps=fps,
+            codec="libx264",
+            audio_codec="aac",
+            preset="veryfast",
+            threads=2,
+        )
     finally:
-        v.close()
-        a.close()
+        try:
+            video.close()
+        except Exception:
+            pass
+        if audio_clip is not None:
+            try:
+                audio_clip.close()
+            except Exception:
+                pass
+
     return str(clip_out)
 
 
@@ -109,5 +173,5 @@ def render_shot(
     mock: bool = False,
 ) -> str:
     cfg2 = cfg or AppConfig.load("./configs/config.yaml")
-    return asyncio.run(_render_shot_async(shot, output_dir, cfg2, mock=mock))
+    return render_shot_pipeline(shot, cfg2, output_dir, mock=mock)
 
diff --git a/outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/audio/shot_scene_01_01.mp3 b/outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/task.json b/outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/task.json
new file mode 100644
index 0000000..1467223
--- /dev/null
+++ b/outputs/'06b0a90f-c964-4a88-8e80-6ff668e031b3'/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "'06b0a90f-c964-4a88-8e80-6ff668e031b3'",
+  "status": "failed",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "running"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "pending"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "pending"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_01_01.mp3 b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_02_01.mp3 b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..cee3639
Binary files /dev/null and b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_03_01.mp3 b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..e9e8d2a
Binary files /dev/null and b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_01_01.mp4 b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..c9faece
Binary files /dev/null and b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_02_01.mp4 b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..681f666
Binary files /dev/null and b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_03_01.mp4 b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..9c2119b
Binary files /dev/null and b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/final.mp4 b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/final.mp4
new file mode 100644
index 0000000..c4f4c32
Binary files /dev/null and b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/final.mp4 differ
diff --git a/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/task.json b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/task.json
new file mode 100644
index 0000000..debb37e
--- /dev/null
+++ b/outputs/'13c9b724-77e3-4553-aebf-dfc845dd17c1'/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "'13c9b724-77e3-4553-aebf-dfc845dd17c1'",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/0d546f5e-0274-4372-b91d-fb64ace85d49/scenes.json b/outputs/0d546f5e-0274-4372-b91d-fb64ace85d49/scenes.json
new file mode 100644
index 0000000..5b08e9f
--- /dev/null
+++ b/outputs/0d546f5e-0274-4372-b91d-fb64ace85d49/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_01_01.mp3 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_02_01.mp3 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..cee3639
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_03_01.mp3 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..e9e8d2a
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_01_01.mp4 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..a78a1c3
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_02_01.mp4 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..5a3388e
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_03_01.mp4 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..5390725
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/final.mp4 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/final.mp4
new file mode 100644
index 0000000..1af94fe
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/final.mp4 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/scenes.json b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/scenes.json
new file mode 100644
index 0000000..5b08e9f
--- /dev/null
+++ b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_01_01.mp4 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_01_01.mp4
new file mode 100644
index 0000000..fe4e343
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_01_01.mp4 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_02_01.mp4 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_02_01.mp4
new file mode 100644
index 0000000..fe4e343
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_02_01.mp4 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_03_01.mp4 b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_03_01.mp4
new file mode 100644
index 0000000..fe4e343
Binary files /dev/null and b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/shot_raw_scene_03_01.mp4 differ
diff --git a/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/task.json b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/task.json
new file mode 100644
index 0000000..81a28f5
--- /dev/null
+++ b/outputs/3ef0c0b8-c90f-49a8-88e4-e8ca735312f0/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "3ef0c0b8-c90f-49a8-88e4-e8ca735312f0",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/audio/shot_scene_01_01.mp3 b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/clips/shot_scene_01_01.mp4 b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..c9faece
Binary files /dev/null and b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/final.mp4 b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/final.mp4
new file mode 100644
index 0000000..8f18ee1
Binary files /dev/null and b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/final.mp4 differ
diff --git a/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/task.json b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/task.json
new file mode 100644
index 0000000..0447844
--- /dev/null
+++ b/outputs/3f82b1ce-da18-4f82-9147-25eb0abeaf2c/task.json
@@ -0,0 +1,10 @@
+{
+  "task_id": "3f82b1ce-da18-4f82-9147-25eb0abeaf2c",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_01_01.mp3 b/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_02_01.mp3 b/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..cee3639
Binary files /dev/null and b/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_03_01.mp3 b/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..e9e8d2a
Binary files /dev/null and b/outputs/62da5541-43d2-4ead-a243-e68345877dff/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_01_01.mp4 b/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..c9faece
Binary files /dev/null and b/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_02_01.mp4 b/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..681f666
Binary files /dev/null and b/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_03_01.mp4 b/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..9c2119b
Binary files /dev/null and b/outputs/62da5541-43d2-4ead-a243-e68345877dff/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/final.mp4 b/outputs/62da5541-43d2-4ead-a243-e68345877dff/final.mp4
new file mode 100644
index 0000000..c4f4c32
Binary files /dev/null and b/outputs/62da5541-43d2-4ead-a243-e68345877dff/final.mp4 differ
diff --git a/outputs/62da5541-43d2-4ead-a243-e68345877dff/task.json b/outputs/62da5541-43d2-4ead-a243-e68345877dff/task.json
new file mode 100644
index 0000000..0d09ca5
--- /dev/null
+++ b/outputs/62da5541-43d2-4ead-a243-e68345877dff/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "62da5541-43d2-4ead-a243-e68345877dff",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_01_01.mp3 b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_02_01.mp3 b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..cee3639
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_03_01.mp3 b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..e9e8d2a
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_01_01.mp4 b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..c9faece
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_02_01.mp4 b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..681f666
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_03_01.mp4 b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..9c2119b
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/final.mp4 b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/final.mp4
new file mode 100644
index 0000000..c4f4c32
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/final.mp4 differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/scenes.json b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/scenes.json
new file mode 100644
index 0000000..75b4740
--- /dev/null
+++ b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_10743d29878a41dd9e5c8b6b5c84a743.png b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_10743d29878a41dd9e5c8b6b5c84a743.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_10743d29878a41dd9e5c8b6b5c84a743.png differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_2fef6555f3f34a42b0e100b01cd4c281.png b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_2fef6555f3f34a42b0e100b01cd4c281.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_2fef6555f3f34a42b0e100b01cd4c281.png differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_42485af317eb4e888efaaa55ee66cd33.png b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_42485af317eb4e888efaaa55ee66cd33.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_42485af317eb4e888efaaa55ee66cd33.png differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_6aa8cc90ce644ab88c6c022a9ac71168.png b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_6aa8cc90ce644ab88c6c022a9ac71168.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_6aa8cc90ce644ab88c6c022a9ac71168.png differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_d39b04ab653b4496ada1ac9385f0abac.png b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_d39b04ab653b4496ada1ac9385f0abac.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_d39b04ab653b4496ada1ac9385f0abac.png differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_fdd6eb4905fe4644a9ff4140dcff7251.png b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_fdd6eb4905fe4644a9ff4140dcff7251.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/shot_fdd6eb4905fe4644a9ff4140dcff7251.png differ
diff --git a/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/task.json b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/task.json
new file mode 100644
index 0000000..c03e34a
--- /dev/null
+++ b/outputs/7b8255ea-ed2f-4356-8a57-d5c77e351351/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "7b8255ea-ed2f-4356-8a57-d5c77e351351",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/scenes.json b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/scenes.json
new file mode 100644
index 0000000..75b4740
--- /dev/null
+++ b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_0e00ee0e06544cd49f00ba68a65a68d2.png b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_0e00ee0e06544cd49f00ba68a65a68d2.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_0e00ee0e06544cd49f00ba68a65a68d2.png differ
diff --git a/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_349333bf77ca465a93e9ecc6f09ddde1.png b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_349333bf77ca465a93e9ecc6f09ddde1.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_349333bf77ca465a93e9ecc6f09ddde1.png differ
diff --git a/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_6a6376abbc2449b2a6646a24064d0430.png b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_6a6376abbc2449b2a6646a24064d0430.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/a77ef82b-81ea-4ff5-b592-db0ebb047df4/shot_6a6376abbc2449b2a6646a24064d0430.png differ
diff --git a/outputs/ab4bb47e-cf35-4a99-977f-63097bdac9ed/scenes.json b/outputs/ab4bb47e-cf35-4a99-977f-63097bdac9ed/scenes.json
new file mode 100644
index 0000000..5b08e9f
--- /dev/null
+++ b/outputs/ab4bb47e-cf35-4a99-977f-63097bdac9ed/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_01_01.mp3 b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_02_01.mp3 b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..cee3639
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_03_01.mp3 b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..e9e8d2a
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_01_01.mp4 b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..c9faece
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_02_01.mp4 b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..681f666
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_03_01.mp4 b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..9c2119b
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/final.mp4 b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/final.mp4
new file mode 100644
index 0000000..c4f4c32
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/final.mp4 differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/scenes.json b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/scenes.json
new file mode 100644
index 0000000..75b4740
--- /dev/null
+++ b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_225525ed518042e99d6cf7b430e126e0.png b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_225525ed518042e99d6cf7b430e126e0.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_225525ed518042e99d6cf7b430e126e0.png differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_66e3402393964b22a11bc3b06459989d.png b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_66e3402393964b22a11bc3b06459989d.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_66e3402393964b22a11bc3b06459989d.png differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_8d1b1091edd94171a2795aabf6637f5f.png b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_8d1b1091edd94171a2795aabf6637f5f.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/shot_8d1b1091edd94171a2795aabf6637f5f.png differ
diff --git a/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/task.json b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/task.json
new file mode 100644
index 0000000..d0ad240
--- /dev/null
+++ b/outputs/ab68ccf6-0de0-4465-b4d7-1843f88d0201/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "ab68ccf6-0de0-4465-b4d7-1843f88d0201",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/b4d67faf-fabe-4ead-8803-7bfbb7ee4ad2/scenes.json b/outputs/b4d67faf-fabe-4ead-8803-7bfbb7ee4ad2/scenes.json
new file mode 100644
index 0000000..dda94aa
--- /dev/null
+++ b/outputs/b4d67faf-fabe-4ead-8803-7bfbb7ee4ad2/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "Cinematic night shot, wet street reflecting neon lights, Xiao Lin walking away, beige trench coat, white scarf, cold tone background, bokeh.",
+      "video_motion": "镜头缓慢跟随背影移动，雨丝飘落。",
+      "narration": "霓虹灯下城市结束喧嚣，夜色格外温柔。"
+    },
+    {
+      "image_prompt": "Medium shot inside convenience store, warm yellow lighting, Xiao Lin holding hot coffee, steam rising, soft facial lighting, cinematic depth of field.",
+      "video_motion": "镜头缓缓推进，捕捉蒸汽升腾动态。",
+      "narration": "街角便利店的灯光，是深夜里最暖的守候。"
+    },
+    {
+      "image_prompt": "Close-up of Xiao Lin smiling slightly, blurred city light bokeh background, beige coat collar visible, warm atmosphere, high quality portrait.",
+      "video_motion": "固定镜头微距拍摄，眼神自然眨动。",
+      "narration": "捧一杯热茶，原来幸福就藏在平凡夜晚里。"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/b77f2668-6451-47ff-81da-48b498ecb436/scenes.json b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/scenes.json
new file mode 100644
index 0000000..75b4740
--- /dev/null
+++ b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_4c28ead0e7d14b9bbd88f011ea70fa84.png b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_4c28ead0e7d14b9bbd88f011ea70fa84.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_4c28ead0e7d14b9bbd88f011ea70fa84.png differ
diff --git a/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_7a2a3c7725b741bd8d1c967d1b9e3f53.png b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_7a2a3c7725b741bd8d1c967d1b9e3f53.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_7a2a3c7725b741bd8d1c967d1b9e3f53.png differ
diff --git a/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_c03030151d7d48fbbdfe6a38f3ea290e.png b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_c03030151d7d48fbbdfe6a38f3ea290e.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/b77f2668-6451-47ff-81da-48b498ecb436/shot_c03030151d7d48fbbdfe6a38f3ea290e.png differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_01_01.mp3 b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..edb407f
Binary files /dev/null and b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_02_01.mp3 b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..811e6cd
Binary files /dev/null and b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_03_01.mp3 b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..9e95ca7
Binary files /dev/null and b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_01_01.mp4 b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..4e84896
Binary files /dev/null and b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_02_01.mp4 b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..f164bcb
Binary files /dev/null and b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_03_01.mp4 b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..9f5ab50
Binary files /dev/null and b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/final.mp4 b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/final.mp4
new file mode 100644
index 0000000..bdc1cc2
Binary files /dev/null and b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/final.mp4 differ
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/scenes.json b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/scenes.json
new file mode 100644
index 0000000..e36e51c
--- /dev/null
+++ b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "电影感夜景，湿润街道反射霓虹，小林独自走在街头，米色羊毛大衣，灰色围巾，背景虚化光斑，35mm 镜头",
+      "video_motion": "低角度慢速跟拍，镜头随人物步伐轻微晃动，霓虹光影流动",
+      "narration": "城市的夜晚有些冷，霓虹灯在风中闪烁。"
+    },
+    {
+      "image_prompt": "特写镜头，小林双手捧着热纸杯，蒸汽升腾，街边摊位暖光照明，米色大衣袖口可见，电影级布光",
+      "video_motion": "固定镜头，焦点从热气缓缓移至人物眼睛，蒸汽向上飘散",
+      "narration": "街角摊贩的热气，温暖了冻僵的双手。"
+    },
+    {
+      "image_prompt": "中景背影，小林走向远处灯火，米色大衣，灰色围巾，城市背景光斑模糊，暖色调夜景，景深效果",
+      "video_motion": "镜头缓慢后拉，人物走入景深深处，背景光斑逐渐扩大",
+      "narration": "捧着这份温度，回家的路不再漫长。"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/task.json b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/task.json
new file mode 100644
index 0000000..d83dfef
--- /dev/null
+++ b/outputs/be02bd5e-7890-4d0e-8f53-60443d713db8/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "be02bd5e-7890-4d0e-8f53-60443d713db8",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/scenes.json b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/scenes.json
new file mode 100644
index 0000000..75b4740
--- /dev/null
+++ b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事\n\n\n[Global Constraints]\n- Global Style: 电影感\n请严格遵守上述全局信息，并保持三分镜主角一致。，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_5b88a978a09a416e83380540a100f578.png b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_5b88a978a09a416e83380540a100f578.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_5b88a978a09a416e83380540a100f578.png differ
diff --git a/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_5bb49e0bc63649c386901afba4579880.png b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_5bb49e0bc63649c386901afba4579880.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_5bb49e0bc63649c386901afba4579880.png differ
diff --git a/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_b25526b451b646ab95eb45b23a52cb2f.png b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_b25526b451b646ab95eb45b23a52cb2f.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/c0a5eb46-51ce-4e55-b653-541103917c20/shot_b25526b451b646ab95eb45b23a52cb2f.png differ
diff --git a/outputs/d4de4ab3-29bb-41b1-bc97-daa3d131baaf/scenes.json b/outputs/d4de4ab3-29bb-41b1-bc97-daa3d131baaf/scenes.json
new file mode 100644
index 0000000..c4da354
--- /dev/null
+++ b/outputs/d4de4ab3-29bb-41b1-bc97-daa3d131baaf/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "hello，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "hello，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "hello，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_01_01.mp3 b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_02_01.mp3 b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..cee3639
Binary files /dev/null and b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_03_01.mp3 b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..e9e8d2a
Binary files /dev/null and b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_01_01.mp4 b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..c9faece
Binary files /dev/null and b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_02_01.mp4 b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..681f666
Binary files /dev/null and b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_03_01.mp4 b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..9c2119b
Binary files /dev/null and b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/final.mp4 b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/final.mp4
new file mode 100644
index 0000000..c4f4c32
Binary files /dev/null and b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/final.mp4 differ
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/scenes.json b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/scenes.json
new file mode 100644
index 0000000..5b08e9f
--- /dev/null
+++ b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/task.json b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/task.json
new file mode 100644
index 0000000..fcfde4d
--- /dev/null
+++ b/outputs/f2810b54-03a6-45e9-9032-9e6b2e33c104/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "f2810b54-03a6-45e9-9032-9e6b2e33c104",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_01_01.mp3 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..36c5dc2
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_02_01.mp3 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..cee3639
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_03_01.mp3 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_03_01.mp3
new file mode 100644
index 0000000..e9e8d2a
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/audio/shot_scene_03_01.mp3 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_01_01.mp4 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..a78a1c3
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_02_01.mp4 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..5a3388e
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_03_01.mp4 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_03_01.mp4
new file mode 100644
index 0000000..5390725
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/clips/shot_scene_03_01.mp4 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/final.mp4 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/final.mp4
new file mode 100644
index 0000000..1af94fe
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/final.mp4 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/scenes.json b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/scenes.json
new file mode 100644
index 0000000..5b08e9f
--- /dev/null
+++ b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "写一个温暖的城市夜景故事，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_01_01.mp4 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_01_01.mp4
new file mode 100644
index 0000000..fe4e343
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_01_01.mp4 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_02_01.mp4 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_02_01.mp4
new file mode 100644
index 0000000..fe4e343
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_02_01.mp4 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_03_01.mp4 b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_03_01.mp4
new file mode 100644
index 0000000..fe4e343
Binary files /dev/null and b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/shot_raw_scene_03_01.mp4 differ
diff --git a/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/task.json b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/task.json
new file mode 100644
index 0000000..f4619ed
--- /dev/null
+++ b/outputs/ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77/task.json
@@ -0,0 +1,18 @@
+{
+  "task_id": "ff47613e-6516-4e5d-a9e8-fd0c7ef1ba77",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_03_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-fallback-hard/scenes.json b/outputs/smoke-fallback-hard/scenes.json
new file mode 100644
index 0000000..41d4636
--- /dev/null
+++ b/outputs/smoke-fallback-hard/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "fallback-hard-test，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "fallback-hard-test，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "fallback-hard-test，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-fallback-hard/shot_4f0d67d8750444418ff2192abf0bcf7a.png b/outputs/smoke-fallback-hard/shot_4f0d67d8750444418ff2192abf0bcf7a.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-fallback-hard/shot_4f0d67d8750444418ff2192abf0bcf7a.png differ
diff --git a/outputs/smoke-fallback-hard/shot_7c95c8ed44144ebca4d2a338339c1b62.png b/outputs/smoke-fallback-hard/shot_7c95c8ed44144ebca4d2a338339c1b62.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-fallback-hard/shot_7c95c8ed44144ebca4d2a338339c1b62.png differ
diff --git a/outputs/smoke-fallback-hard/shot_f5aa777799864916915a176883fb5ab2.png b/outputs/smoke-fallback-hard/shot_f5aa777799864916915a176883fb5ab2.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-fallback-hard/shot_f5aa777799864916915a176883fb5ab2.png differ
diff --git a/outputs/smoke-multi-1/audio/shot_scene_01_01.mp3 b/outputs/smoke-multi-1/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..173c951
Binary files /dev/null and b/outputs/smoke-multi-1/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/smoke-multi-1/clips/shot_scene_01_01.mp4 b/outputs/smoke-multi-1/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..e12eccd
Binary files /dev/null and b/outputs/smoke-multi-1/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/smoke-multi-1/final.mp4 b/outputs/smoke-multi-1/final.mp4
new file mode 100644
index 0000000..5d3c539
Binary files /dev/null and b/outputs/smoke-multi-1/final.mp4 differ
diff --git a/outputs/smoke-multi-1/shot_7db8c0342c0b406b84e2919fc9271ba3.png b/outputs/smoke-multi-1/shot_7db8c0342c0b406b84e2919fc9271ba3.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-multi-1/shot_7db8c0342c0b406b84e2919fc9271ba3.png differ
diff --git a/outputs/smoke-multi-1/task.json b/outputs/smoke-multi-1/task.json
new file mode 100644
index 0000000..5b40339
--- /dev/null
+++ b/outputs/smoke-multi-1/task.json
@@ -0,0 +1,10 @@
+{
+  "task_id": "smoke-multi-1",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-refine-1/refine_scene_1.json b/outputs/smoke-refine-1/refine_scene_1.json
new file mode 100644
index 0000000..4fcda21
--- /dev/null
+++ b/outputs/smoke-refine-1/refine_scene_1.json
@@ -0,0 +1,6 @@
+{
+  "index": 1,
+  "image_prompt": "a warm city night",
+  "video_motion": "缓慢推进镜头",
+  "narration": "夜色温柔（更凝练）"
+}
\ No newline at end of file
diff --git a/outputs/smoke-refine-sse/refine_scene_1.json b/outputs/smoke-refine-sse/refine_scene_1.json
new file mode 100644
index 0000000..a93b224
--- /dev/null
+++ b/outputs/smoke-refine-sse/refine_scene_1.json
@@ -0,0 +1,6 @@
+{
+  "index": 1,
+  "image_prompt": "test",
+  "video_motion": "move",
+  "narration": "旁白（更凝练）"
+}
\ No newline at end of file
diff --git a/outputs/smoke-refine-sse/scenes.json b/outputs/smoke-refine-sse/scenes.json
new file mode 100644
index 0000000..87bf95e
--- /dev/null
+++ b/outputs/smoke-refine-sse/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "refine-sse-test，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "refine-sse-test，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "refine-sse-test，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-refine-sse/shot_2bc87cbac84f4848bce8bf7a4d28e0d1.png b/outputs/smoke-refine-sse/shot_2bc87cbac84f4848bce8bf7a4d28e0d1.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-refine-sse/shot_2bc87cbac84f4848bce8bf7a4d28e0d1.png differ
diff --git a/outputs/smoke-refine-sse/shot_81e1f292749d4631ad90f399978d7031.png b/outputs/smoke-refine-sse/shot_81e1f292749d4631ad90f399978d7031.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-refine-sse/shot_81e1f292749d4631ad90f399978d7031.png differ
diff --git a/outputs/smoke-refine-sse/shot_9ee28ddcf6c44fc59ad5d7a5b4d23b9e.png b/outputs/smoke-refine-sse/shot_9ee28ddcf6c44fc59ad5d7a5b4d23b9e.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-refine-sse/shot_9ee28ddcf6c44fc59ad5d7a5b4d23b9e.png differ
diff --git a/outputs/smoke-refine-sse/shot_a85bc815e422450393e0815bb3dd7d41.png b/outputs/smoke-refine-sse/shot_a85bc815e422450393e0815bb3dd7d41.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-refine-sse/shot_a85bc815e422450393e0815bb3dd7d41.png differ
diff --git a/outputs/smoke-script-1/scenes.json b/outputs/smoke-script-1/scenes.json
new file mode 100644
index 0000000..c4da354
--- /dev/null
+++ b/outputs/smoke-script-1/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "hello，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "hello，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "hello，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-test-1/audio/shot_scene_01_01.mp3 b/outputs/smoke-test-1/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..173c951
Binary files /dev/null and b/outputs/smoke-test-1/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/smoke-test-1/audio/shot_scene_02_01.mp3 b/outputs/smoke-test-1/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..5ffa77b
Binary files /dev/null and b/outputs/smoke-test-1/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/smoke-test-1/clips/shot_scene_01_01.mp4 b/outputs/smoke-test-1/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..e12eccd
Binary files /dev/null and b/outputs/smoke-test-1/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/smoke-test-1/clips/shot_scene_02_01.mp4 b/outputs/smoke-test-1/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..243a7b5
Binary files /dev/null and b/outputs/smoke-test-1/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/smoke-test-1/final.mp4 b/outputs/smoke-test-1/final.mp4
new file mode 100644
index 0000000..1a88b15
Binary files /dev/null and b/outputs/smoke-test-1/final.mp4 differ
diff --git a/outputs/smoke-test-1/task.json b/outputs/smoke-test-1/task.json
new file mode 100644
index 0000000..3d6eac1
--- /dev/null
+++ b/outputs/smoke-test-1/task.json
@@ -0,0 +1,14 @@
+{
+  "task_id": "smoke-test-1",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-test-2/audio/shot_scene_01_01.mp3 b/outputs/smoke-test-2/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..173c951
Binary files /dev/null and b/outputs/smoke-test-2/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/smoke-test-2/audio/shot_scene_02_01.mp3 b/outputs/smoke-test-2/audio/shot_scene_02_01.mp3
new file mode 100644
index 0000000..5ffa77b
Binary files /dev/null and b/outputs/smoke-test-2/audio/shot_scene_02_01.mp3 differ
diff --git a/outputs/smoke-test-2/clips/shot_scene_01_01.mp4 b/outputs/smoke-test-2/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..e12eccd
Binary files /dev/null and b/outputs/smoke-test-2/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/smoke-test-2/clips/shot_scene_02_01.mp4 b/outputs/smoke-test-2/clips/shot_scene_02_01.mp4
new file mode 100644
index 0000000..243a7b5
Binary files /dev/null and b/outputs/smoke-test-2/clips/shot_scene_02_01.mp4 differ
diff --git a/outputs/smoke-test-2/final.mp4 b/outputs/smoke-test-2/final.mp4
new file mode 100644
index 0000000..1a88b15
Binary files /dev/null and b/outputs/smoke-test-2/final.mp4 differ
diff --git a/outputs/smoke-test-2/task.json b/outputs/smoke-test-2/task.json
new file mode 100644
index 0000000..462095e
--- /dev/null
+++ b/outputs/smoke-test-2/task.json
@@ -0,0 +1,14 @@
+{
+  "task_id": "smoke-test-2",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    },
+    {
+      "shot_id": "scene_02_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-test-3/audio/shot_scene_01_01.mp3 b/outputs/smoke-test-3/audio/shot_scene_01_01.mp3
new file mode 100644
index 0000000..173c951
Binary files /dev/null and b/outputs/smoke-test-3/audio/shot_scene_01_01.mp3 differ
diff --git a/outputs/smoke-test-3/clips/shot_scene_01_01.mp4 b/outputs/smoke-test-3/clips/shot_scene_01_01.mp4
new file mode 100644
index 0000000..e12eccd
Binary files /dev/null and b/outputs/smoke-test-3/clips/shot_scene_01_01.mp4 differ
diff --git a/outputs/smoke-test-3/final.mp4 b/outputs/smoke-test-3/final.mp4
new file mode 100644
index 0000000..5d3c539
Binary files /dev/null and b/outputs/smoke-test-3/final.mp4 differ
diff --git a/outputs/smoke-test-3/shot_813d7ff6067346c2875b688874c3ed54.png b/outputs/smoke-test-3/shot_813d7ff6067346c2875b688874c3ed54.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-test-3/shot_813d7ff6067346c2875b688874c3ed54.png differ
diff --git a/outputs/smoke-test-3/task.json b/outputs/smoke-test-3/task.json
new file mode 100644
index 0000000..8a2977b
--- /dev/null
+++ b/outputs/smoke-test-3/task.json
@@ -0,0 +1,10 @@
+{
+  "task_id": "smoke-test-3",
+  "status": "done",
+  "shots": [
+    {
+      "shot_id": "scene_01_01",
+      "status": "done"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-ui-1/scenes.json b/outputs/smoke-ui-1/scenes.json
new file mode 100644
index 0000000..9a2bb52
--- /dev/null
+++ b/outputs/smoke-ui-1/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "自动分镜测试，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "自动分镜测试，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "自动分镜测试，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-ui-1/shot_19fd419d32b046b2ab9652d013616106.png b/outputs/smoke-ui-1/shot_19fd419d32b046b2ab9652d013616106.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-1/shot_19fd419d32b046b2ab9652d013616106.png differ
diff --git a/outputs/smoke-ui-1/shot_acb4acee6a51464b9af2f83226c21f0c.png b/outputs/smoke-ui-1/shot_acb4acee6a51464b9af2f83226c21f0c.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-1/shot_acb4acee6a51464b9af2f83226c21f0c.png differ
diff --git a/outputs/smoke-ui-1/shot_f0cdc48d52de427fb8665382c139d290.png b/outputs/smoke-ui-1/shot_f0cdc48d52de427fb8665382c139d290.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-1/shot_f0cdc48d52de427fb8665382c139d290.png differ
diff --git a/outputs/smoke-ui-2/scenes.json b/outputs/smoke-ui-2/scenes.json
new file mode 100644
index 0000000..19c7638
--- /dev/null
+++ b/outputs/smoke-ui-2/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "状态机测试，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "状态机测试，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "状态机测试，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-ui-2/shot_665af13cab614672b5934a54bf2a5970.png b/outputs/smoke-ui-2/shot_665af13cab614672b5934a54bf2a5970.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-2/shot_665af13cab614672b5934a54bf2a5970.png differ
diff --git a/outputs/smoke-ui-2/shot_9e9813f55d824ae88f0eaa2e35be4a45.png b/outputs/smoke-ui-2/shot_9e9813f55d824ae88f0eaa2e35be4a45.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-2/shot_9e9813f55d824ae88f0eaa2e35be4a45.png differ
diff --git a/outputs/smoke-ui-2/shot_caadfc482f8c421982ea21853ceba306.png b/outputs/smoke-ui-2/shot_caadfc482f8c421982ea21853ceba306.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-2/shot_caadfc482f8c421982ea21853ceba306.png differ
diff --git a/outputs/smoke-ui-3/scenes.json b/outputs/smoke-ui-3/scenes.json
new file mode 100644
index 0000000..725e6cb
--- /dev/null
+++ b/outputs/smoke-ui-3/scenes.json
@@ -0,0 +1,19 @@
+{
+  "scenes": [
+    {
+      "image_prompt": "schema版本验证，城市夜景，霓虹灯，电影感",
+      "video_motion": "缓慢推进镜头，轻微摇镜",
+      "narration": "夜色温柔落在街灯上"
+    },
+    {
+      "image_prompt": "schema版本验证，咖啡店窗边，暖光，细雨",
+      "video_motion": "侧向平移，人物轻轻抬头",
+      "narration": "雨声里藏着一段回忆"
+    },
+    {
+      "image_prompt": "schema版本验证，桥上远景，车流光轨，温暖",
+      "video_motion": "拉远全景，光轨流动",
+      "narration": "我们在光里学会告别"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/outputs/smoke-ui-3/shot_354103ba62a64dff8177c05b4c247541.png b/outputs/smoke-ui-3/shot_354103ba62a64dff8177c05b4c247541.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-3/shot_354103ba62a64dff8177c05b4c247541.png differ
diff --git a/outputs/smoke-ui-3/shot_71005f19e64c4c81a3d7b7f35c1a4cb9.png b/outputs/smoke-ui-3/shot_71005f19e64c4c81a3d7b7f35c1a4cb9.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-3/shot_71005f19e64c4c81a3d7b7f35c1a4cb9.png differ
diff --git a/outputs/smoke-ui-3/shot_fa0a1de7cd9c4a0993ca681830d5a6e1.png b/outputs/smoke-ui-3/shot_fa0a1de7cd9c4a0993ca681830d5a6e1.png
new file mode 100644
index 0000000..4c86bf9
Binary files /dev/null and b/outputs/smoke-ui-3/shot_fa0a1de7cd9c4a0993ca681830d5a6e1.png differ
diff --git a/server/index.js b/server/index.js
index 10a0480..4714761 100644
--- a/server/index.js
+++ b/server/index.js
@@ -59,6 +59,25 @@ function sseSend(res, event, data) {
   res.write("\n");
 }
 
+function sseStageUpdate(res, payload) {
+  // Unified schema for frontend stage rendering.
+  const safe = {
+    schema_version: 1,
+    stage: String(payload && payload.stage ? payload.stage : "Unknown"),
+    progress:
+      payload && typeof payload.progress === "number" && Number.isFinite(payload.progress)
+        ? Math.max(0, Math.min(1, payload.progress))
+        : null,
+    scene_index: payload && Number.isFinite(payload.scene_index) ? Number(payload.scene_index) : null,
+    scene_json: payload && payload.scene_json && typeof payload.scene_json === "object" ? payload.scene_json : null,
+    shot_id: payload && payload.shot_id ? String(payload.shot_id) : null,
+    shot_status: payload && payload.shot_status ? String(payload.shot_status) : null,
+    message: payload && payload.message ? String(payload.message) : "",
+    timestamp: Date.now(),
+  };
+  sseSend(res, "stage_update", JSON.stringify(safe));
+}
+
 function newTaskId() {
   // `crypto.randomUUID()` exists on newer Node versions; fall back for older runtimes.
   if (crypto && typeof crypto.randomUUID === "function") return crypto.randomUUID();
@@ -81,7 +100,19 @@ function ensureTaskDir(taskId) {
   return dir;
 }
 
-function spawnPythonStep({ step, prompt, configPath, mock, globalStyle, character, taskId, sceneIndex }) {
+function spawnPythonStep({
+  step,
+  prompt,
+  configPath,
+  mock,
+  globalStyle,
+  character,
+  taskId,
+  sceneIndex,
+  llmProvider,
+  imageProvider,
+  imageFallbackProvider,
+}) {
   const py = process.env.PYTHON_BIN || "python3.10";
   const args = [
     "-m",
@@ -99,7 +130,11 @@ function spawnPythonStep({ step, prompt, configPath, mock, globalStyle, characte
   if (globalStyle) args.push("--global-style", globalStyle);
   if (character) args.push("--character", character);
   if (mock) args.push("--mock");
-  return spawn(py, args, { cwd: repoRoot, env: process.env, stdio: ["pipe", "pipe", "pipe"] });
+  const childEnv = { ...process.env };
+  if (llmProvider) childEnv.ENGINE_LLM_PROVIDER = String(llmProvider).trim();
+  if (imageProvider) childEnv.ENGINE_IMAGE_PROVIDER = String(imageProvider).trim();
+  if (imageFallbackProvider) childEnv.ENGINE_IMAGE_FALLBACK_PROVIDER = String(imageFallbackProvider).trim();
+  return spawn(py, args, { cwd: repoRoot, env: childEnv, stdio: ["pipe", "pipe", "pipe"] });
 }
 
 app.get("/api/script", (req, res) => {
@@ -108,6 +143,9 @@ app.get("/api/script", (req, res) => {
   const globalStyle = String(req.query.global_style || "").trim();
   const character = String(req.query.character || "").trim();
   const configPath = String(req.query.config || "./configs/config.yaml");
+  const llmProvider = String(req.query.llm_provider || "").trim();
+  const imageProvider = String(req.query.image_provider || "").trim();
+  const imageFallbackProvider = String(req.query.image_fallback_provider || "").trim();
 
   if (!prompt) {
     res.status(400).json({ error: "missing prompt" });
@@ -129,9 +167,13 @@ app.get("/api/script", (req, res) => {
     globalStyle,
     character,
     taskId,
+    llmProvider,
+    imageProvider,
+    imageFallbackProvider,
   });
 
   let buf = "";
+  let sceneCount = 0;
   child.stdout.setEncoding("utf8");
   child.stdout.on("data", (chunk) => {
     buf += chunk;
@@ -139,15 +181,37 @@ app.get("/api/script", (req, res) => {
     buf = parts.pop() || "";
     for (const line of parts) {
       if (!line) continue;
-      if (line.startsWith("SCENE_JSON ")) sseSend(res, "scene", line.slice("SCENE_JSON ".length));
-      else if (line.startsWith("PROG ")) sseSend(res, "prog", line.slice("PROG ".length));
-      else sseSend(res, "line", line);
+      if (line.startsWith("SCENE_JSON ")) {
+        try {
+          const scene = JSON.parse(line.slice("SCENE_JSON ".length));
+          sceneCount += 1;
+          sseStageUpdate(res, {
+            stage: "Script",
+            scene_index: Number(scene.index || sceneCount) - 1,
+            scene_json: scene,
+            progress: Math.min(0.9, sceneCount / 3),
+            message: "scene_generated",
+          });
+        } catch {
+          sseSend(res, "line", line);
+        }
+      } else if (line.startsWith("PROG ")) {
+        try {
+          const p = JSON.parse(line.slice("PROG ".length));
+          sseStageUpdate(res, { stage: "Script", progress: Number(p.p || 0), message: p.msg || "" });
+        } catch {
+          sseSend(res, "line", line);
+        }
+      } else {
+        sseSend(res, "line", line);
+      }
     }
   });
 
   child.stderr.setEncoding("utf8");
   child.stderr.on("data", (chunk) => {
-    sseSend(res, "error", chunk);
+    // stderr can contain non-fatal logs/warnings; keep as a normal line event.
+    sseSend(res, "line", "[stderr] " + chunk);
   });
 
   req.on("close", () => {
@@ -156,6 +220,7 @@ app.get("/api/script", (req, res) => {
 
   child.on("exit", (code) => {
     if (buf.trim()) sseSend(res, "line", buf.trim());
+    if (code !== 0) sseSend(res, "error", `[ERROR] python exit_code=${code}`);
     sseSend(res, "done", String(code != null ? code : 0));
     res.end();
   });
@@ -170,6 +235,9 @@ app.post("/api/refine", (req, res) => {
   const globalStyle = String((req.body && req.body.global_style) || "").trim();
   const character = String((req.body && req.body.character) || "").trim();
   const configPath = String((req.body && req.body.config) || "./configs/config.yaml");
+  const llmProvider = String((req.body && req.body.llm_provider) || "").trim();
+  const imageProvider = String((req.body && req.body.image_provider) || "").trim();
+  const imageFallbackProvider = String((req.body && req.body.image_fallback_provider) || "").trim();
   const taskId = String((req.body && req.body.task_id) || "").trim() || newTaskId();
 
   if (!prompt) return res.status(400).json({ error: "missing prompt" });
@@ -178,6 +246,9 @@ app.post("/api/refine", (req, res) => {
     return res.status(400).json({ error: "missing scene or scenes[]" });
   }
   ensureTaskDir(taskId);
+  sseHeaders(res);
+  sseSend(res, "task", JSON.stringify({ task_id: taskId }));
+  sseStageUpdate(res, { stage: "Refine", progress: 0.05, message: "refine_start" });
 
   const child = spawnPythonStep({
     step: "refine",
@@ -188,6 +259,9 @@ app.post("/api/refine", (req, res) => {
     character,
     taskId,
     sceneIndex,
+    llmProvider,
+    imageProvider,
+    imageFallbackProvider,
   });
   if (Array.isArray(scenes)) {
     child.stdin.end(JSON.stringify({ scenes }));
@@ -197,19 +271,53 @@ app.post("/api/refine", (req, res) => {
 
   let out = "";
   let err = "";
+  let buf = "";
   child.stdout.setEncoding("utf8");
   child.stderr.setEncoding("utf8");
-  child.stdout.on("data", (c) => (out += c));
+  child.stdout.on("data", (chunk) => {
+    out += chunk;
+    buf += chunk;
+    const parts = buf.split(/\r?\n/);
+    buf = parts.pop() || "";
+    for (const line of parts) {
+      if (!line) continue;
+      if (line.startsWith("SCENE_JSON ")) {
+        try {
+          const scenePayload = JSON.parse(line.slice("SCENE_JSON ".length));
+          sseStageUpdate(res, {
+            stage: "Refine",
+            progress: 1,
+            scene_index: Number(scenePayload.index || sceneIndex) - 1,
+            scene_json: scenePayload,
+            message: "scene_refined",
+          });
+        } catch {
+          sseSend(res, "line", line);
+        }
+      } else if (line.startsWith("PROG ")) {
+        try {
+          const p = JSON.parse(line.slice("PROG ".length));
+          sseStageUpdate(res, { stage: "Refine", progress: Number(p.p || 0), message: p.msg || "" });
+        } catch {
+          sseSend(res, "line", line);
+        }
+      } else {
+        sseSend(res, "line", line);
+      }
+    }
+  });
   child.stderr.on("data", (c) => (err += c));
   child.on("exit", (code) => {
-    if (code !== 0) return res.status(500).json({ error: "python failed", stderr: err, stdout: out });
-    const line = out
-      .split(/\r?\n/)
-      .map((s) => s.trim())
-      .find((s) => s.startsWith("SCENE_JSON "));
-    if (!line) return res.status(500).json({ error: "no SCENE_JSON", stderr: err, stdout: out });
-    const payload = JSON.parse(line.slice("SCENE_JSON ".length));
-    return res.json({ task_id: taskId, scene: payload, stderr: err });
+    if (buf.trim()) sseSend(res, "line", buf.trim());
+    if (err.trim()) sseSend(res, "line", "[stderr] " + err.trim());
+    if (code !== 0) {
+      sseStageUpdate(res, { stage: "Refine", progress: null, message: `refine_failed(exit=${code})` });
+      sseSend(res, "error", `[ERROR] python exit_code=${code}`);
+      return res.end();
+    }
+    sseStageUpdate(res, { stage: "Refine", progress: 1, message: "refine_done" });
+    sseSend(res, "done", JSON.stringify({ exit_code: code != null ? code : 0, task_id: taskId }));
+    return res.end();
   });
 });
 
@@ -222,6 +330,9 @@ app.post("/api/render", (req, res) => {
   const globalStyle = String((req.body && req.body.global_style) || "").trim();
   const character = String((req.body && req.body.character) || "").trim();
   const configPath = String((req.body && req.body.config) || "./configs/config.yaml");
+  const llmProvider = String((req.body && req.body.llm_provider) || "").trim();
+  const imageProvider = String((req.body && req.body.image_provider) || "").trim();
+  const imageFallbackProvider = String((req.body && req.body.image_fallback_provider) || "").trim();
   const taskId = String((req.body && req.body.task_id) || "").trim() || newTaskId();
 
   if (!prompt) return res.status(400).json({ error: "missing prompt" });
@@ -245,6 +356,9 @@ app.post("/api/render", (req, res) => {
     globalStyle,
     character,
     taskId,
+    llmProvider,
+    imageProvider,
+    imageFallbackProvider,
   });
   child.stdin.end(JSON.stringify({ scenes }));
 
@@ -258,14 +372,26 @@ app.post("/api/render", (req, res) => {
     buf = parts.pop() || "";
     for (const line of parts) {
       if (!line) continue;
-      if (line.startsWith("PROG ")) sseSend(res, "prog", line.slice("PROG ".length));
-      else if (line.startsWith("PROG_SHOT ")) {
+      if (line.startsWith("PROG ")) {
+        try {
+          const p = JSON.parse(line.slice("PROG ".length));
+          sseStageUpdate(res, { stage: "Render", progress: Number(p.p || 0), message: p.msg || "" });
+        } catch {
+          sseSend(res, "line", line);
+        }
+      } else if (line.startsWith("PROG_SHOT ")) {
         const rest = line.slice("PROG_SHOT ".length).trim();
         const firstSpace = rest.indexOf(" ");
         if (firstSpace > 0) {
           const shotId = rest.slice(0, firstSpace).trim();
           const status = rest.slice(firstSpace + 1).trim();
-          sseSend(res, "shot_progress", JSON.stringify({ shot_id: shotId, status }));
+          sseStageUpdate(res, {
+            stage: "Render",
+            progress: null,
+            shot_id: shotId,
+            shot_status: status,
+            message: "shot_progress",
+          });
         } else {
           sseSend(res, "line", line);
         }
@@ -276,7 +402,8 @@ app.post("/api/render", (req, res) => {
   });
 
   child.stderr.on("data", (chunk) => {
-    sseSend(res, "error", chunk);
+    // stderr can contain non-fatal logs/warnings; keep as a normal line event.
+    sseSend(res, "line", "[stderr] " + chunk);
   });
 
   req.on("close", () => {
diff --git a/server/public/index.html b/server/public/index.html
index 66df199..c4bb3e4 100644
--- a/server/public/index.html
+++ b/server/public/index.html
@@ -63,78 +63,82 @@
         const [globalStyle, setGlobalStyle] = useState("电影感");
         const [characterPreset, setCharacterPreset] = useState("");
         const [mock, setMock] = useState(true);
-        const [logs, setLogs] = useState("");
-        const [scenes, setScenes] = useState([null, null, null]);
+        const [llmProvider, setLlmProvider] = useState("mock");
+        const [imageProvider, setImageProvider] = useState("mock");
+        const [imageFallbackProvider, setImageFallbackProvider] = useState("mock");
+        const [scenes, setScenes] = useState([]);
+        const [stageLogs, setStageLogs] = useState({ Script: [], Refine: [], Render: [] });
+        const [stageState, setStageState] = useState({
+          Script: "pending",
+          Refine: "pending",
+          Render: "pending",
+        });
+        const [activeLogStage, setActiveLogStage] = useState("Script");
+        const [renderProgress, setRenderProgress] = useState(0);
         const [canRender, setCanRender] = useState(false);
         const [finalVideoUrl, setFinalVideoUrl] = useState("");
         const [taskId, setTaskId] = useState("");
         const [toast, setToast] = useState("");
+        const [scriptRunning, setScriptRunning] = useState(false);
 
         const esRef = useRef(null);
         const logRef = useRef(null);
+        const autoTimerRef = useRef(null);
 
-        const appendLog = (line) => {
-          setLogs((prev) => prev + line + "\n");
+        const appendStageLog = (stage, line) => {
+          setStageLogs((prev) => ({ ...prev, [stage]: [...(prev[stage] || []), String(line)] }));
         };
 
         const showToast = (msg) => {
           setToast(String(msg || "发生错误"));
-          // auto hide
           setTimeout(() => setToast(""), 6000);
         };
 
         useEffect(() => {
           if (!logRef.current) return;
           logRef.current.scrollTop = logRef.current.scrollHeight;
-        }, [logs]);
+        }, [stageLogs]);
 
-        const startScript = () => {
-          stopScript();
-          setLogs("");
-          setScenes([null, null, null]);
-          setCanRender(false);
-          setFinalVideoUrl("");
-          setTaskId("");
+        const normalizeScene = (raw, idx) => {
+          const s = raw || {};
+          return {
+            index: Number(s.index || idx + 1),
+            image_prompt: String(s.image_prompt || ""),
+            video_motion: String(s.video_motion || ""),
+            narration: String(s.narration || ""),
+            preview_url: String(s.preview_url || ""),
+            motion_camera: String(s.motion_camera || ""),
+            motion_direction: String(s.motion_direction || ""),
+            motion_speed: String(s.motion_speed || "normal"),
+          };
+        };
 
-          const url = `/api/script?prompt=${encodeURIComponent(prompt.trim())}&mock=${mock ? "1" : "0"}&global_style=${encodeURIComponent(globalStyle)}&character=${encodeURIComponent(characterPreset)}`;
-          const es = new EventSource(url);
-          esRef.current = es;
+        const parseStageUpdate = (raw) => {
+          if (!raw || typeof raw !== "object") return null;
+          if (Number(raw.schema_version) !== 1) return null;
+          const stage = String(raw.stage || "");
+          if (!["Script", "Refine", "Render"].includes(stage)) return null;
+          return raw;
+        };
 
-          es.addEventListener("status", (e) => appendLog("[status] " + e.data));
-          es.addEventListener("error", (e) => {
-            const m = (e && e.data) ? e.data : "连接或后端错误";
-            appendLog("[ERROR] " + m);
-            showToast(m);
-          });
-          es.addEventListener("task", (e) => {
-            try { setTaskId(JSON.parse(e.data).task_id || ""); } catch (err) { }
-          });
-          es.addEventListener("done", (e) => {
-            appendLog("[done] exit_code=" + e.data);
-            stopScript();
-          });
-          es.addEventListener("scene", (e) => {
-            try {
-              const obj = JSON.parse(e.data);
-              setScenes((prev) => {
-                const next = [...prev];
-                next[obj.index - 1] = {
-                  index: obj.index,
-                  image_prompt: obj.image_prompt || "",
-                  video_motion: obj.video_motion || "",
-                  narration: obj.narration || "",
-                };
-                return next;
-              });
-            } catch (err) {
-              appendLog("[parse_error] " + err);
-            }
-          });
-          es.addEventListener("line", (e) => {
-            appendLog(e.data);
-            if (e.data === "SCRIPT_END") setCanRender(true);
-          });
-          es.onerror = () => appendLog("[error] connection error");
+        const applyStageUpdate = (u) => {
+          const stage = String(u.stage || "Script");
+          setStageState((prev) => ({ ...prev, [stage]: prev[stage] === "done" ? "done" : "running" }));
+          if (typeof u.progress === "number" && stage === "Render") {
+            setRenderProgress(Math.max(0, Math.min(1, u.progress)));
+          }
+          if (u.scene_json) {
+            const scene = u.scene_json;
+            const idx = Math.max(0, Number(scene.index || 1) - 1);
+            setScenes((prev) => {
+              const next = [...prev];
+              next[idx] = normalizeScene(scene, idx);
+              return next;
+            });
+            setCanRender(true);
+          }
+          if (u.message) appendStageLog(stage, u.message);
+          if (u.shot_id && u.shot_status) appendStageLog(stage, `${u.shot_id} -> ${u.shot_status}`);
         };
 
         const stopScript = () => {
@@ -142,68 +146,116 @@
             esRef.current.close();
             esRef.current = null;
           }
+          setScriptRunning(false);
+        };
+
+        const startScript = () => {
+          const p = prompt.trim();
+          if (!p) return;
+          stopScript();
+          setScriptRunning(true);
+          setCanRender(false);
+          setFinalVideoUrl("");
+          setRenderProgress(0);
+          setScenes([]);
+          setStageLogs({ Script: [], Refine: [], Render: [] });
+          setStageState({ Script: "running", Refine: "pending", Render: "pending" });
+          setActiveLogStage("Script");
+
+          const q = new URLSearchParams({
+            prompt: p,
+            mock: mock ? "1" : "0",
+            global_style: globalStyle,
+            character: characterPreset,
+            llm_provider: llmProvider,
+            image_provider: imageProvider,
+            image_fallback_provider: imageFallbackProvider,
+          });
+          const es = new EventSource(`/api/script?${q.toString()}`);
+          esRef.current = es;
+
+          es.addEventListener("task", (e) => {
+            try { setTaskId(JSON.parse(e.data).task_id || ""); } catch (err) { }
+          });
+          es.addEventListener("stage_update", (e) => {
+            try {
+              const parsed = parseStageUpdate(JSON.parse(e.data));
+              if (!parsed) return appendStageLog("Script", "[schema_error] invalid stage_update payload");
+              applyStageUpdate(parsed);
+            } catch (err) { appendStageLog("Script", "[parse_error] " + err); }
+          });
+          es.addEventListener("line", (e) => appendStageLog("Script", e.data));
+          es.addEventListener("error", (e) => {
+            setStageState((prev) => ({ ...prev, Script: "failed" }));
+            showToast((e && e.data) ? e.data : "script连接错误");
+          });
+          es.addEventListener("done", () => {
+            stopScript();
+            setCanRender(true);
+            setStageState((prev) => ({ ...prev, Script: "done" }));
+          });
+        };
+
+        useEffect(() => {
+          if (autoTimerRef.current) clearTimeout(autoTimerRef.current);
+          autoTimerRef.current = setTimeout(() => {
+            if (!prompt.trim()) return;
+            startScript();
+          }, 700);
+          return () => {
+            if (autoTimerRef.current) clearTimeout(autoTimerRef.current);
+          };
+        }, [prompt, globalStyle, characterPreset, llmProvider, imageProvider, imageFallbackProvider, mock]);
+
+        const updateMotionField = (idx, field, value) => {
+          setScenes((prev) => {
+            const next = [...prev];
+            const cur = normalizeScene(next[idx], idx);
+            const merged = { ...cur, [field]: value };
+            merged.video_motion = [merged.motion_camera, merged.motion_direction, merged.motion_speed].filter(Boolean).join(" | ");
+            next[idx] = merged;
+            return next;
+          });
         };
 
         const onEdit = (idx, field, value) => {
           setScenes((prev) => {
             const next = [...prev];
-            const cur = next[idx] || { index: idx + 1, image_prompt: "", video_motion: "", narration: "" };
+            const cur = normalizeScene(next[idx], idx);
             next[idx] = { ...cur, [field]: value };
             return next;
           });
         };
 
         const refineOne = async (sceneIndex) => {
-          appendLog(`[refine] scene ${sceneIndex}...`);
+          setStageState((prev) => ({ ...prev, Refine: "running" }));
+          appendStageLog("Refine", `scene ${sceneIndex} refining...`);
           const s0 = scenes[sceneIndex - 1] || {};
-          const payloadScene = {
-            image_prompt: s0.image_prompt || "",
-            video_motion: s0.video_motion || "",
-            narration: s0.narration || "",
-          };
           const resp = await fetch("/api/refine", {
             method: "POST",
             headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({ prompt, scene: payloadScene, scene_index: sceneIndex, mock, global_style: globalStyle, character: characterPreset, task_id: taskId }),
-          });
-          const data = await resp.json();
-          if (!resp.ok) {
-            appendLog("[refine_error] " + JSON.stringify(data));
-            showToast((data && (data.error || data.msg)) || "润色失败");
-            return;
-          }
-          const s = data.scene;
-          setScenes((prev) => {
-            const next = [...prev];
-            next[s.index - 1] = {
-              index: s.index,
-              image_prompt: s.image_prompt || "",
-              video_motion: s.video_motion || "",
-              narration: s.narration || "",
-            };
-            return next;
-          });
-          appendLog(`[refine] scene ${sceneIndex} done`);
-        };
-
-        const renderVideo = async () => {
-          appendLog("[render] start...");
-          const payloadScenes = scenes.map((s, i) => ({
-            image_prompt: (s && s.image_prompt) || "",
-            video_motion: (s && s.video_motion) || "",
-            narration: (s && s.narration) || "",
-          }));
-          const resp = await fetch("/api/render", {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({ prompt, scenes: payloadScenes, mock, global_style: globalStyle, character: characterPreset, task_id: taskId }),
+            body: JSON.stringify({
+              prompt,
+              scene: {
+                image_prompt: s0.image_prompt || "",
+                video_motion: s0.video_motion || "",
+                narration: s0.narration || "",
+              },
+              scene_index: sceneIndex,
+              mock,
+              global_style: globalStyle,
+              character: characterPreset,
+              task_id: taskId,
+              llm_provider: llmProvider,
+              image_provider: imageProvider,
+              image_fallback_provider: imageFallbackProvider,
+            }),
           });
           if (!resp.ok) {
-            appendLog("[render_error] http " + resp.status);
-            showToast("渲染请求失败（HTTP " + resp.status + "）");
+            setStageState((prev) => ({ ...prev, Refine: "failed" }));
+            showToast("润色请求失败（HTTP " + resp.status + "）");
             return;
           }
-          // Parse SSE from fetch (POST)
           const reader = resp.body.getReader();
           const decoder = new TextDecoder("utf-8");
           let buf = "";
@@ -224,40 +276,146 @@
               const data = dataLines.join("\n");
               if (event === "task") {
                 try { setTaskId(JSON.parse(data).task_id || ""); } catch (err) { }
-              } else if (event === "prog") {
-                appendLog("[prog] " + data);
+              } else if (event === "stage_update") {
+                try {
+                  const parsed = parseStageUpdate(JSON.parse(data));
+                  if (!parsed) return appendStageLog("Refine", "[schema_error] invalid stage_update payload");
+                  applyStageUpdate(parsed);
+                } catch (err) { appendStageLog("Refine", "[parse_error] " + err); }
               } else if (event === "error") {
-                appendLog("[ERROR] " + data);
+                appendStageLog("Refine", "[ERROR] " + data);
+                setStageState((prev) => ({ ...prev, Refine: "failed" }));
+                showToast(data);
+              } else if (event === "done") {
+                appendStageLog("Refine", `scene ${sceneIndex} done`);
+                setStageState((prev) => ({ ...prev, Refine: "done" }));
+              } else if (event === "line") {
+                appendStageLog("Refine", data);
+              }
+            }
+          }
+        };
+
+        const renderVideo = async () => {
+          setStageState((prev) => ({ ...prev, Render: "running" }));
+          setRenderProgress(0);
+          appendStageLog("Render", "render start...");
+          const payloadScenes = scenes.map((s, i) => ({
+            image_prompt: (s && s.image_prompt) || "",
+            video_motion: (s && s.video_motion) || "",
+            narration: (s && s.narration) || "",
+          }));
+          const resp = await fetch("/api/render", {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
+              prompt,
+              scenes: payloadScenes,
+              mock,
+              global_style: globalStyle,
+              character: characterPreset,
+              task_id: taskId,
+              llm_provider: llmProvider,
+              image_provider: imageProvider,
+              image_fallback_provider: imageFallbackProvider,
+            }),
+          });
+          if (!resp.ok) {
+            setStageState((prev) => ({ ...prev, Render: "failed" }));
+            showToast("渲染请求失败（HTTP " + resp.status + "）");
+            return;
+          }
+          const reader = resp.body.getReader();
+          const decoder = new TextDecoder("utf-8");
+          let buf = "";
+          while (true) {
+            const { value, done } = await reader.read();
+            if (done) break;
+            buf += decoder.decode(value, { stream: true });
+            const chunks = buf.split("\n\n");
+            buf = chunks.pop() || "";
+            for (const c of chunks) {
+              const lines = c.split("\n").filter(Boolean);
+              let event = "message";
+              const dataLines = [];
+              for (const line of lines) {
+                if (line.startsWith("event:")) event = line.slice(6).trim();
+                else if (line.startsWith("data:")) dataLines.push(line.slice(5).trim());
+              }
+              const data = dataLines.join("\n");
+              if (event === "task") {
+                try { setTaskId(JSON.parse(data).task_id || ""); } catch (err) { }
+              } else if (event === "stage_update") {
+                try {
+                  const parsed = parseStageUpdate(JSON.parse(data));
+                  if (!parsed) return appendStageLog("Render", "[schema_error] invalid stage_update payload");
+                  applyStageUpdate(parsed);
+                } catch (err) { appendStageLog("Render", "[parse_error] " + err); }
+              } else if (event === "error") {
+                appendStageLog("Render", "[ERROR] " + data);
+                setStageState((prev) => ({ ...prev, Render: "failed" }));
                 showToast(data);
               } else if (event === "done") {
                 try {
                   const obj = JSON.parse(data);
                   const file = String(obj.output || "").split("/").pop() || "final.mp4";
                   const tid = taskId || (obj.task_id || "");
-                  appendLog("[render] done: " + file);
                   if (tid) setFinalVideoUrl(`/api/static/${encodeURIComponent(tid)}/${encodeURIComponent(file)}?t=${Date.now()}`);
+                  setRenderProgress(1);
+                  setStageState((prev) => ({ ...prev, Render: "done" }));
                 } catch (e) {
-                  appendLog("[render_done_parse_error] " + e);
+                  setStageState((prev) => ({ ...prev, Render: "failed" }));
                   showToast("渲染完成消息解析失败");
                 }
-              } else {
-                appendLog(data);
+              } else if (event === "line") {
+                appendStageLog("Render", data);
               }
             }
           }
         };
 
+        const activeLogs = useMemo(() => (stageLogs[activeLogStage] || []).join("\n"), [stageLogs, activeLogStage]);
+        const stageColor = (s) => {
+          if (s === "done") return "#16a34a";
+          if (s === "running") return "#2563eb";
+          if (s === "failed") return "#dc2626";
+          return "#9ca3af";
+        };
+
         return (
           <div>
-            <h2>AiVideo POC：双向交互手搓平台</h2>
-            <p className="muted">分镜可编辑、可单条润色，渲染完成后可直接预览与下载。</p>
+            <h2>AiVideo POC：多后端模型平台</h2>
+            <p className="muted">自动拆分 Prompt，SSE 实时反馈，Render 全链路可视化。</p>
 
             <div className="row">
-              <input
-                type="text"
-                value={prompt}
-                onChange={(e) => setPrompt(e.target.value)}
-              />
+              <input type="text" value={prompt} onChange={(e) => setPrompt(e.target.value)} />
+            </div>
+
+            <div className="row">
+              <label>LLM Provider:
+                <select value={llmProvider} onChange={(e) => setLlmProvider(e.target.value)} style={{ marginLeft: 6, padding: 8 }}>
+                  <option value="mock">mock</option>
+                  <option value="openai">openai</option>
+                </select>
+              </label>
+              <label>Image Provider:
+                <select value={imageProvider} onChange={(e) => setImageProvider(e.target.value)} style={{ marginLeft: 6, padding: 8 }}>
+                  <option value="mock">mock</option>
+                  <option value="comfy">comfy</option>
+                  <option value="replicate">replicate</option>
+                  <option value="openai">openai</option>
+                </select>
+              </label>
+              <label>Fallback:
+                <select value={imageFallbackProvider} onChange={(e) => setImageFallbackProvider(e.target.value)} style={{ marginLeft: 6, padding: 8 }}>
+                  <option value="mock">mock</option>
+                  <option value="comfy">comfy</option>
+                </select>
+              </label>
+              <label className="row" style={{ gap: 6 }}>
+                <input type="checkbox" checked={mock} onChange={(e) => setMock(e.target.checked)} />
+                mock flag
+              </label>
             </div>
 
             <div className="row">
@@ -271,26 +429,42 @@
               </label>
               <label className="row" style={{ gap: 6 }}>
                 Character Preset:
-                <input
-                  type="text"
-                  value={characterPreset}
-                  onChange={(e) => setCharacterPreset(e.target.value)}
-                  placeholder="例如：黑发短发、穿风衣的年轻侦探、冷静目光"
-                  style={{ width: "min(640px, 100%)", padding: "10px 12px", fontSize: 14 }}
-                />
+                <input type="text" value={characterPreset} onChange={(e) => setCharacterPreset(e.target.value)} style={{ width: "min(520px, 100%)", padding: "10px 12px", fontSize: 14 }} />
               </label>
-              <label className="row" style={{ gap: 6 }}>
-                <input type="checkbox" checked={mock} onChange={(e) => setMock(e.target.checked)} />
-                mock（无 ComfyUI / 无 Key 也能跑）
-              </label>
-              <button onClick={startScript}>生成分镜</button>
+              <button onClick={startScript}>立即重生分镜</button>
               <button onClick={stopScript}>停止</button>
-              {canRender ? (
-                <button onClick={renderVideo}>确认并开始渲染视频</button>
-              ) : null}
+              {canRender ? <button onClick={renderVideo}>开始渲染</button> : null}
               {taskId ? <span className="muted">task_id: {taskId}</span> : null}
             </div>
 
+            <div className="row" style={{ marginTop: 10, alignItems: "stretch" }}>
+              {["Script", "Refine", "Render"].map((s) => (
+                <button
+                  key={s}
+                  onClick={() => setActiveLogStage(s)}
+                  style={{
+                    flex: 1,
+                    border: `1px solid ${stageColor(stageState[s])}`,
+                    borderRadius: 10,
+                    padding: 10,
+                    textAlign: "left",
+                    background: activeLogStage === s ? "#eef2ff" : "#fff",
+                    cursor: "pointer",
+                  }}
+                >
+                  <div style={{ fontWeight: 700 }}>{s}</div>
+                  <div className="muted" style={{ color: stageColor(stageState[s]) }}>{stageState[s]}</div>
+                </button>
+              ))}
+            </div>
+
+            <div style={{ marginTop: 10 }}>
+              <div className="muted">Render Progress: {(renderProgress * 100).toFixed(0)}%</div>
+              <div style={{ width: "100%", height: 10, background: "#e5e7eb", borderRadius: 8, overflow: "hidden" }}>
+                <div style={{ width: `${Math.max(0, Math.min(100, renderProgress * 100))}%`, height: "100%", background: "#2563eb" }} />
+              </div>
+            </div>
+
             <div className="scenes">
               {scenes.map((s, idx) => (
                 <div className="card" key={idx}>
@@ -298,21 +472,27 @@
                     <strong>Scene {idx + 1}</strong>
                     <button style={{ padding: "6px 10px" }} onClick={() => refineOne(idx + 1)}>🔄 重新润色</button>
                   </div>
+                  {s && s.preview_url ? (
+                    <div style={{ marginTop: 8 }}>
+                      <img src={s.preview_url + `?t=${Date.now()}`} style={{ width: "100%", borderRadius: 8 }} />
+                    </div>
+                  ) : null}
                   <div className="k">image_prompt</div>
-                  <textarea rows="3" style={{ width: "100%", padding: 8 }}
-                    value={(s && s.image_prompt) || ""}
-                    onChange={(e) => onEdit(idx, "image_prompt", e.target.value)}
-                  />
-                  <div className="k">video_motion</div>
-                  <textarea rows="2" style={{ width: "100%", padding: 8 }}
-                    value={(s && s.video_motion) || ""}
-                    onChange={(e) => onEdit(idx, "video_motion", e.target.value)}
-                  />
+                  <textarea rows="3" style={{ width: "100%", padding: 8 }} value={(s && s.image_prompt) || ""} onChange={(e) => onEdit(idx, "image_prompt", e.target.value)} />
+                  <div className="k">motion</div>
+                  <div className="row">
+                    <input type="text" placeholder="camera(如 push-in)" value={(s && s.motion_camera) || ""} onChange={(e) => updateMotionField(idx, "motion_camera", e.target.value)} style={{ padding: 8 }} />
+                    <input type="text" placeholder="direction(如 left->right)" value={(s && s.motion_direction) || ""} onChange={(e) => updateMotionField(idx, "motion_direction", e.target.value)} style={{ padding: 8 }} />
+                    <select value={(s && s.motion_speed) || "normal"} onChange={(e) => updateMotionField(idx, "motion_speed", e.target.value)} style={{ padding: 8 }}>
+                      <option value="slow">slow</option>
+                      <option value="normal">normal</option>
+                      <option value="fast">fast</option>
+                    </select>
+                  </div>
+                  <div className="k">video_motion (raw)</div>
+                  <textarea rows="2" style={{ width: "100%", padding: 8 }} value={(s && s.video_motion) || ""} onChange={(e) => onEdit(idx, "video_motion", e.target.value)} />
                   <div className="k">narration</div>
-                  <textarea rows="2" style={{ width: "100%", padding: 8 }}
-                    value={(s && s.narration) || ""}
-                    onChange={(e) => onEdit(idx, "narration", e.target.value)}
-                  />
+                  <textarea rows="2" style={{ width: "100%", padding: 8 }} value={(s && s.narration) || ""} onChange={(e) => onEdit(idx, "narration", e.target.value)} />
                 </div>
               ))}
             </div>
@@ -323,19 +503,16 @@
                 <div>
                   <video controls src={finalVideoUrl}></video>
                   <div className="row" style={{ marginTop: 10 }}>
-                    <a href={finalVideoUrl} download>
-                      <button>下载视频</button>
-                    </a>
+                    <a href={finalVideoUrl} download><button>下载 final.mp4</button></a>
                   </div>
-                  <div className="muted">URL: {finalVideoUrl}</div>
                 </div>
               ) : (
                 <div className="muted">尚未渲染完成。</div>
               )}
             </div>
 
-            <h3>原始日志（stdout/stderr）</h3>
-            <pre ref={logRef}>{logs}</pre>
+            <h3>分阶段日志：{activeLogStage}</h3>
+            <pre ref={logRef}>{activeLogs}</pre>
 
             {toast ? (
               <div className="toast" role="alert">