diff --git a/Dockerfile b/Dockerfile index 8699e45..6154e4f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ -FROM python:3.10-slim +FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 AS builder -ENV PYTHONDONTWRITEBYTECODE=1 \ +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ PIP_DISABLE_PIP_VERSION_CHECK=1 \ PIP_NO_CACHE_DIR=1 \ @@ -10,26 +11,53 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ WORKDIR /app -# ffmpeg is required for MoviePy (audio duration + encoding). -RUN if [ -f /etc/apt/sources.list ]; then \ - sed -i 's|http://deb.debian.org/debian|https://mirrors.tuna.tsinghua.edu.cn/debian|g; s|http://security.debian.org/debian-security|https://mirrors.tuna.tsinghua.edu.cn/debian-security|g' /etc/apt/sources.list; \ - fi \ - && if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ - sed -i 's|http://deb.debian.org/debian|https://mirrors.tuna.tsinghua.edu.cn/debian|g; s|http://security.debian.org/debian-security|https://mirrors.tuna.tsinghua.edu.cn/debian-security|g' /etc/apt/sources.list.d/debian.sources; \ - fi \ - && apt-get update && apt-get install -y --no-install-recommends \ - ffmpeg \ - fonts-dejavu-core \ - nodejs \ - npm \ - && rm -rf /var/lib/apt/lists/* +# Base deps + Python 3.10 + Node.js 20.x +RUN sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g; s|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list \ + && apt-get -o Acquire::Retries=5 update \ + && apt-get -o Acquire::Retries=5 install -y --no-install-recommends --fix-missing \ + ca-certificates curl gnupg \ + python3.10 python3.10-distutils python3-pip \ + ffmpeg fonts-dejavu-core \ + && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get -o Acquire::Retries=5 install -y --no-install-recommends --fix-missing nodejs \ + && ln -sf /usr/bin/python3.10 /usr/local/bin/python \ + && rm -rf /var/lib/apt/lists/* COPY requirements.txt /app/requirements.txt -RUN 
pip install -r /app/requirements.txt +RUN python3.10 -m pip install -r /app/requirements.txt + +COPY server/package.json server/package-lock.json /app/server/ +RUN cd /app/server && npm ci --omit=dev COPY . /app -RUN cd /app/server && npm i --omit=dev +FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 AS runtime + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \ + PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn \ + NPM_CONFIG_REGISTRY=https://registry.npmmirror.com + +WORKDIR /app + +RUN sed -i 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g; s|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' /etc/apt/sources.list \ + && apt-get -o Acquire::Retries=5 update \ + && apt-get -o Acquire::Retries=5 install -y --no-install-recommends --fix-missing \ + ca-certificates curl gnupg \ + python3.10 python3.10-distutils python3-pip \ + ffmpeg fonts-dejavu-core \ + && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ + && apt-get -o Acquire::Retries=5 install -y --no-install-recommends --fix-missing nodejs \ + && ln -sf /usr/bin/python3.10 /usr/local/bin/python \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10 +COPY --from=builder /usr/local/bin /usr/local/bin +COPY --from=builder /app /app EXPOSE 3000 CMD ["node", "/app/server/index.js"] diff --git a/README.md b/README.md index 3013f81..e293042 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ - Output: a 3-scene narrated video `final_poc.mp4` (mock mode supported) ## Quick start (Docker) +`docker compose up` includes a **ComfyUI** service (default image `jamesbrink/comfyui:latest` from Docker Hub). If you use another registry image, set `COMFYUI_IMAGE` in the environment. 
+ 

Build: ```bash diff --git a/configs/config.yaml b/configs/config.yaml index 4d4f361..be2485d 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,6 +1,6 @@ app: - # ComfyUI base url (local) - comfy_base_url: "http://127.0.0.1:8188" + # ComfyUI base url (docker internal service) + comfy_base_url: "http://comfyui:8188" # ComfyUI output directory on the same machine running this code comfy_output_dir: "./ComfyUI/output" @@ -26,7 +26,7 @@ tts: video: # Final output path - final_output: "./final_poc.mp4" + final_output: "./outputs/final_poc.mp4" # If ComfyUI is not ready, generate mock clips with this size & fps mock_size: [1024, 576] mock_fps: 24 diff --git a/dev.sh b/dev.sh index c9c6961..f9bec04 100755 --- a/dev.sh +++ b/dev.sh @@ -18,7 +18,29 @@ shift || true case "$CMD" in up) - docker compose up --build "$@" + # Start in background, then wait for Node self-check + health endpoint. + docker compose up -d --build "$@" + echo "[dev] waiting for server health..." + deadline=$((SECONDS + 90)) + ok=0 + while [ $SECONDS -lt $deadline ]; do + if curl -fsS "http://127.0.0.1:3000/api/health" >/dev/null 2>&1; then + ok=1 + break + fi + # If container exited, fail fast. + if ! docker compose ps --services --status running | grep -qx "aivideo"; then + break + fi + sleep 1 + done + if [ "$ok" -ne 1 ]; then + echo "[dev] server failed to become healthy (self-check likely failed)." >&2 + docker compose logs --tail=200 aivideo || true + exit 1 + fi + echo "[dev] server ready: http://127.0.0.1:3000" + docker compose logs -f --tail=50 aivideo ;; rebuild) docker compose build "$@" diff --git a/docker-compose.yml b/docker-compose.yml index cf09164..1eb1b46 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,6 +2,8 @@ services: aivideo: build: . 
working_dir: /app + depends_on: + - comfyui environment: - OPENAI_API_KEY=${OPENAI_API_KEY} - OPENAI_BASE_URL=${OPENAI_BASE_URL} @@ -10,5 +12,18 @@ services: - ./:/app ports: - "3000:3000" - # On macOS, use host.docker.internal to reach host services like ComfyUI. - # Example: set app.comfy_base_url in configs/config.yaml to http://host.docker.internal:8188 + + # Default: Docker Hub (anonymous pull). GHCR comfyanonymous image often returns "denied" without login. + # Override: COMFYUI_IMAGE=ghcr.io/... after `docker login ghcr.io` + comfyui: + image: ${COMFYUI_IMAGE:-jamesbrink/comfyui:latest} + environment: + - CLI_ARGS=--listen 0.0.0.0 --port 8188 + ports: + - "8188:8188" + volumes: + - ./ComfyUI/user:/comfyui/user + - ./ComfyUI/models:/comfyui/models + - ./ComfyUI/custom_nodes:/comfyui/custom_nodes + - ./ComfyUI/output:/comfyui/output + - ./ComfyUI/input:/comfyui/input diff --git a/engine/main.py b/engine/main.py new file mode 100644 index 0000000..2b05295 --- /dev/null +++ b/engine/main.py @@ -0,0 +1,354 @@ +from __future__ import annotations + +import argparse +import asyncio +import json +import os +import random +import sys +from pathlib import Path +from typing import Any + +from moviepy import ImageClip +from PIL import Image, ImageDraw, ImageFont + +from engine.audio_gen import synthesize_scenes +from engine.comfy_client import ComfyClient +from engine.config import AppConfig +from engine.script_gen import generate_scenes, refine_scene +from engine.types import Scene +from engine.video_editor import Segment, render_final + + +def _emit(line: str) -> None: + print(line, flush=True) + + +def _emit_scene(scene_idx: int, scene: Scene) -> None: + payload = { + "index": scene_idx, + "image_prompt": scene.image_prompt, + "video_motion": scene.video_motion, + "narration": scene.narration, + } + _emit("SCENE_JSON " + json.dumps(payload, ensure_ascii=False)) + + +def _ensure_mock_image(path: Path, size: tuple[int, int]) -> Path: + if path.exists(): + return path + 
path.parent.mkdir(parents=True, exist_ok=True) + img = Image.new("RGB", size, color=(20, 24, 33)) + draw = ImageDraw.Draw(img) + text = "MOCK" + try: + font = ImageFont.load_default() + except Exception: + font = None + draw.text((size[0] // 2 - 30, size[1] // 2 - 10), text, fill=(240, 240, 240), font=font) + img.save(path) + return path + + +def _make_mock_video(out_path: Path, image_path: Path, duration_s: float, fps: int) -> Path: + out_path.parent.mkdir(parents=True, exist_ok=True) + clip = ImageClip(str(image_path)).with_duration(max(0.5, duration_s)).with_fps(fps) + try: + clip.write_videofile(str(out_path), codec="libx264", audio=False, fps=fps, preset="veryfast") + finally: + clip.close() + return out_path + + +def _prog(p: float, msg: str) -> None: + p2 = max(0.0, min(1.0, float(p))) + _emit("PROG " + json.dumps({"p": p2, "msg": msg}, ensure_ascii=False)) + + +def _normalize_style(style: str | None) -> str: + s = (style or "").strip() + if not s: + return "" + # Allow both Chinese labels and simple aliases + mapping = { + "电影感": "电影感", + "cinema": "电影感", + "二次元": "二次元", + "anime": "二次元", + "写实": "写实", + "real": "写实", + } + return mapping.get(s, s) + + +def _inject_globals_into_prompt(prompt: str, *, style: str | None, character: str | None) -> str: + style_n = _normalize_style(style) + character_n = (character or "").strip() + if not style_n and not character_n: + return prompt + parts: list[str] = [prompt.strip(), "\n\n[Global Constraints]"] + if style_n: + parts.append(f"- Global Style: {style_n}") + if character_n: + parts.append(f"- Character Preset: {character_n}") + parts.append("请严格遵守上述全局信息,并保持三分镜主角一致。") + return "\n".join(parts).strip() + + +def _decorate_image_prompt(image_prompt: str, *, style: str | None, character: str | None) -> str: + # Industrial rule: final_prompt = f"{global_character}, {global_style}, {scene_prompt}" + style_n = _normalize_style(style) + character_n = (character or "").strip() + parts = [] + if character_n: + 
parts.append(character_n) + if style_n: + parts.append(style_n) + parts.append(image_prompt) + return ", ".join([p for p in parts if p]).strip(", ") + + +def _fallback_scenes(prompt: str) -> list[Scene]: + return [ + Scene( + image_prompt=f"{prompt},城市夜景,霓虹灯,电影感", + video_motion="缓慢推进镜头,轻微摇镜", + narration="夜色温柔落在街灯上", + ), + Scene( + image_prompt=f"{prompt},咖啡店窗边,暖光,细雨", + video_motion="侧向平移,人物轻轻抬头", + narration="雨声里藏着一段回忆", + ), + Scene( + image_prompt=f"{prompt},桥上远景,车流光轨,温暖", + video_motion="拉远全景,光轨流动", + narration="我们在光里学会告别", + ), + ] + + +def _has_llm_key(cfg: AppConfig) -> bool: + api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY")) + return bool(os.environ.get(api_key_env)) + + +def _parse_scenes_from_obj(obj: Any) -> list[Scene]: + if not isinstance(obj, dict): + raise ValueError("payload must be object") + if "scene" in obj and obj.get("scene") is not None: + s = obj.get("scene") + if not isinstance(s, dict): + raise ValueError("payload.scene must be object") + return [ + Scene( + image_prompt=str(s.get("image_prompt", "")).strip(), + video_motion=str(s.get("video_motion", "")).strip(), + narration=str(s.get("narration", "")).strip(), + ) + ] + scenes_raw = obj.get("scenes") + if not isinstance(scenes_raw, list) or not scenes_raw: + raise ValueError("payload.scenes must be non-empty array") + scenes: list[Scene] = [] + for i, s in enumerate(scenes_raw, start=1): + if not isinstance(s, dict): + raise ValueError(f"scenes[{i}] must be object") + scenes.append( + Scene( + image_prompt=str(s.get("image_prompt", "")).strip(), + video_motion=str(s.get("video_motion", "")).strip(), + narration=str(s.get("narration", "")).strip(), + ) + ) + return scenes + + +async def _render_from_scenes( + prompt: str, + scenes: list[Scene], + cfg: AppConfig, + mock: bool, + *, + style: str | None, + character: str | None, + out_dir: Path, +) -> Path: + # Force-inject globals into image prompts for rendering. 
+ scenes2 = [ + Scene( + image_prompt=_decorate_image_prompt(s.image_prompt, style=style, character=character), + video_motion=s.video_motion, + narration=s.narration, + ) + for s in scenes + ] + + _prog(0.15, "Generating TTS") + audios = await synthesize_scenes([s.narration for s in scenes2], cfg) + + segments: list[Segment] = [] + fps = int(cfg.get("video.mock_fps", 24)) + mock_size = cfg.get("video.mock_size", [1024, 576]) + w, h = int(mock_size[0]), int(mock_size[1]) + mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h)) + + if mock: + _prog(0.35, "Generating mock videos") + for i, (scene, audio) in enumerate(zip(scenes2, audios), start=1): + vpath = Path("./assets/mock_videos") / f"scene_{i:02d}.mp4" + _make_mock_video(vpath, mock_image, audio.duration_s, fps=fps) + segments.append(Segment(video_path=vpath, audio_path=audio.path, narration=scene.narration)) + _prog(0.85, "Compositing final video") + out_path = out_dir / "final.mp4" + return render_final(segments, cfg, output_path=out_path) + + comfy = ComfyClient(cfg) + wf = comfy.load_workflow() + for i, (scene, audio) in enumerate(zip(scenes2, audios), start=1): + _prog(0.25 + 0.45 * (i - 1) / max(1, len(scenes2)), f"Rendering scene {i} with ComfyUI") + seed = random.randint(1, 2_147_483_647) + wf_i = comfy.inject_params(wf, image_prompt=scene.image_prompt, seed=seed, motion_prompt=scene.video_motion or None) + result = await comfy.run_workflow(wf_i) + candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}] + video_path = candidates[0] if candidates else result.output_files[0] + segments.append(Segment(video_path=video_path, audio_path=audio.path, narration=scene.narration)) + _prog(0.85, "Compositing final video") + out_path = out_dir / "final.mp4" + return render_final(segments, cfg, output_path=out_path) + + +def _read_stdin_json() -> Any: + raw = sys.stdin.read() + if not raw.strip(): + return None + return json.loads(raw) + + +def step_script(prompt: 
str, cfg: AppConfig, mock: bool, *, style: str | None, character: str | None, out_dir: Path) -> int: + prompt2 = _inject_globals_into_prompt(prompt, style=style, character=character) + if mock and not _has_llm_key(cfg): + # fallback scenes still should include global injection + scenes = _fallback_scenes(prompt) + else: + scenes = generate_scenes(prompt2, cfg) + + out_dir.mkdir(parents=True, exist_ok=True) + _emit("SCRIPT_BEGIN") + for idx, s in enumerate(scenes, start=1): + s2 = Scene( + image_prompt=_decorate_image_prompt(s.image_prompt, style=style, character=character), + video_motion=s.video_motion, + narration=s.narration, + ) + _emit_scene(idx, s2) + _emit("SCRIPT_END") + (out_dir / "scenes.json").write_text( + json.dumps( + {"scenes": [{"image_prompt": s.image_prompt, "video_motion": s.video_motion, "narration": s.narration} for s in scenes]}, + ensure_ascii=False, + indent=2, + ), + encoding="utf-8", + ) + return 0 + + +def step_refine( + prompt: str, + cfg: AppConfig, + mock: bool, + scene_index: int, + *, + style: str | None, + character: str | None, + out_dir: Path, +) -> int: + prompt2 = _inject_globals_into_prompt(prompt, style=style, character=character) + payload = _read_stdin_json() + scenes = _parse_scenes_from_obj(payload) + # If client only sent one scene, treat it as the target scene. + if len(scenes) == 1: + target_index = 1 + else: + target_index = scene_index + if not (1 <= target_index <= len(scenes)): + raise ValueError("scene_index out of range") + + if mock and not _has_llm_key(cfg): + # Simple fallback: append a tiny polish hint to narration + s = scenes[target_index - 1] + refined = Scene( + image_prompt=_decorate_image_prompt(s.image_prompt, style=style, character=character), + video_motion=s.video_motion, + narration=(s.narration + "(更凝练)")[:30], + ) + else: + # Ensure globals are visible to LLM, and inject to output image prompt. 
+ refined0 = refine_scene(prompt=prompt2, scenes=scenes, target_index=target_index, cfg=cfg) + refined = Scene( + image_prompt=_decorate_image_prompt(refined0.image_prompt, style=style, character=character), + video_motion=refined0.video_motion, + narration=refined0.narration, + ) + + # Keep the original index for frontend replacement. + _emit_scene(scene_index, refined) + out_dir.mkdir(parents=True, exist_ok=True) + (out_dir / f"refine_scene_{scene_index}.json").write_text( + json.dumps( + {"index": scene_index, "image_prompt": refined.image_prompt, "video_motion": refined.video_motion, "narration": refined.narration}, + ensure_ascii=False, + indent=2, + ), + encoding="utf-8", + ) + return 0 + + +def step_render(prompt: str, cfg: AppConfig, mock: bool, *, style: str | None, character: str | None, out_dir: Path) -> int: + payload = _read_stdin_json() + scenes = _parse_scenes_from_obj(payload) + out_dir.mkdir(parents=True, exist_ok=True) + _prog(0.05, "Start render") + out = asyncio.run(_render_from_scenes(prompt, scenes, cfg, mock=mock, style=style, character=character, out_dir=out_dir)) + _prog(1.0, "Render finished") + _emit("RENDER_DONE " + json.dumps({"output": str(out)}, ensure_ascii=False)) + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description="AIGC interactive POC entry") + parser.add_argument("--prompt", required=True, help="User creative prompt") + parser.add_argument("--config", default="./configs/config.yaml", help="Config yaml path") + parser.add_argument("--mock", action="store_true", help="Mock mode (no ComfyUI needed)") + parser.add_argument("--step", default="script", choices=["script", "render", "refine"]) + parser.add_argument("--scene-index", type=int, default=1, help="For --step=refine only (1-based)") + parser.add_argument("--global-style", default="", help="Global style lock (e.g. 
电影感/二次元/写实)") + parser.add_argument("--character", default="", help="Character preset lock (main character description)") + parser.add_argument("--task-id", required=True, help="Task id (UUID). Outputs go to outputs/{task_id}/") + args = parser.parse_args() + + cfg = AppConfig.load(args.config) + out_dir = Path("./outputs") / str(args.task_id) + + if args.step == "script": + return step_script(args.prompt, cfg, mock=args.mock, style=args.global_style, character=args.character, out_dir=out_dir) + if args.step == "render": + return step_render(args.prompt, cfg, mock=args.mock, style=args.global_style, character=args.character, out_dir=out_dir) + if args.step == "refine": + return step_refine( + args.prompt, + cfg, + mock=args.mock, + scene_index=args.scene_index, + style=args.global_style, + character=args.character, + out_dir=out_dir, + ) + raise SystemExit(2) + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/engine/script_gen.py b/engine/script_gen.py index 2a512b8..3238cf6 100644 --- a/engine/script_gen.py +++ b/engine/script_gen.py @@ -33,6 +33,24 @@ def _system_prompt(scene_count: int, min_chars: int, max_chars: int) -> str: """ +def _refine_system_prompt(min_chars: int, max_chars: int) -> str: + return f"""你是短视频分镜润色助手。 +你会收到用户的原始创意 prompt、以及一组三分镜(其中主角设定需一致)。 +你的任务:只润色指定的一个 Scene,使其更具体、更镜头化、更适合生成视频,同时保持主角描述与其它分镜一致。 + +硬性约束: +1) 只修改目标 Scene,不要改其它 Scene。 +2) 目标 Scene 必须包含:image_prompt, video_motion, narration。 +3) narration 为中文旁白,每段控制在约 {min_chars}-{max_chars} 字左右。 +4) 输出只允许 JSON,不要解释、不要 markdown。 + +输出 JSON Schema: +{{ + "scene": {{"image_prompt":"...","video_motion":"...","narration":"..."}} +}} +""" + + def generate_scenes(user_prompt: str, cfg: AppConfig) -> list[Scene]: scene_count = int(cfg.get("script_gen.scene_count", 3)) min_chars = int(cfg.get("script_gen.narration_min_chars", 15)) @@ -78,3 +96,56 @@ def generate_scenes(user_prompt: str, cfg: AppConfig) -> list[Scene]: raise ValueError(f"Scene[{i}] missing required fields") 
scenes.append(Scene(image_prompt=image_prompt, video_motion=video_motion, narration=narration)) return scenes + + +def refine_scene(*, prompt: str, scenes: list[Scene], target_index: int, cfg: AppConfig) -> Scene: + if not (1 <= target_index <= len(scenes)): + raise ValueError("target_index out of range") + + min_chars = int(cfg.get("script_gen.narration_min_chars", 15)) + max_chars = int(cfg.get("script_gen.narration_max_chars", 20)) + + api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY")) + base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL")) + model = str(cfg.get("openai.model", "gpt-4o-mini")) + + api_key = os.environ.get(api_key_env) + if not api_key: + raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key") + + client = OpenAI( + api_key=api_key, + base_url=os.environ.get(base_url_env) or None, + ) + + scenes_payload = [ + {"image_prompt": s.image_prompt, "video_motion": s.video_motion, "narration": s.narration} + for s in scenes + ] + user_payload = { + "prompt": prompt, + "target_index": target_index, + "scenes": scenes_payload, + } + + resp = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": _refine_system_prompt(min_chars, max_chars)}, + {"role": "user", "content": json.dumps(user_payload, ensure_ascii=False)}, + ], + response_format={"type": "json_object"}, + temperature=0.6, + ) + + content = resp.choices[0].message.content or "{}" + data: Any = json.loads(content) + s = data.get("scene") + if not isinstance(s, dict): + raise ValueError("Model refine output missing scene") + image_prompt = str(s.get("image_prompt", "")).strip() + video_motion = str(s.get("video_motion", "")).strip() + narration = str(s.get("narration", "")).strip() + if not image_prompt or not narration: + raise ValueError("Refined scene missing required fields") + return Scene(image_prompt=image_prompt, video_motion=video_motion, narration=narration) diff --git a/final_poc.mp4 b/final_poc.mp4 index 
38d1761..8791b02 100644 Binary files a/final_poc.mp4 and b/final_poc.mp4 differ diff --git a/main.py b/main.py index c3ce198..7a55735 100644 --- a/main.py +++ b/main.py @@ -7,154 +7,11 @@ import os import random from pathlib import Path -from fastapi import FastAPI -from moviepy import ImageClip -from PIL import Image, ImageDraw, ImageFont - -from engine.audio_gen import synthesize_scenes -from engine.comfy_client import ComfyClient -from engine.config import AppConfig -from engine.script_gen import generate_scenes -from engine.types import Scene -from engine.video_editor import Segment, render_final - - -app = FastAPI(title="AiVideo POC") - - -def _ensure_mock_image(path: Path, size: tuple[int, int]) -> Path: - if path.exists(): - return path - path.parent.mkdir(parents=True, exist_ok=True) - img = Image.new("RGB", size, color=(20, 24, 33)) - draw = ImageDraw.Draw(img) - text = "MOCK" - try: - font = ImageFont.load_default() - except Exception: - font = None - draw.text((size[0] // 2 - 30, size[1] // 2 - 10), text, fill=(240, 240, 240), font=font) - img.save(path) - return path - - -def _make_mock_video(out_path: Path, image_path: Path, duration_s: float, fps: int) -> Path: - out_path.parent.mkdir(parents=True, exist_ok=True) - clip = ImageClip(str(image_path)).with_duration(max(0.5, duration_s)).with_fps(fps) - try: - clip.write_videofile(str(out_path), codec="libx264", audio=False, fps=fps, preset="veryfast") - finally: - clip.close() - return out_path - - -def _emit(line: str) -> None: - print(line, flush=True) - - -def _emit_scene(scene_idx: int, scene: Scene) -> None: - payload = { - "index": scene_idx, - "image_prompt": scene.image_prompt, - "video_motion": scene.video_motion, - "narration": scene.narration, - } - _emit("SCENE_JSON " + json.dumps(payload, ensure_ascii=False)) - - -def _fallback_scenes(prompt: str) -> list[Scene]: - return [ - Scene( - image_prompt=f"{prompt},城市夜景,霓虹灯,电影感", - video_motion="缓慢推进镜头,轻微摇镜", - narration="夜色温柔落在街灯上", - ), - Scene( 
- image_prompt=f"{prompt},咖啡店窗边,暖光,细雨", - video_motion="侧向平移,人物轻轻抬头", - narration="雨声里藏着一段回忆", - ), - Scene( - image_prompt=f"{prompt},桥上远景,车流光轨,温暖", - video_motion="拉远全景,光轨流动", - narration="我们在光里学会告别", - ), - ] - - -def _should_allow_llm_without_key(cfg: AppConfig) -> bool: - api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY")) - return bool(os.environ.get(api_key_env)) - - -def _generate_scenes_for_run(prompt: str, cfg: AppConfig, mock: bool) -> list[Scene]: - if mock and not _should_allow_llm_without_key(cfg): - return _fallback_scenes(prompt) - try: - return generate_scenes(prompt, cfg) - except Exception: - if mock: - return _fallback_scenes(prompt) - raise - - -async def run_pipeline(prompt: str, cfg: AppConfig, mock: bool) -> Path: - scenes = _generate_scenes_for_run(prompt, cfg, mock=mock) - audios = await synthesize_scenes([s.narration for s in scenes], cfg) - - segments: list[Segment] = [] - fps = int(cfg.get("video.mock_fps", 24)) - mock_size = cfg.get("video.mock_size", [1024, 576]) - w, h = int(mock_size[0]), int(mock_size[1]) - mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h)) - - if mock: - for i, (scene, audio) in enumerate(zip(scenes, audios), start=1): - vpath = Path("./assets/mock_videos") / f"scene_{i:02d}.mp4" - _make_mock_video(vpath, mock_image, audio.duration_s, fps=fps) - segments.append(Segment(video_path=vpath, audio_path=audio.path, narration=scene.narration)) - return render_final(segments, cfg) - - comfy = ComfyClient(cfg) - wf = comfy.load_workflow() - for i, (scene, audio) in enumerate(zip(scenes, audios), start=1): - seed = random.randint(1, 2_147_483_647) - wf_i = comfy.inject_params(wf, image_prompt=scene.image_prompt, seed=seed, motion_prompt=scene.video_motion or None) - result = await comfy.run_workflow(wf_i) - # pick first mp4-like output; if none, fall back to first file. 
- candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}] - video_path = candidates[0] if candidates else result.output_files[0] - segments.append(Segment(video_path=video_path, audio_path=audio.path, narration=scene.narration)) - return render_final(segments, cfg) - - -def script_only(prompt: str, cfg: AppConfig, mock: bool) -> int: - scenes = _generate_scenes_for_run(prompt, cfg, mock=mock) - _emit("SCRIPT_BEGIN") - for idx, s in enumerate(scenes, start=1): - _emit_scene(idx, s) - _emit("SCRIPT_END") - return 0 - - def main() -> int: - parser = argparse.ArgumentParser(description="AIGC auto video generation POC") - parser.add_argument("--prompt", required=True, help="User creative prompt") - parser.add_argument("--config", default="./configs/config.yaml", help="Config yaml path") - parser.add_argument("--mock", action="store_true", help="Mock mode (no ComfyUI needed)") - parser.add_argument( - "--script-only", - action="store_true", - help="Only generate script/scenes and print to stdout (for Node.js streaming)", - ) - args = parser.parse_args() + # Backward-compatible entry: delegate to engine/main.py + from engine.main import main as engine_main - cfg = AppConfig.load(args.config) - if args.script_only: - return script_only(args.prompt, cfg, mock=args.mock) - out = asyncio.run(run_pipeline(args.prompt, cfg, mock=args.mock)) - print(str(out)) - return 0 + return engine_main() if __name__ == "__main__": diff --git a/outputs/final_poc.mp4 b/outputs/final_poc.mp4 new file mode 100644 index 0000000..38d1761 Binary files /dev/null and b/outputs/final_poc.mp4 differ diff --git a/scripts/check_comfy.py b/scripts/check_comfy.py index a03413e..66b2102 100644 --- a/scripts/check_comfy.py +++ b/scripts/check_comfy.py @@ -3,9 +3,11 @@ from __future__ import annotations import argparse import json import sys +from pathlib import Path from typing import Any import httpx +import yaml def fetch_object_info(base_url: str, timeout_s: float = 
5.0) -> dict[str, Any]: @@ -19,19 +21,40 @@ def fetch_object_info(base_url: str, timeout_s: float = 5.0) -> dict[str, Any]: return data +def read_base_url_from_config(config_path: str) -> str | None: + p = Path(config_path) + if not p.exists(): + return None + try: + raw = yaml.safe_load(p.read_text(encoding="utf-8")) + except Exception: + return None + if not isinstance(raw, dict): + return None + app = raw.get("app") + if not isinstance(app, dict): + return None + v = app.get("comfy_base_url") + if isinstance(v, str) and v.strip(): + return v.strip() + return None + + def main() -> int: parser = argparse.ArgumentParser(description="Check ComfyUI API connectivity") parser.add_argument( "--base-url", - default="http://127.0.0.1:8188", - help="ComfyUI base URL (default: http://127.0.0.1:8188)", + default="", + help="ComfyUI base URL (if empty, read from config app.comfy_base_url)", ) + parser.add_argument("--config", default="./configs/config.yaml", help="Config yaml path") parser.add_argument("--timeout", type=float, default=5.0, help="Request timeout seconds") parser.add_argument("--pretty", action="store_true", help="Pretty print JSON") args = parser.parse_args() try: - data = fetch_object_info(args.base_url, timeout_s=args.timeout) + base_url = args.base_url.strip() or read_base_url_from_config(args.config) or "http://127.0.0.1:8188" + data = fetch_object_info(base_url, timeout_s=args.timeout) out = json.dumps(data, ensure_ascii=False, indent=2 if args.pretty else None) sys.stdout.write(out + "\n") return 0 diff --git a/scripts/inspect_comfy_node.py b/scripts/inspect_comfy_node.py new file mode 100644 index 0000000..45afe5e --- /dev/null +++ b/scripts/inspect_comfy_node.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any, Iterable + +import httpx +import yaml + + +def fetch_object_info(base_url: str, timeout_s: float = 5.0) -> dict[str, Any]: + url = 
base_url.rstrip("/") + "/object_info" + with httpx.Client(timeout=timeout_s) as client: + r = client.get(url) + r.raise_for_status() + data = r.json() + if not isinstance(data, dict): + raise RuntimeError(f"Unexpected object_info type: {type(data)}") + return data + + +def load_yaml(path: str | Path) -> dict[str, Any]: + p = Path(path) + if not p.exists(): + return {} + raw = yaml.safe_load(p.read_text(encoding="utf-8")) + return raw if isinstance(raw, dict) else {} + + +def load_json(path: str | Path) -> Any: + p = Path(path) + if not p.exists(): + return None + return json.loads(p.read_text(encoding="utf-8")) + + +def iter_node_class_types(object_info: dict[str, Any]) -> Iterable[str]: + for k in object_info.keys(): + if isinstance(k, str): + yield k + + +def find_ckpt_values(object_info: dict[str, Any]) -> list[str]: + """ + Heuristic: locate any node input that looks like checkpoint selector. + ComfyUI commonly uses CheckpointLoaderSimple.inputs.required.ckpt_name = [[...values...]] + """ + vals: list[str] = [] + for node_name, node_info in object_info.items(): + if not isinstance(node_info, dict): + continue + inputs = node_info.get("input") + if not isinstance(inputs, dict): + continue + required = inputs.get("required") + if not isinstance(required, dict): + continue + for key in ("ckpt_name", "checkpoint", "model_name"): + entry = required.get(key) + # expected shape: [ [values...], {meta...} ] or [ [values...] 
] + if isinstance(entry, list) and entry: + first = entry[0] + if isinstance(first, list): + for v in first: + if isinstance(v, str): + vals.append(v) + # de-dup + seen: set[str] = set() + out: list[str] = [] + for v in vals: + if v not in seen: + seen.add(v) + out.append(v) + return out + + +def has_ksampler_seed(object_info: dict[str, Any], ks_classes: list[str], seed_key: str) -> bool: + for cls in ks_classes: + info = object_info.get(cls) + if not isinstance(info, dict): + continue + inputs = info.get("input") + if not isinstance(inputs, dict): + continue + required = inputs.get("required") + optional = inputs.get("optional") + if isinstance(required, dict) and seed_key in required: + return True + if isinstance(optional, dict) and seed_key in optional: + return True + return False + + +def resolve_seed_target_from_workflow(workflow: Any, seed_class_types: list[str]) -> tuple[str | None, str | None]: + """ + Returns (node_id, class_type) by scanning workflow dict for first matching class_type. + workflow_api.json is typically { node_id: {class_type, inputs, ...}, ... 
} + """ + if not isinstance(workflow, dict): + return (None, None) + want = set(seed_class_types) + for node_id, node in workflow.items(): + if not isinstance(node, dict): + continue + ct = node.get("class_type") + if isinstance(ct, str) and ct in want: + return (str(node_id), ct) + return (None, None) + + +def _workflow_nodes(workflow: Any) -> dict[str, Any]: + if not isinstance(workflow, dict): + raise ValueError("workflow_api.json root must be an object mapping node_id -> node") + return workflow + + +def _get_node(workflow: dict[str, Any], node_id: str) -> dict[str, Any]: + node = workflow.get(str(node_id)) + if not isinstance(node, dict): + raise KeyError(f"workflow missing node_id={node_id}") + return node + + +def _validate_configured_node_id( + *, + workflow: dict[str, Any], + node_id: Any, + allowed_class_types: list[str], + name: str, +) -> list[str]: + errs: list[str] = [] + if node_id is None or not str(node_id).strip(): + return errs + nid = str(node_id).strip() + try: + node = _get_node(workflow, nid) + except Exception as e: + return [f"{name}: configured node_id={nid} not found in workflow ({e})"] + ct = node.get("class_type") + if allowed_class_types and isinstance(ct, str) and ct not in set(allowed_class_types): + errs.append(f"{name}: node_id={nid} class_type={ct} not in allowed {allowed_class_types}") + return errs + + +def _workflow_has_ltx_node(workflow: dict[str, Any], keyword: str) -> bool: + kw = keyword.lower() + for _nid, node in workflow.items(): + if not isinstance(node, dict): + continue + ct = node.get("class_type") + if isinstance(ct, str) and kw in ct.lower(): + return True + return False + + +def main() -> int: + p = argparse.ArgumentParser(description="Inspect ComfyUI /object_info for LTX + checkpoints + sampler override readiness") + p.add_argument("--base-url", default="") + p.add_argument("--timeout", type=float, default=8.0) + p.add_argument("--config", default="./configs/config.yaml") + p.add_argument("--workflow", 
default="./workflow_api.json") + p.add_argument( + "--expected-checkpoint", + action="append", + default=[], + help="Expected checkpoint name (can repeat). Exact match against ckpt list.", + ) + p.add_argument( + "--ltx-keyword", + default="LTX", + help="Keyword to detect LTX-Video nodes in object_info keys (default: LTX)", + ) + args = p.parse_args() + + cfg = load_yaml(args.config) + base_url = (args.base_url or "").strip() + if not base_url: + app_cfg = (cfg.get("app") or {}) if isinstance(cfg, dict) else {} + if isinstance(app_cfg, dict): + base_url = str(app_cfg.get("comfy_base_url", "")).strip() + if not base_url: + base_url = "http://127.0.0.1:8188" + + comfy_cfg = (cfg.get("comfy_workflow") or {}) if isinstance(cfg, dict) else {} + seed_key = str(comfy_cfg.get("seed_input_key", "seed")) + seed_class_types = comfy_cfg.get("seed_node_class_types") or ["KSampler", "KSamplerAdvanced"] + if not isinstance(seed_class_types, list): + seed_class_types = ["KSampler", "KSamplerAdvanced"] + seed_class_types = [str(x) for x in seed_class_types] + + # Industrial hard requirement: workflow must exist for ID matching checks + wf_path = Path(args.workflow) + if not wf_path.exists(): + sys.stderr.write(f"[inspect] FAIL: workflow_api.json not found at {wf_path}\n") + return 3 + + try: + object_info = fetch_object_info(base_url, timeout_s=args.timeout) + except Exception as e: + sys.stderr.write(f"[inspect] ERROR fetch /object_info: {e}\n") + return 2 + + # 1) LTX-Video plugin activated? (heuristic) + keyword = str(args.ltx_keyword or "LTX") + ltx_hits = sorted([k for k in iter_node_class_types(object_info) if keyword.lower() in k.lower()]) + ltx_ok = len(ltx_hits) > 0 + + # 2) checkpoint list includes expected + ckpts = find_ckpt_values(object_info) + expected = list(args.expected_checkpoint or []) + missing = [x for x in expected if x not in ckpts] + ckpt_ok = len(missing) == 0 if expected else True + + # 3) KSampler defaults overridden by our python? 
(readiness check) + # /object_info cannot prove runtime override happened, but we can validate: + # - ComfyUI exposes a sampler node class with a 'seed' input key + # - our config intends to override that same key + ks_ok = has_ksampler_seed(object_info, seed_class_types, seed_key) + + wf = load_json(args.workflow) + try: + wf_nodes = _workflow_nodes(wf) + except Exception as e: + sys.stderr.write(f"[inspect] FAIL: invalid workflow format: {e}\n") + return 3 + + seed_node_id, seed_node_class = resolve_seed_target_from_workflow(wf_nodes, seed_class_types) + wf_ok = seed_node_id is not None + + # Hard validation: configured node IDs must exist and match expected class_type families + prompt_allowed = [str(x) for x in (comfy_cfg.get("prompt_node_class_types") or []) if str(x).strip()] + seed_allowed = [str(x) for x in (comfy_cfg.get("seed_node_class_types") or []) if str(x).strip()] + save_allowed = [str(x) for x in (comfy_cfg.get("save_node_class_types") or []) if str(x).strip()] + errs: list[str] = [] + errs += _validate_configured_node_id( + workflow=wf_nodes, + node_id=comfy_cfg.get("prompt_node_id"), + allowed_class_types=prompt_allowed, + name="prompt_node_id", + ) + errs += _validate_configured_node_id( + workflow=wf_nodes, + node_id=comfy_cfg.get("seed_node_id"), + allowed_class_types=seed_allowed, + name="seed_node_id", + ) + errs += _validate_configured_node_id( + workflow=wf_nodes, + node_id=comfy_cfg.get("save_node_id"), + allowed_class_types=save_allowed, + name="save_node_id", + ) + errs += _validate_configured_node_id( + workflow=wf_nodes, + node_id=comfy_cfg.get("motion_node_id"), + allowed_class_types=[], + name="motion_node_id", + ) + + # Hard validation: workflow must contain LTX node(s) if we're using LTX-Video pipeline + wf_ltx_ok = _workflow_has_ltx_node(wf_nodes, str(args.ltx_keyword or "LTX")) + + # Hard validation: seed node in workflow must expose the seed input key (so it can be overridden) + wf_seed_key_ok = False + if wf_ok: + try: + node 
= _get_node(wf_nodes, str(seed_node_id)) + inputs = node.get("inputs") + wf_seed_key_ok = isinstance(inputs, dict) and seed_key in inputs + except Exception: + wf_seed_key_ok = False + + report = { + "base_url": base_url, + "ltx": { + "keyword": keyword, + "activated": ltx_ok, + "matching_nodes": ltx_hits[:50], + "match_count": len(ltx_hits), + }, + "checkpoints": { + "expected": expected, + "found_count": len(ckpts), + "missing": missing, + "ok": ckpt_ok, + "sample": ckpts[:50], + }, + "sampler_override_readiness": { + "seed_input_key_from_config": seed_key, + "seed_node_class_types_from_config": seed_class_types, + "comfy_has_seed_input": ks_ok, + "workflow_path": args.workflow, + "workflow_seed_node_detected": wf_ok, + "workflow_seed_node_id": seed_node_id, + "workflow_seed_node_class_type": seed_node_class, + "workflow_seed_node_has_seed_key": wf_seed_key_ok, + "note": "object_info cannot prove runtime override; this enforces key alignment + workflow ID/class checks.", + }, + "workflow_validation": { + "ltx_node_in_workflow": wf_ltx_ok, + "configured_node_id_errors": errs, + }, + "ok": bool(ltx_ok and ckpt_ok and ks_ok and wf_ok and wf_ltx_ok and wf_seed_key_ok and not errs), + } + + sys.stdout.write(json.dumps(report, ensure_ascii=False, indent=2) + "\n") + + if not ltx_ok: + sys.stderr.write(f"[inspect] FAIL: no node matched keyword '{keyword}' (LTX plugin may be missing)\n") + if not ckpt_ok: + sys.stderr.write(f"[inspect] FAIL: missing checkpoints: {missing}\n") + if not ks_ok: + sys.stderr.write(f"[inspect] FAIL: ComfyUI sampler classes {seed_class_types} do not expose input '{seed_key}'\n") + if not wf_ok: + sys.stderr.write(f"[inspect] FAIL: workflow does not contain a seed node of class types {seed_class_types}\n") + if not wf_ltx_ok: + sys.stderr.write(f"[inspect] FAIL: workflow has no node with class_type containing '{args.ltx_keyword}'\n") + if wf_ok and not wf_seed_key_ok: + sys.stderr.write(f"[inspect] FAIL: workflow seed node {seed_node_id} does 
not expose inputs['{seed_key}']\n") + if errs: + for e in errs: + sys.stderr.write(f"[inspect] FAIL: {e}\n") + + return 0 if report["ok"] else 3 + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/server/index.js b/server/index.js index d5ee17d..3ce4085 100644 --- a/server/index.js +++ b/server/index.js @@ -2,14 +2,34 @@ import express from "express"; import { spawn } from "node:child_process"; import path from "node:path"; import { fileURLToPath } from "node:url"; +import fs from "node:fs"; +import crypto from "node:crypto"; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const repoRoot = path.resolve(__dirname, ".."); +const outputsDir = path.join(repoRoot, "outputs"); +fs.mkdirSync(outputsDir, { recursive: true }); const app = express(); +app.use(express.json({ limit: "2mb" })); +app.use( + "/api/static", + express.static(outputsDir, { + fallthrough: true, + setHeaders: (res) => { + // Important: avoid stale video preview. 
+ res.setHeader("Cache-Control", "no-cache, no-transform"); + }, + }) +); app.use(express.static(path.join(__dirname, "public"))); +app.get("/api/health", (_req, res) => { + res.setHeader("Cache-Control", "no-cache"); + res.status(200).json({ ok: true }); +}); + function sseHeaders(res) { res.setHeader("Content-Type", "text/event-stream; charset=utf-8"); res.setHeader("Cache-Control", "no-cache, no-transform"); @@ -25,9 +45,46 @@ function sseSend(res, event, data) { res.write("\n"); } -app.get("/api/run", (req, res) => { +function newTaskId() { + return crypto.randomUUID(); +} + +function taskDir(taskId) { + return path.join(outputsDir, taskId); +} + +function ensureTaskDir(taskId) { + const dir = taskDir(taskId); + fs.mkdirSync(dir, { recursive: true }); + return dir; +} + +function spawnPythonStep({ step, prompt, configPath, mock, globalStyle, character, taskId, sceneIndex }) { + const py = process.env.PYTHON_BIN || "python3.10"; + const args = [ + "-m", + "engine.main", + "--prompt", + prompt, + "--config", + configPath, + "--step", + step, + "--task-id", + taskId, + ]; + if (sceneIndex) args.push("--scene-index", String(sceneIndex)); + if (globalStyle) args.push("--global-style", globalStyle); + if (character) args.push("--character", character); + if (mock) args.push("--mock"); + return spawn(py, args, { cwd: repoRoot, env: process.env, stdio: ["pipe", "pipe", "pipe"] }); +} + +app.get("/api/script", (req, res) => { const prompt = String(req.query.prompt || "").trim(); const mock = String(req.query.mock || "1") === "1"; + const globalStyle = String(req.query.global_style || "").trim(); + const character = String(req.query.character || "").trim(); const configPath = String(req.query.config || "./configs/config.yaml"); if (!prompt) { @@ -35,25 +92,21 @@ app.get("/api/run", (req, res) => { return; } + const taskId = newTaskId(); + ensureTaskDir(taskId); + sseHeaders(res); + sseSend(res, "task", JSON.stringify({ task_id: taskId })); sseSend(res, "status", 
"starting"); - // Unified in-container execution: Node spawns python directly. - const py = process.env.PYTHON_BIN || "python"; - const args = [ - path.join(repoRoot, "main.py"), - "--prompt", + const child = spawnPythonStep({ + step: "script", prompt, - "--config", configPath, - "--script-only", - ]; - if (mock) args.push("--mock"); - - const child = spawn(py, args, { - cwd: repoRoot, - env: process.env, - stdio: ["ignore", "pipe", "pipe"], + mock, + globalStyle, + character, + taskId, }); let buf = ""; @@ -64,14 +117,15 @@ app.get("/api/run", (req, res) => { buf = parts.pop() || ""; for (const line of parts) { if (!line) continue; - // Forward raw lines. Frontend will parse SCENE_JSON. - sseSend(res, "line", line); + if (line.startsWith("SCENE_JSON ")) sseSend(res, "scene", line.slice("SCENE_JSON ".length)); + else if (line.startsWith("PROG ")) sseSend(res, "prog", line.slice("PROG ".length)); + else sseSend(res, "line", line); } }); child.stderr.setEncoding("utf8"); child.stderr.on("data", (chunk) => { - sseSend(res, "stderr", chunk); + sseSend(res, "error", chunk); }); req.on("close", () => { @@ -80,13 +134,177 @@ app.get("/api/run", (req, res) => { child.on("exit", (code) => { if (buf.trim()) sseSend(res, "line", buf.trim()); - sseSend(res, "done", String(code ?? 0)); + sseSend(res, "done", String(code != null ? code : 0)); res.end(); }); }); -const port = Number(process.env.PORT || 3000); -app.listen(port, () => { - console.log(`[server] http://127.0.0.1:${port}`); +app.post("/api/refine", (req, res) => { + const prompt = String((req.body && req.body.prompt) || "").trim(); + const sceneIndex = Number((req.body && req.body.scene_index) || 1); + const scenes = req.body && req.body.scenes; + const scene = req.body && req.body.scene; + const mock = Boolean((req.body && req.body.mock) != null ? 
req.body.mock : true); + const globalStyle = String((req.body && req.body.global_style) || "").trim(); + const character = String((req.body && req.body.character) || "").trim(); + const configPath = String((req.body && req.body.config) || "./configs/config.yaml"); + const taskId = String((req.body && req.body.task_id) || "").trim() || newTaskId(); + + if (!prompt) return res.status(400).json({ error: "missing prompt" }); + if (!Number.isFinite(sceneIndex) || sceneIndex < 1) return res.status(400).json({ error: "bad scene_index" }); + if (!Array.isArray(scenes) && (!scene || typeof scene !== "object")) { + return res.status(400).json({ error: "missing scene or scenes[]" }); + } + ensureTaskDir(taskId); + + const child = spawnPythonStep({ + step: "refine", + prompt, + configPath, + mock, + globalStyle, + character, + taskId, + sceneIndex, + }); + if (Array.isArray(scenes)) { + child.stdin.end(JSON.stringify({ scenes })); + } else { + child.stdin.end(JSON.stringify({ scene })); + } + + let out = ""; + let err = ""; + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (c) => (out += c)); + child.stderr.on("data", (c) => (err += c)); + child.on("exit", (code) => { + if (code !== 0) return res.status(500).json({ error: "python failed", stderr: err, stdout: out }); + const line = out + .split(/\r?\n/) + .map((s) => s.trim()) + .find((s) => s.startsWith("SCENE_JSON ")); + if (!line) return res.status(500).json({ error: "no SCENE_JSON", stderr: err, stdout: out }); + const payload = JSON.parse(line.slice("SCENE_JSON ".length)); + return res.json({ task_id: taskId, scene: payload, stderr: err }); + }); }); +let isBusy = false; + +app.post("/api/render", (req, res) => { + const prompt = String((req.body && req.body.prompt) || "").trim(); + const scenes = req.body && req.body.scenes; + const mock = Boolean((req.body && req.body.mock) != null ? 
req.body.mock : false); + const globalStyle = String((req.body && req.body.global_style) || "").trim(); + const character = String((req.body && req.body.character) || "").trim(); + const configPath = String((req.body && req.body.config) || "./configs/config.yaml"); + const taskId = String((req.body && req.body.task_id) || "").trim() || newTaskId(); + + if (!prompt) return res.status(400).json({ error: "missing prompt" }); + if (!Array.isArray(scenes)) return res.status(400).json({ error: "missing scenes[]" }); + ensureTaskDir(taskId); + + if (isBusy) { + return res.status(429).json({ error: "busy", msg: "GPU is busy, try later" }); + } + isBusy = true; + + sseHeaders(res); + sseSend(res, "task", JSON.stringify({ task_id: taskId })); + sseSend(res, "status", "render_start"); + + const child = spawnPythonStep({ + step: "render", + prompt, + configPath, + mock, + globalStyle, + character, + taskId, + }); + child.stdin.end(JSON.stringify({ scenes })); + + let buf = ""; + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + + child.stdout.on("data", (chunk) => { + buf += chunk; + const parts = buf.split(/\r?\n/); + buf = parts.pop() || ""; + for (const line of parts) { + if (!line) continue; + if (line.startsWith("PROG ")) sseSend(res, "prog", line.slice("PROG ".length)); + else if (line.startsWith("RENDER_DONE ")) sseSend(res, "done", line.slice("RENDER_DONE ".length)); + else sseSend(res, "line", line); + } + }); + + child.stderr.on("data", (chunk) => { + sseSend(res, "error", chunk); + }); + + req.on("close", () => { + child.kill("SIGTERM"); + }); + + child.on("exit", (code) => { + isBusy = false; + if (buf.trim()) sseSend(res, "line", buf.trim()); + if (code !== 0) sseSend(res, "error", `[ERROR] python exit_code=${code}`); + res.end(); + }); +}); + +async function runSelfCheck() { + const py = process.env.PYTHON_BIN || "python3.10"; + const checks = [ + { name: "check_comfy", args: ["scripts/check_comfy.py"] }, + { name: "inspect_comfy_node", 
args: ["scripts/inspect_comfy_node.py"] }, + ]; + for (const c of checks) { + const deadline = Date.now() + 90_000; + let lastErr = ""; + while (Date.now() < deadline) { + try { + await new Promise((resolve, reject) => { + const child = spawn(py, c.args, { cwd: repoRoot, env: process.env, stdio: ["ignore", "pipe", "pipe"] }); + let out = ""; + let err = ""; + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (d) => (out += d)); + child.stderr.on("data", (d) => (err += d)); + child.on("exit", (code) => { + if (code === 0) return resolve(true); + reject(new Error(`${c.name} failed (code=${code})\n${err || out}`)); + }); + }); + lastErr = ""; + break; + } catch (e) { + lastErr = String(e); + await new Promise((r) => setTimeout(r, 2000)); + } + } + if (lastErr) { + throw new Error(lastErr); + } + } +} + +const port = Number(process.env.PORT || 3000); +(async () => { + try { + await runSelfCheck(); + app.listen(port, () => { + console.log(`[server] http://127.0.0.1:${port}`); + }); + } catch (e) { + console.error(String(e)); + process.exit(1); + } +})(); + diff --git a/server/public/index.html b/server/public/index.html index 1e92cea..a10cf62 100644 --- a/server/public/index.html +++ b/server/public/index.html @@ -3,7 +3,7 @@ - AiVideo POC - Script Stream Test + AiVideo POC - Interactive -

AiVideo POC:实时分镜脚本流测试

-

点击运行后,页面会通过 SSE 实时接收 Python stdout,并把分镜渲染到下方。

+
-
- - - - -
+ + + -
+