feat: 新增文件

This commit is contained in:
Daniel
2026-03-18 17:36:07 +08:00
commit f99098ec58
702 changed files with 68533 additions and 0 deletions

161
main.py Normal file
View File

@@ -0,0 +1,161 @@
from __future__ import annotations
import argparse
import asyncio
import json
import os
import random
from pathlib import Path
from fastapi import FastAPI
from moviepy import ImageClip
from PIL import Image, ImageDraw, ImageFont
from engine.audio_gen import synthesize_scenes
from engine.comfy_client import ComfyClient
from engine.config import AppConfig
from engine.script_gen import generate_scenes
from engine.types import Scene
from engine.video_editor import Segment, render_final
# ASGI application object. NOTE(review): no routes are registered in this
# module — presumably mounted/extended elsewhere, or kept for future use; verify.
app = FastAPI(title="AiVideo POC")
def _ensure_mock_image(path: Path, size: tuple[int, int]) -> Path:
    """Lazily create a dark placeholder image labelled "MOCK" at *path*.

    If the file already exists it is returned untouched; otherwise the parent
    directory is created and a new RGB image of *size* is written.
    """
    if path.exists():
        return path
    path.parent.mkdir(parents=True, exist_ok=True)
    canvas = Image.new("RGB", size, color=(20, 24, 33))
    pen = ImageDraw.Draw(canvas)
    try:
        label_font = ImageFont.load_default()
    except Exception:
        label_font = None  # best-effort: ImageDraw.text accepts font=None
    width, height = size
    # Rough manual centering of the 4-character label.
    pen.text((width // 2 - 30, height // 2 - 10), "MOCK", fill=(240, 240, 240), font=label_font)
    canvas.save(path)
    return path
def _make_mock_video(out_path: Path, image_path: Path, duration_s: float, fps: int) -> Path:
    """Render a still image as a silent H.264 clip of at least 0.5 s.

    The clip duration is clamped up to 0.5 s so zero-length narration still
    produces a playable video. Returns *out_path*.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    still = ImageClip(str(image_path)).with_duration(max(0.5, duration_s)).with_fps(fps)
    try:
        still.write_videofile(str(out_path), codec="libx264", audio=False, fps=fps, preset="veryfast")
    finally:
        still.close()  # release ffmpeg resources even if encoding fails
    return out_path
def _emit(line: str) -> None:
    """Write *line* to stdout and flush immediately.

    Flushing matters: the parent Node.js process consumes this stream line by
    line and must not wait on Python's output buffering.
    """
    print(line, flush=True)
def _emit_scene(scene_idx: int, scene: Scene) -> None:
    """Emit one scene as a machine-readable ``SCENE_JSON <json>`` stdout line."""
    record = {
        "index": scene_idx,
        "image_prompt": scene.image_prompt,
        "video_motion": scene.video_motion,
        "narration": scene.narration,
    }
    # ensure_ascii=False keeps CJK narration human-readable in the stream.
    _emit("SCENE_JSON " + json.dumps(record, ensure_ascii=False))
def _fallback_scenes(prompt: str) -> list[Scene]:
    """Return three canned scenes derived from *prompt*.

    Used when the LLM is unavailable (mock mode without an API key, or a
    script-generation failure in mock mode).
    """
    presets = [
        ("城市夜景,霓虹灯,电影感", "缓慢推进镜头,轻微摇镜", "夜色温柔落在街灯上"),
        ("咖啡店窗边,暖光,细雨", "侧向平移,人物轻轻抬头", "雨声里藏着一段回忆"),
        ("桥上远景,车流光轨,温暖", "拉远全景,光轨流动", "我们在光里学会告别"),
    ]
    return [
        Scene(image_prompt=f"{prompt},{style}", video_motion=motion, narration=line)
        for style, motion, line in presets
    ]
def _should_allow_llm_without_key(cfg: AppConfig) -> bool:
    """Return True when the configured API-key env var holds a non-empty value.

    NOTE(review): despite its name, this reports whether a key *is* present;
    the caller uses it to decide whether the real LLM may be attempted while
    in mock mode. Renaming would touch callers, so the name is kept.
    """
    env_name = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
    value = os.environ.get(env_name)
    return bool(value)
def _generate_scenes_for_run(prompt: str, cfg: AppConfig, mock: bool) -> list[Scene]:
    """Produce the scene list, degrading gracefully in mock mode.

    Mock mode without an API key skips the LLM entirely; with a key the LLM
    is attempted and any failure falls back to canned scenes. Outside mock
    mode, LLM errors propagate to the caller.
    """
    if mock and not _should_allow_llm_without_key(cfg):
        return _fallback_scenes(prompt)
    try:
        return generate_scenes(prompt, cfg)
    except Exception:  # broad on purpose: mock runs must never die on LLM errors
        if not mock:
            raise
        return _fallback_scenes(prompt)
async def run_pipeline(prompt: str, cfg: AppConfig, mock: bool) -> Path:
    """Run the full generation pipeline and return the final video path.

    Generates scenes, synthesizes narration audio, then either renders
    placeholder clips (mock mode) or drives one ComfyUI workflow per scene,
    and finally stitches all segments with ``render_final``.

    Args:
        prompt: User creative prompt driving script generation.
        cfg: Application configuration.
        mock: When True, skip ComfyUI and produce still-image placeholder clips.

    Returns:
        Path of the rendered final video.

    Raises:
        RuntimeError: If a ComfyUI workflow produces no output files.
    """
    scenes = _generate_scenes_for_run(prompt, cfg, mock=mock)
    audios = await synthesize_scenes([s.narration for s in scenes], cfg)
    segments: list[Segment] = []
    if mock:
        # Mock-only setup lives inside this branch: the original created
        # ./assets/mock.png on disk even during real (non-mock) runs.
        fps = int(cfg.get("video.mock_fps", 24))
        mock_size = cfg.get("video.mock_size", [1024, 576])
        w, h = int(mock_size[0]), int(mock_size[1])
        mock_image = _ensure_mock_image(Path("./assets/mock.png"), (w, h))
        for i, (scene, audio) in enumerate(zip(scenes, audios), start=1):
            vpath = Path("./assets/mock_videos") / f"scene_{i:02d}.mp4"
            _make_mock_video(vpath, mock_image, audio.duration_s, fps=fps)
            segments.append(Segment(video_path=vpath, audio_path=audio.path, narration=scene.narration))
        return render_final(segments, cfg)
    comfy = ComfyClient(cfg)
    wf = comfy.load_workflow()
    for i, (scene, audio) in enumerate(zip(scenes, audios), start=1):
        seed = random.randint(1, 2_147_483_647)
        wf_i = comfy.inject_params(wf, image_prompt=scene.image_prompt, seed=seed, motion_prompt=scene.video_motion or None)
        result = await comfy.run_workflow(wf_i)
        # Fail loudly (instead of a bare IndexError) when a workflow yields nothing.
        if not result.output_files:
            raise RuntimeError(f"ComfyUI workflow for scene {i} produced no output files")
        # Prefer a video container; if none, fall back to the first output.
        candidates = [p for p in result.output_files if p.suffix.lower() in {".mp4", ".mov", ".webm"}]
        video_path = candidates[0] if candidates else result.output_files[0]
        segments.append(Segment(video_path=video_path, audio_path=audio.path, narration=scene.narration))
    return render_final(segments, cfg)
def script_only(prompt: str, cfg: AppConfig, mock: bool) -> int:
    """Generate scenes and stream them to stdout for the Node.js parent.

    Protocol: one ``SCRIPT_BEGIN`` line, one ``SCENE_JSON`` line per scene
    (1-based index), then ``SCRIPT_END``. Scene generation happens before
    ``SCRIPT_BEGIN`` is printed, so a generation failure emits no markers.
    Returns exit code 0.
    """
    generated = _generate_scenes_for_run(prompt, cfg, mock=mock)
    _emit("SCRIPT_BEGIN")
    for number, scene in enumerate(generated, start=1):
        _emit_scene(number, scene)
    _emit("SCRIPT_END")
    return 0
def main() -> int:
    """CLI entry point; parses arguments and returns a process exit code."""
    cli = argparse.ArgumentParser(description="AIGC auto video generation POC")
    cli.add_argument("--prompt", required=True, help="User creative prompt")
    cli.add_argument("--config", default="./configs/config.yaml", help="Config yaml path")
    cli.add_argument("--mock", action="store_true", help="Mock mode (no ComfyUI needed)")
    cli.add_argument(
        "--script-only",
        action="store_true",
        help="Only generate script/scenes and print to stdout (for Node.js streaming)",
    )
    opts = cli.parse_args()
    cfg = AppConfig.load(opts.config)
    # Fast path: emit the script protocol and exit without touching video gen.
    if opts.script_only:
        return script_only(opts.prompt, cfg, mock=opts.mock)
    final_path = asyncio.run(run_pipeline(opts.prompt, cfg, mock=opts.mock))
    print(str(final_path))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())