feat: 新增文件

This commit is contained in:
Daniel
2026-03-18 17:36:07 +08:00
commit f99098ec58
702 changed files with 68533 additions and 0 deletions

3
engine/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
# Public package API: re-export the Scene dataclass from engine.types.
from .types import Scene
__all__ = ["Scene"]

47
engine/audio_gen.py Normal file
View File

@@ -0,0 +1,47 @@
from __future__ import annotations
import asyncio
from dataclasses import dataclass
from pathlib import Path
import edge_tts
from moviepy.audio.io.AudioFileClip import AudioFileClip
from .config import AppConfig
@dataclass(frozen=True)
class AudioAsset:
    """A synthesized narration audio file together with its measured duration."""

    # Filesystem location of the generated mp3 file.
    path: Path
    # Duration in seconds as measured by MoviePy/ffmpeg (0.0 if unknown).
    duration_s: float
def _audio_duration_seconds(path: Path) -> float:
    """Return the duration of an audio file in seconds (0.0 when unknown)."""
    # MoviePy delegates to ffmpeg, which reports a reliable duration for mp3.
    audio_clip = AudioFileClip(str(path))
    try:
        duration = audio_clip.duration
        return float(duration) if duration else 0.0
    finally:
        audio_clip.close()
async def synthesize_one(text: str, out_path: Path, voice: str, rate: str, volume: str) -> AudioAsset:
    """Synthesize one narration to mp3 via edge-tts and measure its length."""
    # Make sure the destination directory exists before edge-tts writes the file.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    tts = edge_tts.Communicate(text=text, voice=voice, rate=rate, volume=volume)
    await tts.save(str(out_path))
    return AudioAsset(path=out_path, duration_s=_audio_duration_seconds(out_path))
async def synthesize_scenes(narrations: list[str], cfg: AppConfig) -> list[AudioAsset]:
    """Synthesize one audio asset per narration concurrently; order follows input."""
    voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
    rate = str(cfg.get("tts.rate", "+0%"))
    volume = str(cfg.get("tts.volume", "+0%"))
    out_dir = Path(str(cfg.get("tts.output_dir", "./assets/audio")))
    out_dir.mkdir(parents=True, exist_ok=True)
    # Scene files are numbered from 1: scene_01.mp3, scene_02.mp3, ...
    jobs = [
        synthesize_one(text, out_dir / f"scene_{idx:02d}.mp3", voice, rate, volume)
        for idx, text in enumerate(narrations, start=1)
    ]
    return await asyncio.gather(*jobs)

188
engine/comfy_client.py Normal file
View File

@@ -0,0 +1,188 @@
from __future__ import annotations
import asyncio
import json
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable
import httpx
from .config import AppConfig
@dataclass(frozen=True)
class ComfyResult:
    """Outcome of one ComfyUI workflow run."""

    # Job id assigned by the ComfyUI /prompt endpoint.
    prompt_id: str
    # Output files reported by the job history, mapped onto the local output dir.
    output_files: list[Path]
class ComfyClient:
    """Thin HTTP client for running ComfyUI API-format workflows.

    Submits a workflow via POST /prompt, then polls GET /history until the
    job's reported output files appear in the locally mounted output
    directory.
    """

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg
        self.base_url = str(cfg.get("app.comfy_base_url", "http://127.0.0.1:8188")).rstrip("/")
        self.output_dir = Path(str(cfg.get("app.comfy_output_dir", "./ComfyUI/output")))
        self.workflow_path = Path(str(cfg.get("comfy_workflow.workflow_path", "./workflow_api.json")))
        # Random client id so ComfyUI can associate this session's submissions.
        self._client_id = str(uuid.uuid4())

    def load_workflow(self) -> dict[str, Any]:
        """Read and validate the workflow JSON file configured for this client.

        Raises:
            FileNotFoundError: if the workflow file is missing.
            ValueError: if the JSON root is not an object.
        """
        if not self.workflow_path.exists():
            raise FileNotFoundError(f"workflow file not found: {self.workflow_path}")
        raw = json.loads(self.workflow_path.read_text(encoding="utf-8"))
        if not isinstance(raw, dict):
            raise ValueError(f"workflow_api.json root must be dict, got {type(raw)}")
        return raw

    def _nodes(self, workflow: dict[str, Any]) -> dict[str, Any]:
        # ComfyUI API workflow exports use { node_id: {class_type, inputs, ...}, ... }
        return workflow

    def _find_node_id_by_class_type(self, workflow: dict[str, Any], class_types: Iterable[str]) -> str | None:
        """Return the id of the first node whose class_type is in *class_types*, else None."""
        want = {c.strip() for c in class_types if c and str(c).strip()}
        if not want:
            return None
        for node_id, node in self._nodes(workflow).items():
            if not isinstance(node, dict):
                continue
            ct = node.get("class_type")
            if isinstance(ct, str) and ct in want:
                return str(node_id)
        return None

    def _resolve_node_id(self, workflow: dict[str, Any], configured_id: Any, fallback_class_types_key: str) -> str:
        """Resolve a target node id, preferring an explicitly configured id.

        Falls back to matching by class type using the list configured under
        ``comfy_workflow.<fallback_class_types_key>``.

        Raises:
            KeyError: if the configured id is absent from the workflow, or no
                node matches any fallback class type.
            ValueError: if the fallback config value is not a list.
        """
        if configured_id is not None and str(configured_id).strip():
            node_id = str(configured_id).strip()
            if node_id not in self._nodes(workflow):
                raise KeyError(f"Configured node_id {node_id} not found in workflow")
            return node_id
        class_types = self.cfg.get(f"comfy_workflow.{fallback_class_types_key}", []) or []
        if not isinstance(class_types, list):
            raise ValueError(f"Config comfy_workflow.{fallback_class_types_key} must be list")
        found = self._find_node_id_by_class_type(workflow, [str(x) for x in class_types])
        if not found:
            raise KeyError(f"Cannot resolve node by class types: {class_types}")
        return found

    def inject_params(self, workflow: dict[str, Any], image_prompt: str, seed: int, motion_prompt: str | None = None) -> dict[str, Any]:
        """Return a deep copy of *workflow* with prompt, seed and optional motion injected."""
        wf = json.loads(json.dumps(workflow))  # deep copy via JSON round-trip
        prompt_node_id = self._resolve_node_id(
            wf,
            self.cfg.get("comfy_workflow.prompt_node_id", None),
            "prompt_node_class_types",
        )
        prompt_key = str(self.cfg.get("comfy_workflow.prompt_input_key", "text"))
        self._set_input(wf, prompt_node_id, prompt_key, image_prompt)
        seed_node_id = self._resolve_node_id(
            wf,
            self.cfg.get("comfy_workflow.seed_node_id", None),
            "seed_node_class_types",
        )
        seed_key = str(self.cfg.get("comfy_workflow.seed_input_key", "seed"))
        self._set_input(wf, seed_node_id, seed_key, int(seed))
        # Motion is optional: only injected when both a prompt and a node id are given.
        motion_node_id = self.cfg.get("comfy_workflow.motion_node_id", None)
        if motion_prompt and motion_node_id is not None and str(motion_node_id).strip():
            motion_key = str(self.cfg.get("comfy_workflow.motion_input_key", "text"))
            self._set_input(wf, str(motion_node_id).strip(), motion_key, motion_prompt)
        return wf

    def _set_input(self, workflow: dict[str, Any], node_id: str, key: str, value: Any) -> None:
        """Set ``inputs[key] = value`` on the given node, creating ``inputs`` if absent."""
        node = self._nodes(workflow).get(str(node_id))
        if not isinstance(node, dict):
            raise KeyError(f"Node {node_id} not found")
        inputs = node.get("inputs")
        if inputs is None:
            inputs = {}
            node["inputs"] = inputs
        if not isinstance(inputs, dict):
            raise TypeError(f"Node {node_id} inputs must be dict, got {type(inputs)}")
        inputs[key] = value

    async def _post_prompt(self, client: httpx.AsyncClient, workflow: dict[str, Any]) -> str:
        """Submit the workflow to POST /prompt; return the server-assigned prompt id."""
        url = f"{self.base_url}/prompt"
        payload = {"prompt": workflow, "client_id": self._client_id}
        r = await client.post(url, json=payload)
        r.raise_for_status()
        data = r.json()
        # ComfyUI responds with {"prompt_id": "...", ...}; the former fallback to
        # an uppercase "PROMPT_ID" key matched nothing the server emits and was dropped.
        pid = data.get("prompt_id")
        if not isinstance(pid, str) or not pid:
            raise RuntimeError(f"Unexpected /prompt response: {data}")
        return pid

    async def _get_history(self, client: httpx.AsyncClient, prompt_id: str) -> dict[str, Any] | None:
        """Fetch the history entry for *prompt_id*, or None when not yet available.

        Tries /history/{prompt_id} first, then the full /history listing
        (which is keyed by prompt id).
        """
        for url in (f"{self.base_url}/history/{prompt_id}", f"{self.base_url}/history"):
            try:
                r = await client.get(url)
                if r.status_code == 404:
                    continue
                r.raise_for_status()
                data = r.json()
                if isinstance(data, dict):
                    if prompt_id in data and isinstance(data[prompt_id], dict):
                        return data[prompt_id]
                    if url.endswith(f"/{prompt_id}"):
                        return data
                return None
            except httpx.HTTPStatusError:
                raise
            except Exception:
                # Transient transport/JSON errors: try the next endpoint.
                continue
        return None

    def _extract_output_files(self, history_item: dict[str, Any]) -> list[Path]:
        """Collect output file paths from a history entry, de-duplicated in order.

        Paths honor the ``subfolder`` field ComfyUI reports; it was previously
        ignored, which produced wrong paths for workflows saving into subfolders.
        """
        out: list[Path] = []
        outputs = history_item.get("outputs")
        if not isinstance(outputs, dict):
            return out

        def walk(v: Any) -> None:
            if isinstance(v, dict):
                # ComfyUI stores files like {"filename":"x.mp4","subfolder":"","type":"output"}
                fn = v.get("filename")
                if isinstance(fn, str) and fn.strip():
                    sub = v.get("subfolder")
                    if isinstance(sub, str) and sub.strip():
                        out.append(self.output_dir / sub / fn)
                    else:
                        out.append(self.output_dir / fn)
                for vv in v.values():
                    walk(vv)
            elif isinstance(v, list):
                for vv in v:
                    walk(vv)

        walk(outputs)
        # De-dup while preserving order.
        seen: set[str] = set()
        uniq: list[Path] = []
        for p in out:
            s = str(p)
            if s not in seen:
                seen.add(s)
                uniq.append(p)
        return uniq

    async def run_workflow(self, workflow: dict[str, Any], *, poll_interval_s: float = 1.0, timeout_s: float = 300.0) -> ComfyResult:
        """Submit *workflow* and poll until its outputs appear on disk.

        Raises:
            TimeoutError: if no reported output file exists within ``timeout_s``.
        """
        async with httpx.AsyncClient(timeout=30.0) as client:
            prompt_id = await self._post_prompt(client, workflow)
            # get_event_loop() is deprecated inside coroutines; use the running loop's clock.
            loop = asyncio.get_running_loop()
            deadline = loop.time() + timeout_s
            while True:
                if loop.time() > deadline:
                    raise TimeoutError(f"ComfyUI job timeout: {prompt_id}")
                item = await self._get_history(client, prompt_id)
                if isinstance(item, dict):
                    files = self._extract_output_files(item)
                    # Heuristic: the job is done once any reported file exists on disk.
                    if files and any(p.exists() for p in files):
                        return ComfyResult(prompt_id=prompt_id, output_files=files)
                await asyncio.sleep(poll_interval_s)

28
engine/config.py Normal file
View File

@@ -0,0 +1,28 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Any
import yaml
@dataclass(frozen=True)
class AppConfig:
    """Immutable application configuration backed by a nested mapping."""

    # Raw nested mapping as parsed from the YAML config file.
    data: dict[str, Any]

    @staticmethod
    def load(path: str | Path) -> "AppConfig":
        """Load configuration from a YAML file.

        A missing file or an empty YAML document yields an empty config.
        (Previously an empty file raised ValueError, because yaml.safe_load
        returns None for an empty document.)

        Raises:
            ValueError: if the YAML root is present but not a mapping.
        """
        p = Path(path)
        raw = yaml.safe_load(p.read_text(encoding="utf-8")) if p.exists() else {}
        if raw is None:
            raw = {}
        if not isinstance(raw, dict):
            raise ValueError(f"Config root must be a mapping, got {type(raw)}")
        return AppConfig(raw)

    def get(self, dotted: str, default: Any = None) -> Any:
        """Look up a dotted key path (e.g. ``"tts.voice"``); return *default* on any miss."""
        cur: Any = self.data
        for part in dotted.split("."):
            if not isinstance(cur, dict) or part not in cur:
                return default
            cur = cur[part]
        return cur

80
engine/script_gen.py Normal file
View File

@@ -0,0 +1,80 @@
from __future__ import annotations
import json
import os
from typing import Any
from openai import OpenAI
from .config import AppConfig
from .types import Scene
def _system_prompt(scene_count: int, min_chars: int, max_chars: int) -> str:
    """Build the Chinese system prompt requesting a strict-JSON storyboard.

    The prompt pins the scene count and per-scene narration length and demands
    a consistent protagonist across all scenes; the model must emit JSON only.
    """
    return f"""你是一个专业短视频编剧与分镜师。
请把用户的创意扩展为 {scene_count} 个分镜(Scene) 的 JSON。
硬性约束:
1) 三个分镜的主角描述Character Description必须保持一致姓名/外观/服饰/风格不可前后矛盾。
2) 每个分镜必须包含字段image_prompt, video_motion, narration。
3) narration 为中文旁白,每段严格控制在约 {min_chars}-{max_chars} 字左右(宁可略短,不要超过太多)。
4) 画面描述要具体可视化video_motion 描述镜头运动/人物动作。
5) 只输出 JSON不要输出任何解释、markdown、代码块。
输出 JSON Schema示例结构
{{
"character_description": "...一致的主角设定...",
"scenes": [
{{"image_prompt":"...","video_motion":"...","narration":"..."}},
{{"image_prompt":"...","video_motion":"...","narration":"..."}},
{{"image_prompt":"...","video_motion":"...","narration":"..."}}
]
}}
"""
def generate_scenes(user_prompt: str, cfg: AppConfig) -> list[Scene]:
    """Expand a user idea into a fixed number of storyboard scenes via the OpenAI API.

    Raises:
        RuntimeError: if the API-key environment variable is unset.
        ValueError: if the model's JSON does not match the expected schema.
    """
    scene_count = int(cfg.get("script_gen.scene_count", 3))
    min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
    max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
    api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
    base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
    model = str(cfg.get("openai.model", "gpt-4o-mini"))

    api_key = os.environ.get(api_key_env)
    if not api_key:
        raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")

    base_url = os.environ.get(base_url_env) or None
    client = OpenAI(api_key=api_key, base_url=base_url)

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": _system_prompt(scene_count, min_chars, max_chars)},
            {"role": "user", "content": user_prompt},
        ],
        response_format={"type": "json_object"},
        temperature=0.6,
    )

    raw_content = completion.choices[0].message.content or "{}"
    parsed: Any = json.loads(raw_content)
    scenes_raw = parsed.get("scenes")
    if not isinstance(scenes_raw, list) or len(scenes_raw) != scene_count:
        raise ValueError(f"Model returned invalid scenes length: {type(scenes_raw)}")

    result: list[Scene] = []
    for i, s in enumerate(scenes_raw):
        if not isinstance(s, dict):
            raise ValueError(f"Scene[{i}] must be object, got {type(s)}")
        fields = {k: str(s.get(k, "")).strip() for k in ("image_prompt", "video_motion", "narration")}
        # image_prompt and narration are mandatory; video_motion may be empty.
        if not fields["image_prompt"] or not fields["narration"]:
            raise ValueError(f"Scene[{i}] missing required fields")
        result.append(Scene(**fields))
    return result

10
engine/types.py Normal file
View File

@@ -0,0 +1,10 @@
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class Scene:
    """One storyboard scene produced by the script generator."""

    # Text-to-image prompt describing the frame.
    image_prompt: str
    # Camera movement / subject motion description for the video stage.
    video_motion: str
    # Narration text voiced over this scene.
    narration: str

78
engine/video_editor.py Normal file
View File

@@ -0,0 +1,78 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, concatenate_videoclips, vfx
from .config import AppConfig
@dataclass(frozen=True)
class Segment:
    """One scene's inputs for final assembly: video, narration audio, subtitle text."""

    # Rendered video file for the scene.
    video_path: Path
    # Narration audio file for the scene.
    audio_path: Path
    # Subtitle text rendered over the scene.
    narration: str
def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
    """Match the video's length to the audio's, then attach the audio track."""
    a_dur, v_dur = audio.duration, video.duration
    # Without both durations we cannot resize; just attach the audio as-is.
    if a_dur is None or v_dur is None:
        return video.with_audio(audio)
    if a_dur > v_dur:
        # Audio is longer: loop the footage until it covers the narration.
        fitted = video.with_effects([vfx.Loop(duration=a_dur)])
    elif v_dur > a_dur:
        # Video is longer: trim it down to the narration length.
        fitted = video.subclipped(0, a_dur)
    else:
        fitted = video
    return fitted.with_audio(audio)
def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
    """Build a bottom-centered, slightly transparent caption clip for *text*."""
    # MoviePy 2 uses Pillow for text rendering by default on most setups.
    # Caption width is 92% of the frame; height is computed automatically.
    max_width = int(size[0] * 0.92)
    clip = TextClip(
        text=text,
        font_size=44,
        color="white",
        stroke_color="black",
        stroke_width=2,
        size=(max_width, None),
        method="caption",
    )
    clip = clip.with_position(("center", "bottom"))
    clip = clip.with_duration(duration)
    return clip.with_opacity(0.95)
def render_final(segments: list[Segment], cfg: AppConfig, output_path: str | Path | None = None) -> Path:
    """Concatenate scene segments (with subtitles and fades) into the final video.

    Args:
        segments: Ordered per-scene video/audio/subtitle inputs; must be non-empty.
        cfg: App configuration (reads video.transition_seconds, video.final_output,
            video.mock_fps).
        output_path: Optional explicit destination; defaults to video.final_output.

    Returns:
        Path of the written video file.

    Raises:
        ValueError: if *segments* is empty.
    """
    if not segments:
        # concatenate_videoclips([]) fails with an opaque error; fail fast instead.
        raise ValueError("render_final requires at least one segment")
    transition_s = float(cfg.get("video.transition_seconds", 0.25))
    out = Path(output_path or str(cfg.get("video.final_output", "./final_poc.mp4")))
    out.parent.mkdir(parents=True, exist_ok=True)
    sources = []  # source clips, tracked so their ffmpeg readers get released
    clips = []
    for seg in segments:
        v = VideoFileClip(str(seg.video_path))
        a = AudioFileClip(str(seg.audio_path))
        sources.extend((v, a))
        v2 = _fit_video_to_audio(v, a)
        w, h = v2.size
        sub = _subtitle_clip(seg.narration, (w, h), v2.duration or a.duration or 0)
        comp = CompositeVideoClip([v2, sub])
        if transition_s > 0:
            comp = comp.with_effects([vfx.FadeIn(transition_s), vfx.FadeOut(transition_s)])
        clips.append(comp)
    final = concatenate_videoclips(clips, method="compose")
    try:
        final.write_videofile(
            str(out),
            codec="libx264",
            audio_codec="aac",
            fps=clips[0].fps if clips and clips[0].fps else int(cfg.get("video.mock_fps", 24)),
            threads=4,
            preset="medium",
        )
    finally:
        final.close()
        for c in clips:
            c.close()
        # Also close the source readers explicitly; closing the composites is
        # not guaranteed to cascade to them — TODO confirm against MoviePy 2.
        for s in sources:
            s.close()
    return out