feat: 新增文件
This commit is contained in:
3
engine/__init__.py
Normal file
3
engine/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .types import Scene
|
||||
|
||||
__all__ = ["Scene"]
|
||||
47
engine/audio_gen.py
Normal file
47
engine/audio_gen.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import edge_tts
|
||||
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
||||
|
||||
from .config import AppConfig
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AudioAsset:
    """A synthesized narration audio file on disk."""

    # Location of the generated audio file (mp3).
    path: Path
    # Playback length in seconds (0.0 when the duration could not be read).
    duration_s: float
def _audio_duration_seconds(path: Path) -> float:
    """Return the duration of the audio file at ``path`` in seconds.

    Uses MoviePy (ffmpeg-backed), which reports reliable durations for
    mp3 files. Falls back to 0.0 when no duration is available.
    """
    audio = AudioFileClip(str(path))
    try:
        duration = audio.duration
        return float(duration) if duration else 0.0
    finally:
        audio.close()
async def synthesize_one(text: str, out_path: Path, voice: str, rate: str, volume: str) -> AudioAsset:
    """Synthesize a single narration with edge-tts and measure its length.

    The mp3 is written to ``out_path`` (parent directories are created as
    needed) and an AudioAsset describing the file is returned.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    tts = edge_tts.Communicate(text=text, voice=voice, rate=rate, volume=volume)
    await tts.save(str(out_path))
    return AudioAsset(path=out_path, duration_s=_audio_duration_seconds(out_path))
async def synthesize_scenes(narrations: list[str], cfg: AppConfig) -> list[AudioAsset]:
    """Synthesize all scene narrations concurrently.

    Files are named scene_01.mp3, scene_02.mp3, ... inside the configured
    ``tts.output_dir``; results are returned in scene order.
    """
    voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
    rate = str(cfg.get("tts.rate", "+0%"))
    volume = str(cfg.get("tts.volume", "+0%"))
    out_dir = Path(str(cfg.get("tts.output_dir", "./assets/audio")))
    out_dir.mkdir(parents=True, exist_ok=True)

    # One TTS task per narration; gather preserves input order.
    tasks = [
        asyncio.create_task(
            synthesize_one(text, out_dir / f"scene_{idx:02d}.mp3", voice, rate, volume)
        )
        for idx, text in enumerate(narrations, start=1)
    ]
    return await asyncio.gather(*tasks)
188
engine/comfy_client.py
Normal file
188
engine/comfy_client.py
Normal file
@@ -0,0 +1,188 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
import httpx
|
||||
|
||||
from .config import AppConfig
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ComfyResult:
    """Outcome of one submitted ComfyUI workflow run."""

    # Prompt id assigned by the ComfyUI /prompt endpoint.
    prompt_id: str
    # Output files reported in the job history, resolved against the
    # configured ComfyUI output directory.
    output_files: list[Path]
class ComfyClient:
    """Async client for the ComfyUI HTTP API.

    Loads an API-format workflow from disk, injects per-scene parameters
    (prompt text, seed, optional motion prompt), submits it via /prompt
    and polls /history until the generated files appear on disk.
    """

    def __init__(self, cfg: AppConfig):
        self.cfg = cfg
        self.base_url = str(cfg.get("app.comfy_base_url", "http://127.0.0.1:8188")).rstrip("/")
        self.output_dir = Path(str(cfg.get("app.comfy_output_dir", "./ComfyUI/output")))
        self.workflow_path = Path(str(cfg.get("comfy_workflow.workflow_path", "./workflow_api.json")))
        # One client id per process so ComfyUI can attribute our submissions.
        self._client_id = str(uuid.uuid4())

    def load_workflow(self) -> dict[str, Any]:
        """Read and validate the workflow JSON from disk.

        Raises:
            FileNotFoundError: when the configured workflow file is missing.
            ValueError: when the JSON root is not an object.
        """
        if not self.workflow_path.exists():
            raise FileNotFoundError(f"workflow file not found: {self.workflow_path}")
        raw = json.loads(self.workflow_path.read_text(encoding="utf-8"))
        if not isinstance(raw, dict):
            raise ValueError(f"workflow_api.json root must be dict, got {type(raw)}")
        return raw

    def _nodes(self, workflow: dict[str, Any]) -> dict[str, Any]:
        # ComfyUI API workflow exports typically use
        # { node_id: {class_type, inputs, ...}, ... } at the top level.
        return workflow

    def _find_node_id_by_class_type(self, workflow: dict[str, Any], class_types: Iterable[str]) -> str | None:
        """Return the id of the first node whose class_type is in class_types."""
        want = {c.strip() for c in class_types if c and str(c).strip()}
        if not want:
            return None
        for node_id, node in self._nodes(workflow).items():
            if not isinstance(node, dict):
                continue
            ct = node.get("class_type")
            if isinstance(ct, str) and ct in want:
                return str(node_id)
        return None

    def _resolve_node_id(self, workflow: dict[str, Any], configured_id: Any, fallback_class_types_key: str) -> str:
        """Resolve a target node id, preferring an explicitly configured id.

        Falls back to matching by the class-type list stored under
        ``comfy_workflow.<fallback_class_types_key>`` in the config.

        Raises:
            KeyError: when the configured or derived node cannot be found.
            ValueError: when the fallback config entry is not a list.
        """
        if configured_id is not None and str(configured_id).strip():
            node_id = str(configured_id).strip()
            if node_id not in self._nodes(workflow):
                raise KeyError(f"Configured node_id {node_id} not found in workflow")
            return node_id
        class_types = self.cfg.get(f"comfy_workflow.{fallback_class_types_key}", []) or []
        if not isinstance(class_types, list):
            raise ValueError(f"Config comfy_workflow.{fallback_class_types_key} must be list")
        found = self._find_node_id_by_class_type(workflow, [str(x) for x in class_types])
        if not found:
            raise KeyError(f"Cannot resolve node by class types: {class_types}")
        return found

    def inject_params(self, workflow: dict[str, Any], image_prompt: str, seed: int, motion_prompt: str | None = None) -> dict[str, Any]:
        """Return a deep copy of ``workflow`` with prompt/seed/motion injected."""
        wf = json.loads(json.dumps(workflow))  # deep copy via JSON round-trip

        prompt_node_id = self._resolve_node_id(
            wf,
            self.cfg.get("comfy_workflow.prompt_node_id", None),
            "prompt_node_class_types",
        )
        prompt_key = str(self.cfg.get("comfy_workflow.prompt_input_key", "text"))
        self._set_input(wf, prompt_node_id, prompt_key, image_prompt)

        seed_node_id = self._resolve_node_id(
            wf,
            self.cfg.get("comfy_workflow.seed_node_id", None),
            "seed_node_class_types",
        )
        seed_key = str(self.cfg.get("comfy_workflow.seed_input_key", "seed"))
        self._set_input(wf, seed_node_id, seed_key, int(seed))

        # Motion prompt is optional and only injected when a node id is configured.
        motion_node_id = self.cfg.get("comfy_workflow.motion_node_id", None)
        if motion_prompt and motion_node_id is not None and str(motion_node_id).strip():
            motion_key = str(self.cfg.get("comfy_workflow.motion_input_key", "text"))
            self._set_input(wf, str(motion_node_id).strip(), motion_key, motion_prompt)

        return wf

    def _set_input(self, workflow: dict[str, Any], node_id: str, key: str, value: Any) -> None:
        """Set workflow[node_id]["inputs"][key] = value, creating inputs if absent."""
        node = self._nodes(workflow).get(str(node_id))
        if not isinstance(node, dict):
            raise KeyError(f"Node {node_id} not found")
        inputs = node.get("inputs")
        if inputs is None:
            inputs = {}
            node["inputs"] = inputs
        if not isinstance(inputs, dict):
            raise TypeError(f"Node {node_id} inputs must be dict, got {type(inputs)}")
        inputs[key] = value

    async def _post_prompt(self, client: httpx.AsyncClient, workflow: dict[str, Any]) -> str:
        """Submit the workflow to /prompt and return the assigned prompt id."""
        url = f"{self.base_url}/prompt"
        payload = {"prompt": workflow, "client_id": self._client_id}
        r = await client.post(url, json=payload)
        r.raise_for_status()
        data = r.json()
        # Some frontends have been observed returning an upper-cased key.
        pid = data.get("prompt_id") or data.get("PROMPT_ID")
        if not isinstance(pid, str) or not pid:
            raise RuntimeError(f"Unexpected /prompt response: {data}")
        return pid

    async def _get_history(self, client: httpx.AsyncClient, prompt_id: str) -> dict[str, Any] | None:
        """Fetch the history entry for ``prompt_id``, or None when unavailable.

        Tries /history/{prompt_id} first, then the full /history listing.
        Network/parse errors are treated as "not ready yet" so the caller can
        keep polling; HTTP error statuses (other than 404) are propagated.
        """
        for url in (f"{self.base_url}/history/{prompt_id}", f"{self.base_url}/history"):
            try:
                r = await client.get(url)
                if r.status_code == 404:
                    continue
                r.raise_for_status()
                data = r.json()
                if isinstance(data, dict):
                    if prompt_id in data and isinstance(data[prompt_id], dict):
                        return data[prompt_id]
                    if url.endswith(f"/{prompt_id}"):
                        return data
                return None
            except httpx.HTTPStatusError:
                raise
            except Exception:
                continue
        return None

    def _extract_output_files(self, history_item: dict[str, Any]) -> list[Path]:
        """Collect output file paths recorded in a history item.

        ComfyUI stores files as {"filename": ..., "subfolder": ..., "type": ...}.
        The subfolder, when present, is joined under the output directory so
        files written to sub-directories resolve to the right path (the
        original implementation dropped it). Duplicates are removed while
        preserving order.
        """
        out: list[Path] = []
        outputs = history_item.get("outputs")
        if not isinstance(outputs, dict):
            return out

        def walk(v: Any) -> None:
            if isinstance(v, dict):
                fn = v.get("filename")
                if isinstance(fn, str) and fn.strip():
                    sub = v.get("subfolder")
                    base = self.output_dir / sub if isinstance(sub, str) and sub.strip() else self.output_dir
                    out.append(base / fn)
                for vv in v.values():
                    walk(vv)
            elif isinstance(v, list):
                for vv in v:
                    walk(vv)

        walk(outputs)
        # De-dup while preserving order.
        seen: set[str] = set()
        uniq: list[Path] = []
        for p in out:
            s = str(p)
            if s not in seen:
                seen.add(s)
                uniq.append(p)
        return uniq

    async def run_workflow(self, workflow: dict[str, Any], *, poll_interval_s: float = 1.0, timeout_s: float = 300.0) -> ComfyResult:
        """Submit a workflow and poll history until its outputs exist on disk.

        Raises:
            TimeoutError: when no output file appears within ``timeout_s``.
        """
        async with httpx.AsyncClient(timeout=30.0) as client:
            prompt_id = await self._post_prompt(client, workflow)

            loop = asyncio.get_running_loop()
            deadline = loop.time() + timeout_s
            while True:
                if loop.time() > deadline:
                    raise TimeoutError(f"ComfyUI job timeout: {prompt_id}")
                item = await self._get_history(client, prompt_id)
                if isinstance(item, dict):
                    files = self._extract_output_files(item)
                    # Heuristic: the job is done once any reported file exists on disk.
                    if files and any(p.exists() for p in files):
                        return ComfyResult(prompt_id=prompt_id, output_files=files)
                await asyncio.sleep(poll_interval_s)
28
engine/config.py
Normal file
28
engine/config.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AppConfig:
    """Read-only application configuration backed by a nested mapping."""

    # Raw configuration mapping, as parsed from YAML.
    data: dict[str, Any]

    @staticmethod
    def load(path: str | Path) -> "AppConfig":
        """Load configuration from a YAML file.

        A missing file or an empty YAML document yields an empty config
        (yaml.safe_load returns None for an empty document, which the
        original implementation wrongly rejected as a non-mapping root).

        Raises:
            ValueError: if the YAML root is not a mapping.
        """
        p = Path(path)
        raw = yaml.safe_load(p.read_text(encoding="utf-8")) if p.exists() else {}
        if raw is None:
            raw = {}
        if not isinstance(raw, dict):
            raise ValueError(f"Config root must be a mapping, got {type(raw)}")
        return AppConfig(raw)

    def get(self, dotted: str, default: Any = None) -> Any:
        """Look up a dotted key like "tts.voice"; return ``default`` when absent."""
        cur: Any = self.data
        for part in dotted.split("."):
            if not isinstance(cur, dict) or part not in cur:
                return default
            cur = cur[part]
        return cur
80
engine/script_gen.py
Normal file
80
engine/script_gen.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from .config import AppConfig
|
||||
from .types import Scene
|
||||
|
||||
|
||||
def _system_prompt(scene_count: int, min_chars: int, max_chars: int) -> str:
|
||||
return f"""你是一个专业短视频编剧与分镜师。
|
||||
请把用户的创意扩展为 {scene_count} 个分镜(Scene) 的 JSON。
|
||||
|
||||
硬性约束:
|
||||
1) 三个分镜的主角描述(Character Description)必须保持一致:姓名/外观/服饰/风格不可前后矛盾。
|
||||
2) 每个分镜必须包含字段:image_prompt, video_motion, narration。
|
||||
3) narration 为中文旁白,每段严格控制在约 {min_chars}-{max_chars} 字左右(宁可略短,不要超过太多)。
|
||||
4) 画面描述要具体可视化,video_motion 描述镜头运动/人物动作。
|
||||
5) 只输出 JSON,不要输出任何解释、markdown、代码块。
|
||||
|
||||
输出 JSON Schema(示例结构):
|
||||
{{
|
||||
"character_description": "...一致的主角设定...",
|
||||
"scenes": [
|
||||
{{"image_prompt":"...","video_motion":"...","narration":"..."}},
|
||||
{{"image_prompt":"...","video_motion":"...","narration":"..."}},
|
||||
{{"image_prompt":"...","video_motion":"...","narration":"..."}}
|
||||
]
|
||||
}}
|
||||
"""
|
||||
|
||||
|
||||
def generate_scenes(user_prompt: str, cfg: AppConfig) -> list[Scene]:
|
||||
scene_count = int(cfg.get("script_gen.scene_count", 3))
|
||||
min_chars = int(cfg.get("script_gen.narration_min_chars", 15))
|
||||
max_chars = int(cfg.get("script_gen.narration_max_chars", 20))
|
||||
|
||||
api_key_env = str(cfg.get("openai.api_key_env", "OPENAI_API_KEY"))
|
||||
base_url_env = str(cfg.get("openai.base_url_env", "OPENAI_BASE_URL"))
|
||||
model = str(cfg.get("openai.model", "gpt-4o-mini"))
|
||||
|
||||
api_key = os.environ.get(api_key_env)
|
||||
if not api_key:
|
||||
raise RuntimeError(f"Missing env var {api_key_env} for OpenAI API key")
|
||||
|
||||
client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=os.environ.get(base_url_env) or None,
|
||||
)
|
||||
|
||||
resp = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": _system_prompt(scene_count, min_chars, max_chars)},
|
||||
{"role": "user", "content": user_prompt},
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
temperature=0.6,
|
||||
)
|
||||
|
||||
content = resp.choices[0].message.content or "{}"
|
||||
data: Any = json.loads(content)
|
||||
scenes_raw = data.get("scenes")
|
||||
if not isinstance(scenes_raw, list) or len(scenes_raw) != scene_count:
|
||||
raise ValueError(f"Model returned invalid scenes length: {type(scenes_raw)}")
|
||||
|
||||
scenes: list[Scene] = []
|
||||
for i, s in enumerate(scenes_raw):
|
||||
if not isinstance(s, dict):
|
||||
raise ValueError(f"Scene[{i}] must be object, got {type(s)}")
|
||||
image_prompt = str(s.get("image_prompt", "")).strip()
|
||||
video_motion = str(s.get("video_motion", "")).strip()
|
||||
narration = str(s.get("narration", "")).strip()
|
||||
if not image_prompt or not narration:
|
||||
raise ValueError(f"Scene[{i}] missing required fields")
|
||||
scenes.append(Scene(image_prompt=image_prompt, video_motion=video_motion, narration=narration))
|
||||
return scenes
|
||||
10
engine/types.py
Normal file
10
engine/types.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Scene:
    """One storyboard scene produced by the script generator."""

    # Text-to-image prompt describing the scene's visual content.
    image_prompt: str
    # Camera movement / character action description for video generation.
    video_motion: str
    # Chinese narration line voiced over this scene.
    narration: str
78
engine/video_editor.py
Normal file
78
engine/video_editor.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from moviepy import AudioFileClip, CompositeVideoClip, TextClip, VideoFileClip, concatenate_videoclips, vfx
|
||||
|
||||
from .config import AppConfig
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Segment:
    """One scene's rendered assets, ready for final assembly."""

    # Generated video clip for the scene.
    video_path: Path
    # Narration audio for the scene.
    audio_path: Path
    # Subtitle text composited over the clip.
    narration: str
def _fit_video_to_audio(video: VideoFileClip, audio: AudioFileClip) -> VideoFileClip:
    """Attach ``audio`` to ``video``, matching the video length to the audio.

    The video is looped when shorter than the audio and trimmed when longer;
    if either duration is unknown the clips are combined unchanged.
    """
    a_dur = audio.duration
    v_dur = video.duration
    if a_dur is None or v_dur is None:
        return video.with_audio(audio)
    if a_dur > v_dur:
        # Video too short: loop it up to the narration length.
        video = video.with_effects([vfx.Loop(duration=a_dur)])
    elif v_dur > a_dur:
        # Video too long: cut it down to the narration length.
        video = video.subclipped(0, a_dur)
    return video.with_audio(audio)
def _subtitle_clip(text: str, size: tuple[int, int], duration: float) -> TextClip:
    """Create a bottom-centered caption clip for one scene.

    The caption wraps at 92% of the frame width and is displayed for the
    full ``duration``. MoviePy 2 renders text with Pillow by default on
    most setups.
    """
    frame_width = size[0]
    clip = TextClip(
        text=text,
        font_size=44,
        color="white",
        stroke_color="black",
        stroke_width=2,
        size=(int(frame_width * 0.92), None),
        method="caption",
    )
    clip = clip.with_position(("center", "bottom"))
    clip = clip.with_duration(duration)
    return clip.with_opacity(0.95)
def render_final(segments: list[Segment], cfg: AppConfig, output_path: str | Path | None = None) -> Path:
    """Concatenate per-scene clips with subtitles into the final video.

    For each segment the video is fitted to its narration audio, a caption
    is composited on top, an optional fade in/out is applied, and all
    scenes are joined and encoded with libx264/aac.

    Args:
        segments: ordered per-scene video/audio/narration triples.
        cfg: application config (video.transition_seconds, video.final_output,
            video.mock_fps).
        output_path: overrides cfg "video.final_output" when given.

    Returns:
        Path of the written video file.

    Raises:
        ValueError: when ``segments`` is empty (previously this failed deep
            inside concatenate_videoclips with an obscure error).
    """
    if not segments:
        raise ValueError("render_final requires at least one segment")

    transition_s = float(cfg.get("video.transition_seconds", 0.25))
    out = Path(output_path or str(cfg.get("video.final_output", "./final_poc.mp4")))
    out.parent.mkdir(parents=True, exist_ok=True)

    sources = []  # every opened source clip, so they all get closed
    clips = []
    final = None
    try:
        for seg in segments:
            v = VideoFileClip(str(seg.video_path))
            a = AudioFileClip(str(seg.audio_path))
            sources.extend((v, a))
            v2 = _fit_video_to_audio(v, a)

            w, h = v2.size
            sub = _subtitle_clip(seg.narration, (w, h), v2.duration or a.duration or 0)
            comp = CompositeVideoClip([v2, sub])
            if transition_s > 0:
                comp = comp.with_effects([vfx.FadeIn(transition_s), vfx.FadeOut(transition_s)])
            clips.append(comp)

        final = concatenate_videoclips(clips, method="compose")
        final.write_videofile(
            str(out),
            codec="libx264",
            audio_codec="aac",
            fps=clips[0].fps if clips[0].fps else int(cfg.get("video.mock_fps", 24)),
            threads=4,
            preset="medium",
        )
    finally:
        # Close everything even when building/encoding fails partway through;
        # the original leaked the source VideoFileClip/AudioFileClip objects.
        if final is not None:
            final.close()
        for c in clips:
            c.close()
        for s in sources:
            s.close()
    return out
Reference in New Issue
Block a user