48 lines
1.5 KiB
Python
48 lines
1.5 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
import edge_tts
|
|
from moviepy.audio.io.AudioFileClip import AudioFileClip
|
|
|
|
from .config import AppConfig
|
|
|
|
|
|
@dataclass(frozen=True)
class AudioAsset:
    """Immutable record pairing an audio file with its playback length."""

    # Location of the audio file on disk.
    path: Path
    # Length of the audio in seconds.
    duration_s: float
|
|
|
|
|
|
def _audio_duration_seconds(path: Path) -> float:
    """Return the duration of the audio file at ``path`` in seconds.

    The file is opened with MoviePy's ``AudioFileClip`` and the clip is always
    closed again, even if reading the duration fails. A missing or zero
    duration is reported as ``0.0``.
    """
    # Per the original author's note: MoviePy goes through ffmpeg and gives a
    # reliable duration for mp3 files.
    audio = AudioFileClip(str(path))
    try:
        seconds = audio.duration
    finally:
        audio.close()
    return float(seconds or 0.0)
|
|
|
|
|
|
async def synthesize_one(text: str, out_path: Path, voice: str, rate: str, volume: str) -> AudioAsset:
    """Synthesize ``text`` to an audio file with edge-tts and measure its duration.

    Args:
        text: Narration text to speak.
        out_path: Destination file; missing parent directories are created.
        voice: Voice name passed to ``edge_tts.Communicate``.
        rate: Rate adjustment string passed to ``edge_tts.Communicate``.
        volume: Volume adjustment string passed to ``edge_tts.Communicate``.

    Returns:
        An ``AudioAsset`` pointing at ``out_path`` with the measured duration
        in seconds.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate, volume=volume)
    await communicate.save(str(out_path))
    # _audio_duration_seconds is a plain synchronous function; calling it
    # directly here would block the event loop and stall every other task
    # running concurrently. Run it in the default executor instead.
    loop = asyncio.get_running_loop()
    dur = await loop.run_in_executor(None, _audio_duration_seconds, out_path)
    return AudioAsset(path=out_path, duration_s=dur)
|
|
|
|
|
|
async def synthesize_scenes(narrations: list[str], cfg: AppConfig) -> list[AudioAsset]:
    """Synthesize every narration concurrently and return the resulting assets.

    TTS settings are read from ``cfg`` under the ``tts.*`` keys, each with a
    built-in fallback. Scene files are written to the configured output
    directory as ``scene_01.mp3``, ``scene_02.mp3``, ... (numbering starts at
    1), and the returned list follows the order of ``narrations``.
    """
    tts_voice = str(cfg.get("tts.voice", "zh-CN-XiaoxiaoNeural"))
    tts_rate = str(cfg.get("tts.rate", "+0%"))
    tts_volume = str(cfg.get("tts.volume", "+0%"))
    audio_dir = Path(str(cfg.get("tts.output_dir", "./assets/audio")))
    audio_dir.mkdir(parents=True, exist_ok=True)

    # One task per scene, created up front so all syntheses run concurrently.
    jobs = [
        asyncio.create_task(
            synthesize_one(
                narration,
                audio_dir / f"scene_{number:02d}.mp3",
                tts_voice,
                tts_rate,
                tts_volume,
            )
        )
        for number, narration in enumerate(narrations, start=1)
    ]
    return await asyncio.gather(*jobs)
|