fix:优化数据来源

This commit is contained in:
Daniel
2026-03-02 01:00:04 +08:00
parent 91d9e48e1e
commit 4a8fff5a00
26 changed files with 1361 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
"""英译中,入库前统一翻译"""
import re
from typing import Optional
def _is_mostly_chinese(text: str) -> bool:
if not text or len(text.strip()) < 2:
return False
chinese = len(re.findall(r"[\u4e00-\u9fff]", text))
return chinese / max(len(text), 1) > 0.3
def translate_to_chinese(text: str) -> str:
"""将文本翻译成中文,失败或已是中文则返回原文。"""
if not text or not text.strip():
return text
s = str(text).strip()
if len(s) > 2000:
s = s[:2000]
if _is_mostly_chinese(s):
return text
try:
from deep_translator import GoogleTranslator
out = GoogleTranslator(source="auto", target="zh-CN").translate(s)
return out if out else text
except Exception:
return text