fix: 优化数据

This commit is contained in:
Daniel
2026-03-02 11:28:13 +08:00
parent 4a8fff5a00
commit 004d10b283
39 changed files with 1106 additions and 56 deletions

View File

@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
"""英译中,入库前统一翻译"""
import os
import re
from typing import Optional
@@ -12,17 +13,26 @@ def _is_mostly_chinese(text: str) -> bool:
def translate_to_chinese(text: str) -> str:
    """Translate ``text`` into Simplified Chinese on a best-effort basis.

    The input is returned unchanged when it is empty or whitespace-only,
    when the ``TRANSLATE_DISABLED`` environment variable equals ``"1"``,
    when ``_is_mostly_chinese`` reports the text is already Chinese, or when
    every translation backend fails or yields nothing useful.

    Backends are tried in order: Google first, then MyMemory as a fallback.
    Only the first 2000 characters of the stripped input are sent for
    translation, so a successful result covers at most that prefix.

    Args:
        text: The text to translate.

    Returns:
        The translated string, or the original ``text`` object (not the
        stripped/truncated copy) if no translation was produced.
    """
    if not text or not text.strip():
        return text
    # Operational kill switch: skip all translation calls when set to "1".
    if os.environ.get("TRANSLATE_DISABLED", "0") == "1":
        return text
    s = str(text).strip()
    # Cap the request size; anything past 2000 characters is not translated.
    if len(s) > 2000:
        s = s[:2000]
    if _is_mostly_chinese(s):
        return text
    # NOTE(review): MyMemory appears to enforce a shorter per-request length
    # limit than Google; if so, long inputs raise inside the fallback and
    # end up returning the original text. Confirm against deep-translator.
    for translator in ("google", "mymemory"):
        try:
            # Imports are deliberately lazy and inside the try block so a
            # missing or broken deep_translator install degrades to
            # "return the original text" instead of raising.
            if translator == "google":
                from deep_translator import GoogleTranslator
                out = GoogleTranslator(source="auto", target="zh-CN").translate(s)
            else:
                from deep_translator import MyMemoryTranslator
                out = MyMemoryTranslator(source="auto", target="zh-CN").translate(s)
            # An empty, blank, or unchanged result counts as a failure so
            # the next backend gets a chance.
            if out and out.strip() and out != s:
                return out
        except Exception:
            # Best-effort by design: any backend error falls through to the
            # next backend, and ultimately to returning the original text.
            continue
    return text