fix: 优化后端数据

This commit is contained in:
Daniel
2026-03-02 16:29:11 +08:00
parent 81628a136a
commit a9caf6e7c0
18 changed files with 295 additions and 15 deletions

View File

@@ -333,7 +333,7 @@ def _extract_and_merge_panel_data(items: list) -> None:
from datetime import timezone
merged_any = False
# Rule-based mode can process more entries; AI (Ollama) mode caps the count to avoid excessive calls
limit = 10 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 5
limit = 25 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 10
for it in items[:limit]:
text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "")
if len(text.strip()) < 20:
@@ -383,6 +383,40 @@ async def _periodic_fetch() -> None:
# ==========================
# API 接口
# ==========================
@app.post("/crawler/backfill")
def crawler_backfill():
    """Re-parse recent situation_update rows and merge extracted panel data.

    Used to repair historical records whose battle-loss / retaliation data
    was never extracted. Processes at most the 50 newest rows.

    Returns:
        dict: {"ok": True, "processed": n, "merged": m} on success, or
              {"ok": False, "error": msg} on failure; never raises.
    """
    if not os.path.exists(DB_PATH):
        return {"ok": False, "error": "db not found"}
    try:
        from db_merge import merge
        # Mirror the live pipeline's extractor selection: rule-based when
        # AI is disabled via env var, otherwise the AI extractor.
        if os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
        else:
            from extractor_ai import extract_from_news
        conn = sqlite3.connect(DB_PATH, timeout=10)
        try:
            rows = conn.execute(
                "SELECT id, timestamp, category, summary FROM situation_update ORDER BY timestamp DESC LIMIT 50"
            ).fetchall()
        finally:
            # Fix: previously the connection leaked if the query raised.
            conn.close()
        merged = 0
        for _uid, ts, cat, summary in rows:
            text = ((cat or "") + " " + (summary or "")).strip()
            if len(text) < 20:
                # Too short to contain anything worth extracting.
                continue
            try:
                extracted = extract_from_news(text, timestamp=ts)
                if extracted and merge(extracted, db_path=DB_PATH):
                    merged += 1
            except Exception:
                # Best-effort per row: one bad record must not abort the backfill.
                pass
        _notify_node()
        return {"ok": True, "processed": len(rows), "merged": merged}
    except Exception as e:
        # Top-level boundary: report the failure in the response payload.
        return {"ok": False, "error": str(e)}
@app.get("/crawler/status")
def crawler_status():
"""爬虫状态:用于排查数据更新链路"""