fix: 优化后端数据
This commit is contained in:
@@ -333,7 +333,7 @@ def _extract_and_merge_panel_data(items: list) -> None:
|
||||
from datetime import timezone
|
||||
merged_any = False
|
||||
# 规则模式可多处理几条(无 Ollama);AI 模式限制 5 条避免调用过多
|
||||
limit = 10 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 5
|
||||
limit = 25 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 10
|
||||
for it in items[:limit]:
|
||||
text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "")
|
||||
if len(text.strip()) < 20:
|
||||
@@ -383,6 +383,40 @@ async def _periodic_fetch() -> None:
|
||||
# ==========================
|
||||
# API 接口
|
||||
# ==========================
|
||||
@app.post("/crawler/backfill")
def crawler_backfill():
    """Re-parse recent situation_update rows and merge extracted data.

    Used to repair historical records whose battle-damage / retaliation
    figures were never extracted: re-runs the extractor (rule-based when
    CLEANER_AI_DISABLED=1, otherwise the AI extractor) over the latest 50
    situation_update rows and merges any hits back into the panel data.

    Returns:
        dict: {"ok": True, "processed": <rows scanned>, "merged": <rows merged>}
              on success, or {"ok": False, "error": <message>} on failure.
    """
    if not os.path.exists(DB_PATH):
        return {"ok": False, "error": "db not found"}
    try:
        from db_merge import merge
        # Choose the extractor: rule-based avoids Ollama calls entirely.
        if os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
        else:
            from extractor_ai import extract_from_news

        conn = sqlite3.connect(DB_PATH, timeout=10)
        try:
            rows = conn.execute(
                "SELECT id, timestamp, category, summary FROM situation_update ORDER BY timestamp DESC LIMIT 50"
            ).fetchall()
        finally:
            # Close even when the query raises — the original only closed on the
            # success path and leaked the handle into the outer except.
            conn.close()

        merged = 0
        for _uid, ts, cat, summary in rows:
            text = ((cat or "") + " " + (summary or "")).strip()
            if len(text) < 20:
                # Too short to contain anything extractable; skip.
                continue
            try:
                extracted = extract_from_news(text, timestamp=ts)
                if extracted and merge(extracted, db_path=DB_PATH):
                    merged += 1
            except Exception:
                # Best-effort per row: one bad record must not abort the backfill.
                pass
        _notify_node()
        return {"ok": True, "processed": len(rows), "merged": merged}
    except Exception as e:
        # Endpoint boundary: report the failure to the caller instead of a 500.
        return {"ok": False, "error": str(e)}
|
||||
|
||||
|
||||
@app.get("/crawler/status")
|
||||
def crawler_status():
|
||||
"""爬虫状态:用于排查数据更新链路"""
|
||||
|
||||
Reference in New Issue
Block a user