fix: 优化后端数据
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -25,7 +25,24 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
|
||||
loss_us, loss_ir = {}, {}
|
||||
|
||||
# 美军人员伤亡
|
||||
# 美军人员伤亡(中文,优先匹配)
|
||||
v = _first_int(t, r"造成\s*(\d+)\s*名?\s*美军\s*伤亡")
|
||||
if v is not None:
|
||||
loss_us["personnel_killed"] = v
|
||||
v = _first_int(t, r"(\d+)\s*名?\s*美军\s*伤亡") if loss_us.get("personnel_killed") is None else None
|
||||
if v is not None:
|
||||
loss_us["personnel_killed"] = v
|
||||
v = _first_int(t, r"(\d+)\s*名?\s*(?:美军|美国军队|美国)\s*(?:死亡|阵亡)")
|
||||
if v is not None:
|
||||
loss_us["personnel_killed"] = v
|
||||
v = _first_int(t, r"(\d+)\s*名?\s*(?:美军|美国)\s*受伤")
|
||||
if v is not None:
|
||||
loss_us["personnel_wounded"] = v
|
||||
v = _first_int(t, r"美军\s*伤亡\s*(\d+)")
|
||||
if v is not None and loss_us.get("personnel_killed") is None:
|
||||
loss_us["personnel_killed"] = v
|
||||
|
||||
# 美军人员伤亡(英文)
|
||||
v = _first_int(t, r"(?:us|american|u\.?s\.?)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|military)[\s\w]*(?:killed|dead)")
|
||||
if v is not None:
|
||||
loss_us["personnel_killed"] = v
|
||||
@@ -36,7 +53,18 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
if v is not None:
|
||||
loss_us["personnel_wounded"] = v
|
||||
|
||||
# 伊朗人员伤亡
|
||||
# 伊朗人员伤亡(中文)
|
||||
v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*伤亡")
|
||||
if v is not None:
|
||||
loss_ir["personnel_killed"] = v
|
||||
v = _first_int(t, r"(\d+)\s*名?\s*(?:伊朗|伊朗军队)\s*(?:死亡|阵亡)")
|
||||
if v is not None:
|
||||
loss_ir["personnel_killed"] = v
|
||||
v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*受伤")
|
||||
if v is not None:
|
||||
loss_ir["personnel_wounded"] = v
|
||||
|
||||
# 伊朗人员伤亡(英文)
|
||||
v = _first_int(t, r"(?:iran|iranian)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|guard|killed|dead)")
|
||||
if v is not None:
|
||||
loss_ir["personnel_killed"] = v
|
||||
@@ -47,8 +75,11 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
if v is not None:
|
||||
loss_ir["personnel_wounded"] = v
|
||||
|
||||
# 平民伤亡(多不区分阵营,计入双方或仅 us 因多为美国基地周边)
|
||||
v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:killed|dead)")
|
||||
# 平民伤亡(中英文)
|
||||
v = _first_int(t, r"(\d+)\s*名?\s*平民\s*(?:伤亡|死亡)")
|
||||
if v is not None:
|
||||
loss_us["civilian_killed"] = v
|
||||
v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:killed|dead)") if loss_us.get("civilian_killed") is None else None
|
||||
if v is not None:
|
||||
loss_us["civilian_killed"] = v
|
||||
v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:wounded|injured)")
|
||||
@@ -87,7 +118,7 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
out.setdefault("combat_losses_delta", {})["us"] = loss_us
|
||||
if loss_ir:
|
||||
out.setdefault("combat_losses_delta", {})["iran"] = loss_ir
|
||||
if "retaliat" in t or "revenge" in t or "报复" in t:
|
||||
if "retaliat" in t or "revenge" in t or "报复" in t or "反击" in t:
|
||||
out["retaliation"] = {"value": 75, "time": ts}
|
||||
if "wall street" in t or " dow " in t or "s&p" in t or "market slump" in t or "stock fall" in t or "美股" in t:
|
||||
out["wall_street"] = {"time": ts, "value": 55}
|
||||
@@ -98,7 +129,7 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
if base_attacked:
|
||||
updates: list = []
|
||||
# 常见美军基地关键词 -> name_keywords(用于 db_merge 的 LIKE 匹配)
|
||||
bases_us = [
|
||||
bases_all = [
|
||||
("阿萨德|阿因|asad|assad|ain", "us"),
|
||||
("巴格达|baghdad", "us"),
|
||||
("乌代德|udeid|卡塔尔|qatar", "us"),
|
||||
@@ -113,8 +144,19 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
("赛利耶|sayliyah", "us"),
|
||||
("巴林|bahrain", "us"),
|
||||
("科威特|kuwait", "us"),
|
||||
# 伊朗基地
|
||||
("阿巴斯港|abbas|bandar abbas", "iran"),
|
||||
("德黑兰|tehran", "iran"),
|
||||
("布什尔|bushehr", "iran"),
|
||||
("伊斯法罕|isfahan|esfahan", "iran"),
|
||||
("纳坦兹|natanz", "iran"),
|
||||
("米纳布|minab", "iran"),
|
||||
("卡拉季|karaj", "iran"),
|
||||
("克尔曼沙赫|kermanshah", "iran"),
|
||||
("大不里士|tabriz", "iran"),
|
||||
("霍尔木兹|hormuz", "iran"),
|
||||
]
|
||||
for kws, side in bases_us:
|
||||
for kws, side in bases_all:
|
||||
if any(k in t for k in kws.split("|")):
|
||||
updates.append({"name_keywords": kws, "side": side, "status": "attacked", "damage_level": 2})
|
||||
if updates:
|
||||
|
||||
@@ -333,7 +333,7 @@ def _extract_and_merge_panel_data(items: list) -> None:
|
||||
from datetime import timezone
|
||||
merged_any = False
|
||||
# 规则模式可多处理几条(无 Ollama);AI 模式限制 5 条避免调用过多
|
||||
limit = 10 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 5
|
||||
limit = 25 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 10
|
||||
for it in items[:limit]:
|
||||
text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "")
|
||||
if len(text.strip()) < 20:
|
||||
@@ -383,6 +383,40 @@ async def _periodic_fetch() -> None:
|
||||
# ==========================
|
||||
# API 接口
|
||||
# ==========================
|
||||
@app.post("/crawler/backfill")
def crawler_backfill():
    """Re-extract and merge panel data from recent ``situation_update`` rows.

    Reads the 50 most recent rows of ``situation_update`` (ordered by
    ``timestamp`` descending), runs the configured extractor over each row's
    ``category`` + ``summary`` text and merges whatever it returns into the
    database. Intended for repairing historical rows whose combat-loss /
    retaliation data was never extracted.

    The extractor is ``extractor_rules.extract_from_news`` when the
    ``CLEANER_AI_DISABLED`` environment variable is ``"1"``, otherwise
    ``extractor_ai.extract_from_news``.

    Returns:
        ``{"ok": True, "processed": <rows read>, "merged": <rows merged>}`` on
        success, or ``{"ok": False, "error": <message>}`` when the database
        file is missing or any step outside the per-row loop fails.
    """
    if not os.path.exists(DB_PATH):
        return {"ok": False, "error": "db not found"}
    try:
        import logging
        from contextlib import closing

        from db_merge import merge

        if os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
        else:
            from extractor_ai import extract_from_news

        # closing() releases the connection even when the query raises;
        # sqlite3's own context manager only scopes a transaction and does
        # not close the connection.
        with closing(sqlite3.connect(DB_PATH, timeout=10)) as conn:
            rows = conn.execute(
                "SELECT id, timestamp, category, summary FROM situation_update ORDER BY timestamp DESC LIMIT 50"
            ).fetchall()

        log = logging.getLogger(__name__)
        merged = 0
        for uid, ts, cat, summary in rows:
            text = ((cat or "") + " " + (summary or "")).strip()
            # Skip rows whose text is shorter than 20 characters.
            if len(text) < 20:
                continue
            try:
                extracted = extract_from_news(text, timestamp=ts)
                if extracted and merge(extracted, db_path=DB_PATH):
                    merged += 1
            except Exception:
                # Best-effort: one bad row must not abort the backfill, but
                # record which row failed instead of hiding it silently.
                log.warning(
                    "crawler_backfill: extract/merge failed for situation_update id=%s",
                    uid,
                    exc_info=True,
                )
        _notify_node()
        return {"ok": True, "processed": len(rows), "merged": merged}
    except Exception as e:
        # Endpoint boundary: report the failure to the caller rather than
        # raising out of the request handler.
        return {"ok": False, "error": str(e)}
|
||||
|
||||
|
||||
@app.get("/crawler/status")
|
||||
def crawler_status():
|
||||
"""爬虫状态:用于排查数据更新链路"""
|
||||
|
||||
Reference in New Issue
Block a user