diff --git a/README.md b/README.md index d070f40..45e5703 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,12 @@ API 会由 Vite 代理到 `/api`,前端通过 `/api/situation` 获取完整态 3. 查看爬虫状态:`curl http://localhost:8000/crawler/status`(需爬虫服务已启动) 4. 数据库面板 `/db` 每 30 秒自动刷新,可观察 situation_update 条数是否增加 +### 面板数据 / 地图 / 战损不更新时 + +- **确保 API 与爬虫共用同一数据库**:本地开发时,Node 默认用 `server/data.db`,爬虫默认用 `../server/data.db`(同文件)。若 Node 在本地、爬虫在 Docker,则数据库不同,面板不会更新。 +- **Docker 部署**:`GDELT_DISABLED=1` 时,地图冲突点由 RSS 新闻填充;战损与基地状态由规则/AI 提取后写入 `combat_losses` 和 `key_location`。 +- **排查**:访问 `/db` 看 `situation_update`、`gdelt_events`、`combat_losses` 是否在增长;确认 API 已启动且前端能访问 `/api/situation`。 + ## Development ```bash diff --git a/crawler/__pycache__/db_merge.cpython-311.pyc b/crawler/__pycache__/db_merge.cpython-311.pyc new file mode 100644 index 0000000..bb79ac2 Binary files /dev/null and b/crawler/__pycache__/db_merge.cpython-311.pyc differ diff --git a/crawler/__pycache__/extractor_rules.cpython-311.pyc b/crawler/__pycache__/extractor_rules.cpython-311.pyc new file mode 100644 index 0000000..26068eb Binary files /dev/null and b/crawler/__pycache__/extractor_rules.cpython-311.pyc differ diff --git a/crawler/__pycache__/extractor_rules.cpython-39.pyc b/crawler/__pycache__/extractor_rules.cpython-39.pyc index b3358bb..52ddb93 100644 Binary files a/crawler/__pycache__/extractor_rules.cpython-39.pyc and b/crawler/__pycache__/extractor_rules.cpython-39.pyc differ diff --git a/crawler/__pycache__/realtime_conflict_service.cpython-311.pyc b/crawler/__pycache__/realtime_conflict_service.cpython-311.pyc index ef0d3a0..39a8ba6 100644 Binary files a/crawler/__pycache__/realtime_conflict_service.cpython-311.pyc and b/crawler/__pycache__/realtime_conflict_service.cpython-311.pyc differ diff --git a/crawler/extractor_rules.py b/crawler/extractor_rules.py index 89a1ee4..b9f596a 100644 --- a/crawler/extractor_rules.py +++ b/crawler/extractor_rules.py @@ -55,16 +55,16 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A if v is not None: loss_us["civilian_wounded"] = v - # 基地损毁(美方基地居多) - v = _first_int(t, r"(\d+)[\s\w]*(?:base)[\s\w]*(?:destroyed|leveled)") + # 基地损毁(美方基地居多)+ 中文 + v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:destroyed|leveled|摧毁|夷平)") if v is not None: loss_us["bases_destroyed"] = v - v = _first_int(t, r"(\d+)[\s\w]*(?:base)[\s\w]*(?:damaged|hit|struck)") + v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:damaged|hit|struck|受损|袭击)") if v is not None: loss_us["bases_damaged"] = v - if "base" in t and ("destroy" in t or "level" in t) and not loss_us.get("bases_destroyed"): + if ("base" in t or "基地" in t) and ("destroy" in t or "level" in t or "摧毁" in t or "夷平" in t) and not loss_us.get("bases_destroyed"): loss_us["bases_destroyed"] = 1 - if "base" in t and ("damage" in t or "hit" in t or "struck" in t or "strike" in t) and not loss_us.get("bases_damaged"): + if ("base" in t or "基地" in t) and ("damage" in t or "hit" in t or "struck" in t or "strike" in t or "袭击" in t or "受损" in t) and not loss_us.get("bases_damaged"): loss_us["bases_damaged"] = 1 # 战机 / 舰船(根据上下文判断阵营) @@ -92,4 +92,32 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A if "wall street" in t or " dow " in t or "s&p" in t or "market slump" in t or "stock fall" in t or "美股" in t: out["wall_street"] = {"time": ts, "value": 55} + # key_location_updates:受袭基地(与 key_location.name 匹配) + # 新闻提及基地遭袭时,更新对应基地 status + base_attacked = ("base" in t or "基地" in t) and ("attack" in t or "hit" in t or "strike" in t or "damage" in t or "袭击" in t or "打击" in t) + if base_attacked: + updates: list = [] + # 常见美军基地关键词 -> name_keywords(用于 db_merge 的 LIKE 匹配) + bases_us = [ + ("阿萨德|阿因|asad|assad|ain", "us"), + ("巴格达|baghdad", "us"), + ("乌代德|udeid|卡塔尔|qatar", "us"), + ("阿克罗蒂里|akrotiri|塞浦路斯|cyprus", "us"), + ("巴格拉姆|bagram|阿富汗|afghanistan", "us"), + ("埃尔比勒|erbil", "us"), + ("因吉尔利克|incirlik|土耳其|turkey", "us"), + ("苏尔坦|sultan|沙特|saudi", "us"), + ("坦夫|tanf|叙利亚|syria", "us"), + ("达夫拉|dhafra|阿联酋|uae", "us"), + ("内瓦提姆|nevatim|拉蒙|ramon|以色列|israel", "us"), + ("赛利耶|sayliyah", "us"), + ("巴林|bahrain", "us"), + ("科威特|kuwait", "us"), + ] + for kws, side in bases_us: + if any(k in t for k in kws.split("|")): + updates.append({"name_keywords": kws, "side": side, "status": "attacked", "damage_level": 2}) + if updates: + out["key_location_updates"] = updates + return out diff --git a/crawler/realtime_conflict_service.py b/crawler/realtime_conflict_service.py index bae1de7..93f90f0 100644 --- a/crawler/realtime_conflict_service.py +++ b/crawler/realtime_conflict_service.py @@ -54,23 +54,56 @@ EVENT_CACHE: List[dict] = [] def calculate_impact_score(title: str) -> int: score = 1 t = (title or "").lower() - if "missile" in t: + if "missile" in t or "导弹" in t: score += 3 - if "strike" in t: + if "strike" in t or "袭击" in t or "打击" in t: score += 2 - if "killed" in t or "death" in t or "casualt" in t: + if "killed" in t or "death" in t or "casualt" in t or "死亡" in t or "伤亡" in t: score += 4 - if "troops" in t or "soldier" in t: + if "troops" in t or "soldier" in t or "士兵" in t or "军人" in t: score += 2 - if "attack" in t or "attacked" in t: + if "attack" in t or "attacked" in t or "攻击" in t: score += 3 if "nuclear" in t or "核" in t: score += 4 - if "explosion" in t or "blast" in t or "bomb" in t: + if "explosion" in t or "blast" in t or "bomb" in t or "爆炸" in t: score += 2 return min(score, 10) +# 根据 severity 映射到 impact_score +def _severity_to_score(sev: str) -> int: + m = {"critical": 9, "high": 7, "medium": 5, "low": 2} + return m.get((sev or "").lower(), 5) + + +# 根据文本推断坐标 [lng, lat],用于 GDELT 禁用时 RSS→gdelt_events +_LOC_COORDS = [ + (["阿克罗蒂里", "akrotiri", "塞浦路斯", "cyprus"], (32.98, 34.58)), + (["巴格拉姆", "bagram", "阿富汗", "afghanistan"], (69.26, 34.95)), + (["巴格达", "baghdad", "伊拉克", "iraq"], (44.37, 33.31)), + (["贝鲁特", "beirut", "黎巴嫩", "lebanon"], (35.49, 33.89)), + (["耶路撒冷", "jerusalem", "特拉维夫", "tel aviv", "以色列", "israel"], (35.21, 31.77)), + (["阿巴斯港", "bandar abbas", "霍尔木兹", "hormuz"], (56.27, 27.18)), + (["米纳布", "minab"], (57.08, 27.13)), + (["德黑兰", "tehran", "伊朗", "iran"], (51.389, 35.689)), + (["大马士革", "damascus", "叙利亚", "syria"], (36.28, 33.50)), + (["迪拜", "dubai", "阿联酋", "uae"], (55.27, 25.20)), + (["沙特", "saudi"], (46.73, 24.71)), + (["巴基斯坦", "pakistan"], (73.06, 33.72)), + (["奥斯汀", "austin"], (-97.74, 30.27)), +] + + +def _infer_coords(text: str) -> tuple: + t = (text or "").lower() + for kws, (lng, lat) in _LOC_COORDS: + for k in kws: + if k in t: + return (lng, lat) + return (IRAN_COORD[0], IRAN_COORD[1]) + + # ========================== # 获取 GDELT 实时事件 # ========================== @@ -216,6 +249,39 @@ def _notify_node() -> None: print(f" [warn] notify API: {e}") +def _rss_to_gdelt_fallback() -> None: + """GDELT 禁用时,将 situation_update 同步到 gdelt_events,使地图有冲突点""" + if not GDELT_DISABLED or not os.path.exists(DB_PATH): + return + try: + conn = sqlite3.connect(DB_PATH, timeout=10) + rows = conn.execute( + "SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 50" + ).fetchall() + conn.close() + events = [] + for r in rows: + uid, ts, cat, summary, sev = r + lng, lat = _infer_coords((summary or "")[:300]) + impact = _severity_to_score(sev) + events.append({ + "event_id": f"rss_{uid}", + "event_time": ts, + "title": (summary or "")[:500], + "lat": lat, + "lng": lng, + "impact_score": impact, + "url": "", + }) + if events: + global EVENT_CACHE + EVENT_CACHE = events + _write_to_db(events) + _notify_node() + except Exception as e: + print(f" [warn] RSS→gdelt fallback: {e}") + + # ========================== # RSS 新闻抓取(补充 situation_update + AI 提取面板数据) # ========================== @@ -243,6 +309,9 @@ def fetch_news() -> None: LAST_FETCH["inserted"] = n if items: _extract_and_merge_panel_data(items) + # GDELT 禁用时用 RSS 填充 gdelt_events,使地图有冲突点 + if GDELT_DISABLED: + _rss_to_gdelt_fallback() # 每次抓取完成都通知 Node 更新时间戳,便于「实时更新」显示 _notify_node() print(f"[{datetime.now().strftime('%H:%M:%S')}] RSS 抓取 {len(items)} 条,新增入库 {n} 条") diff --git a/server/data.db b/server/data.db index ee619b0..0ecd106 100644 Binary files a/server/data.db and b/server/data.db differ diff --git a/server/data.db-shm b/server/data.db-shm deleted file mode 100644 index db40834..0000000 Binary files a/server/data.db-shm and /dev/null differ diff --git a/server/data.db-wal b/server/data.db-wal deleted file mode 100644 index 1bd86b3..0000000 Binary files a/server/data.db-wal and /dev/null differ