fix: 优化后台数据
This commit is contained in:
BIN
crawler/__pycache__/db_merge.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/db_merge.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/extractor_rules.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/extractor_rules.cpython-311.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -55,16 +55,16 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
if v is not None:
|
||||
loss_us["civilian_wounded"] = v
|
||||
|
||||
# 基地损毁(美方基地居多)
|
||||
v = _first_int(t, r"(\d+)[\s\w]*(?:base)[\s\w]*(?:destroyed|leveled)")
|
||||
# 基地损毁(美方基地居多)+ 中文
|
||||
v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:destroyed|leveled|摧毁|夷平)")
|
||||
if v is not None:
|
||||
loss_us["bases_destroyed"] = v
|
||||
v = _first_int(t, r"(\d+)[\s\w]*(?:base)[\s\w]*(?:damaged|hit|struck)")
|
||||
v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:damaged|hit|struck|受损|袭击)")
|
||||
if v is not None:
|
||||
loss_us["bases_damaged"] = v
|
||||
if "base" in t and ("destroy" in t or "level" in t) and not loss_us.get("bases_destroyed"):
|
||||
if ("base" in t or "基地" in t) and ("destroy" in t or "level" in t or "摧毁" in t or "夷平" in t) and not loss_us.get("bases_destroyed"):
|
||||
loss_us["bases_destroyed"] = 1
|
||||
if "base" in t and ("damage" in t or "hit" in t or "struck" in t or "strike" in t) and not loss_us.get("bases_damaged"):
|
||||
if ("base" in t or "基地" in t) and ("damage" in t or "hit" in t or "struck" in t or "strike" in t or "袭击" in t or "受损" in t) and not loss_us.get("bases_damaged"):
|
||||
loss_us["bases_damaged"] = 1
|
||||
|
||||
# 战机 / 舰船(根据上下文判断阵营)
|
||||
@@ -92,4 +92,32 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
||||
if "wall street" in t or " dow " in t or "s&p" in t or "market slump" in t or "stock fall" in t or "美股" in t:
|
||||
out["wall_street"] = {"time": ts, "value": 55}
|
||||
|
||||
# key_location_updates:受袭基地(与 key_location.name 匹配)
|
||||
# 新闻提及基地遭袭时,更新对应基地 status
|
||||
base_attacked = ("base" in t or "基地" in t) and ("attack" in t or "hit" in t or "strike" in t or "damage" in t or "袭击" in t or "打击" in t)
|
||||
if base_attacked:
|
||||
updates: list = []
|
||||
# 常见美军基地关键词 -> name_keywords(用于 db_merge 的 LIKE 匹配)
|
||||
bases_us = [
|
||||
("阿萨德|阿因|asad|assad|ain", "us"),
|
||||
("巴格达|baghdad", "us"),
|
||||
("乌代德|udeid|卡塔尔|qatar", "us"),
|
||||
("阿克罗蒂里|akrotiri|塞浦路斯|cyprus", "us"),
|
||||
("巴格拉姆|bagram|阿富汗|afghanistan", "us"),
|
||||
("埃尔比勒|erbil", "us"),
|
||||
("因吉尔利克|incirlik|土耳其|turkey", "us"),
|
||||
("苏尔坦|sultan|沙特|saudi", "us"),
|
||||
("坦夫|tanf|叙利亚|syria", "us"),
|
||||
("达夫拉|dhafra|阿联酋|uae", "us"),
|
||||
("内瓦提姆|nevatim|拉蒙|ramon|以色列|israel", "us"),
|
||||
("赛利耶|sayliyah", "us"),
|
||||
("巴林|bahrain", "us"),
|
||||
("科威特|kuwait", "us"),
|
||||
]
|
||||
for kws, side in bases_us:
|
||||
if any(k in t for k in kws.split("|")):
|
||||
updates.append({"name_keywords": kws, "side": side, "status": "attacked", "damage_level": 2})
|
||||
if updates:
|
||||
out["key_location_updates"] = updates
|
||||
|
||||
return out
|
||||
|
||||
@@ -54,23 +54,56 @@ EVENT_CACHE: List[dict] = []
|
||||
def calculate_impact_score(title: str) -> int:
    """Score a headline from 1 to 10 by the conflict keywords it contains.

    The title is lower-cased and checked for English and Chinese keywords by
    plain substring search. Every keyword group that has at least one hit
    adds its weight once to a base score of 1; the total is capped at 10.

    Args:
        title: Headline text. ``None`` or an empty string yields the base
            score of 1.

    Returns:
        An integer in the range 1..10.
    """
    text = (title or "").lower()
    # (keywords, weight): a group contributes at most once, no matter how
    # many of its keywords appear in the title.
    weighted_groups = (
        (("missile", "导弹"), 3),
        (("strike", "袭击", "打击"), 2),
        (("killed", "death", "casualt", "死亡", "伤亡"), 4),
        (("troops", "soldier", "士兵", "军人"), 2),
        # "attacked" needs no separate entry: it already contains "attack".
        (("attack", "攻击"), 3),
        (("nuclear", "核"), 4),
        (("explosion", "blast", "bomb", "爆炸"), 2),
    )
    score = 1 + sum(
        weight
        for keywords, weight in weighted_groups
        if any(keyword in text for keyword in keywords)
    )
    return min(score, 10)
|
||||
|
||||
|
||||
# 根据 severity 映射到 impact_score
|
||||
def _severity_to_score(sev: str) -> int:
|
||||
m = {"critical": 9, "high": 7, "medium": 5, "low": 2}
|
||||
return m.get((sev or "").lower(), 5)
|
||||
|
||||
|
||||
# 根据文本推断坐标 [lng, lat],用于 GDELT 禁用时 RSS→gdelt_events
|
||||
_LOC_COORDS = [
|
||||
(["阿克罗蒂里", "akrotiri", "塞浦路斯", "cyprus"], (32.98, 34.58)),
|
||||
(["巴格拉姆", "bagram", "阿富汗", "afghanistan"], (69.26, 34.95)),
|
||||
(["巴格达", "baghdad", "伊拉克", "iraq"], (44.37, 33.31)),
|
||||
(["贝鲁特", "beirut", "黎巴嫩", "lebanon"], (35.49, 33.89)),
|
||||
(["耶路撒冷", "jerusalem", "特拉维夫", "tel aviv", "以色列", "israel"], (35.21, 31.77)),
|
||||
(["阿巴斯港", "bandar abbas", "霍尔木兹", "hormuz"], (56.27, 27.18)),
|
||||
(["米纳布", "minab"], (57.08, 27.13)),
|
||||
(["德黑兰", "tehran", "伊朗", "iran"], (51.389, 35.689)),
|
||||
(["大马士革", "damascus", "叙利亚", "syria"], (36.28, 33.50)),
|
||||
(["迪拜", "dubai", "阿联酋", "uae"], (55.27, 25.20)),
|
||||
(["沙特", "saudi"], (46.73, 24.71)),
|
||||
(["巴基斯坦", "pakistan"], (73.06, 33.72)),
|
||||
(["奥斯汀", "austin"], (-97.74, 30.27)),
|
||||
]
|
||||
|
||||
|
||||
def _infer_coords(text: str) -> tuple:
|
||||
t = (text or "").lower()
|
||||
for kws, (lng, lat) in _LOC_COORDS:
|
||||
for k in kws:
|
||||
if k in t:
|
||||
return (lng, lat)
|
||||
return (IRAN_COORD[0], IRAN_COORD[1])
|
||||
|
||||
|
||||
# ==========================
|
||||
# 获取 GDELT 实时事件
|
||||
# ==========================
|
||||
@@ -216,6 +249,39 @@ def _notify_node() -> None:
|
||||
print(f" [warn] notify API: {e}")
|
||||
|
||||
|
||||
def _rss_to_gdelt_fallback() -> None:
    """Mirror recent situation_update rows into the event store when GDELT is off.

    Does nothing unless ``GDELT_DISABLED`` is truthy and ``DB_PATH`` exists.
    Otherwise the 50 newest ``situation_update`` rows are converted into
    event dicts (coordinates inferred from the summary, impact derived from
    the severity), stored in ``EVENT_CACHE``, handed to ``_write_to_db`` and
    followed by ``_notify_node()``. Per the original note, the purpose is to
    fill gdelt_events so the map still shows conflict points.

    Any failure is reported as a warning on stdout and swallowed: this is a
    best-effort fallback and must not break the caller.
    """
    global EVENT_CACHE

    if not GDELT_DISABLED or not os.path.exists(DB_PATH):
        return
    try:
        conn = sqlite3.connect(DB_PATH, timeout=10)
        try:
            rows = conn.execute(
                "SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 50"
            ).fetchall()
        finally:
            # Close even when the query fails, so the handle is not leaked.
            conn.close()

        events = []
        for uid, ts, _category, summary, sev in rows:
            summary_text = summary or ""
            # Only the first 300 characters are scanned for place names.
            lng, lat = _infer_coords(summary_text[:300])
            events.append({
                "event_id": f"rss_{uid}",
                "event_time": ts,
                "title": summary_text[:500],
                "lat": lat,
                "lng": lng,
                "impact_score": _severity_to_score(sev),
                "url": "",
            })

        # With no rows, the existing cache and database are left untouched.
        if events:
            EVENT_CACHE = events
            _write_to_db(events)
            _notify_node()
    except Exception as e:
        print(f" [warn] RSS→gdelt fallback: {e}")
|
||||
|
||||
|
||||
# ==========================
|
||||
# RSS 新闻抓取(补充 situation_update + AI 提取面板数据)
|
||||
# ==========================
|
||||
@@ -243,6 +309,9 @@ def fetch_news() -> None:
|
||||
LAST_FETCH["inserted"] = n
|
||||
if items:
|
||||
_extract_and_merge_panel_data(items)
|
||||
# GDELT 禁用时用 RSS 填充 gdelt_events,使地图有冲突点
|
||||
if GDELT_DISABLED:
|
||||
_rss_to_gdelt_fallback()
|
||||
# 每次抓取完成都通知 Node 更新时间戳,便于「实时更新」显示
|
||||
_notify_node()
|
||||
print(f"[{datetime.now().strftime('%H:%M:%S')}] RSS 抓取 {len(items)} 条,新增入库 {n} 条")
|
||||
|
||||
Reference in New Issue
Block a user