fix: 优化虫 机制,新增伊朗支援

This commit is contained in:
Daniel
2026-03-06 10:34:52 +08:00
parent 89145a6743
commit 9f2442f2e3
20 changed files with 411 additions and 62 deletions

View File

@@ -1,11 +1,24 @@
# -*- coding: utf-8 -*-
"""
基于规则的新闻数据提取(无需 Ollama)
从新闻文本中提取战损、报复情绪等数值,供 db_merge 写入
从新闻文本中提取战损、报复情绪、攻击地点与盟军打击线,供 db_merge 写入
"""
import re
from datetime import datetime, timezone
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional, Tuple
# Common strike targets inside Iran, one tuple per target:
#   (display name, longitude, latitude, keywords)
# `keywords` is a "|"-separated list of alternatives. Consumers split it on "|"
# and test each alternative as a plain substring of the news text, so every
# alternative must be specific enough not to occur inside unrelated words or
# personal names. Latin-script alternatives are written in lower case
# (presumably matched against lower-cased text -- confirm against the caller).
IRAN_STRIKE_TARGETS: List[Tuple[str, float, float, str]] = [
    ("纳坦兹", 51.916, 33.666, "natanz|纳坦兹"),
    ("伊斯法罕", 51.67, 32.65, "isfahan|esfahan|伊斯法罕"),
    ("德黑兰", 51.389, 35.689, "tehran|德黑兰"),
    ("布什尔", 50.83, 28.97, "bushehr|布什尔"),
    # Bandar Abbas: a bare "abbas" / "阿巴斯" alternative would also match
    # personal names (e.g. "Abbas Araghchi", "Mahmoud Abbas") and produce
    # spurious strike lines, so only port-specific spellings are listed.
    # "霍尔木兹" (Hormuz) is intentionally kept mapped to this port's coordinates.
    (
        "阿巴斯港",
        56.27,
        27.18,
        "bandar abbas|bandar-e abbas|bandar-e-abbas|bandarabbas"
        "|阿巴斯港|班达尔阿巴斯|霍尔木兹",
    ),
    ("克尔曼沙赫", 47.06, 34.31, "kermanshah|克尔曼沙赫"),
    ("大不里士", 46.29, 38.08, "tabriz|大不里士"),
    ("卡拉季", 50.99, 35.83, "karaj|卡拉季"),
    ("米纳布", 57.08, 27.13, "minab|米纳布"),
]
def _first_int(text: str, pattern: str) -> Optional[int]:
@@ -251,4 +264,30 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
if updates:
out["key_location_updates"] = updates
# map_strike_lines:盟军(以色列/林肯/福特)打击伊朗目标,供地图攻击动画更新
strike_verbs = ("strike" in t or "struck" in t or "strikes" in t or "hit" in t or "attack" in t
or "打击" in (text or "") or "空袭" in (text or "") or "袭击" in (text or ""))
if strike_verbs and ("iran" in t or "伊朗" in (text or "") or any(
any(p in t for p in kw.split("|")) for _n, _lng, _lat, kw in IRAN_STRIKE_TARGETS
)):
source_id = "israel"
if "lincoln" in t or "林肯" in (text or ""):
source_id = "lincoln"
elif "ford" in t or "福特" in (text or ""):
source_id = "ford"
elif ("israel" in t or "idf" in t or "以色列" in (text or "")) and ("us " in t or "american" in t or "pentagon" in t):
source_id = "israel" # 多国时优先以色列
lines = []
for name, lng, lat, kw in IRAN_STRIKE_TARGETS:
if any(p in t for p in kw.split("|")):
lines.append({
"source_id": source_id,
"target_lng": lng,
"target_lat": lat,
"target_name": name,
"struck_at": ts,
})
if lines:
out["map_strike_lines"] = lines
return out