fix: 修复移动端报错
This commit is contained in:
@@ -41,7 +41,7 @@ def _ensure_tables(conn: sqlite3.Connection) -> None:
|
|||||||
conn.execute("ALTER TABLE combat_losses ADD COLUMN updated_at TEXT DEFAULT (datetime('now'))")
|
conn.execute("ALTER TABLE combat_losses ADD COLUMN updated_at TEXT DEFAULT (datetime('now'))")
|
||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
pass
|
pass
|
||||||
for col in ("drones", "missiles", "helicopters", "submarines"):
|
for col in ("drones", "missiles", "helicopters", "submarines", "tanks", "civilian_ships", "airport_port"):
|
||||||
try:
|
try:
|
||||||
conn.execute(f"ALTER TABLE combat_losses ADD COLUMN {col} INTEGER NOT NULL DEFAULT 0")
|
conn.execute(f"ALTER TABLE combat_losses ADD COLUMN {col} INTEGER NOT NULL DEFAULT 0")
|
||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
@@ -79,12 +79,12 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
|
|||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
row = conn.execute(
|
row = conn.execute(
|
||||||
"SELECT personnel_killed,personnel_wounded,civilian_killed,civilian_wounded,bases_destroyed,bases_damaged,aircraft,warships,armor,vehicles,drones,missiles,helicopters,submarines FROM combat_losses WHERE side = ?",
|
"SELECT personnel_killed,personnel_wounded,civilian_killed,civilian_wounded,bases_destroyed,bases_damaged,aircraft,warships,armor,vehicles,drones,missiles,helicopters,submarines,tanks,civilian_ships,airport_port FROM combat_losses WHERE side = ?",
|
||||||
(side,),
|
(side,),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
cur = {"personnel_killed": 0, "personnel_wounded": 0, "civilian_killed": 0, "civilian_wounded": 0,
|
cur = {"personnel_killed": 0, "personnel_wounded": 0, "civilian_killed": 0, "civilian_wounded": 0,
|
||||||
"bases_destroyed": 0, "bases_damaged": 0, "aircraft": 0, "warships": 0, "armor": 0, "vehicles": 0,
|
"bases_destroyed": 0, "bases_damaged": 0, "aircraft": 0, "warships": 0, "armor": 0, "vehicles": 0,
|
||||||
"drones": 0, "missiles": 0, "helicopters": 0, "submarines": 0}
|
"drones": 0, "missiles": 0, "helicopters": 0, "submarines": 0, "tanks": 0, "civilian_ships": 0, "airport_port": 0}
|
||||||
if row:
|
if row:
|
||||||
cur = {
|
cur = {
|
||||||
"personnel_killed": row[0], "personnel_wounded": row[1], "civilian_killed": row[2] or 0,
|
"personnel_killed": row[0], "personnel_wounded": row[1], "civilian_killed": row[2] or 0,
|
||||||
@@ -92,6 +92,7 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
|
|||||||
"aircraft": row[6], "warships": row[7], "armor": row[8], "vehicles": row[9],
|
"aircraft": row[6], "warships": row[7], "armor": row[8], "vehicles": row[9],
|
||||||
"drones": row[10] if len(row) > 10 else 0, "missiles": row[11] if len(row) > 11 else 0,
|
"drones": row[10] if len(row) > 10 else 0, "missiles": row[11] if len(row) > 11 else 0,
|
||||||
"helicopters": row[12] if len(row) > 12 else 0, "submarines": row[13] if len(row) > 13 else 0,
|
"helicopters": row[12] if len(row) > 12 else 0, "submarines": row[13] if len(row) > 13 else 0,
|
||||||
|
"tanks": row[14] if len(row) > 14 else 0, "civilian_ships": row[15] if len(row) > 15 else 0, "airport_port": row[16] if len(row) > 16 else 0,
|
||||||
}
|
}
|
||||||
pk = max(0, (cur["personnel_killed"] or 0) + delta.get("personnel_killed", 0))
|
pk = max(0, (cur["personnel_killed"] or 0) + delta.get("personnel_killed", 0))
|
||||||
pw = max(0, (cur["personnel_wounded"] or 0) + delta.get("personnel_wounded", 0))
|
pw = max(0, (cur["personnel_wounded"] or 0) + delta.get("personnel_wounded", 0))
|
||||||
@@ -107,19 +108,22 @@ def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
|
|||||||
ms = max(0, (cur["missiles"] or 0) + delta.get("missiles", 0))
|
ms = max(0, (cur["missiles"] or 0) + delta.get("missiles", 0))
|
||||||
hp = max(0, (cur["helicopters"] or 0) + delta.get("helicopters", 0))
|
hp = max(0, (cur["helicopters"] or 0) + delta.get("helicopters", 0))
|
||||||
sb = max(0, (cur["submarines"] or 0) + delta.get("submarines", 0))
|
sb = max(0, (cur["submarines"] or 0) + delta.get("submarines", 0))
|
||||||
|
tk = max(0, (cur["tanks"] or 0) + delta.get("tanks", 0))
|
||||||
|
cs = max(0, (cur["civilian_ships"] or 0) + delta.get("civilian_ships", 0))
|
||||||
|
ap = max(0, (cur["airport_port"] or 0) + delta.get("airport_port", 0))
|
||||||
ts = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
ts = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||||
if row:
|
if row:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"""UPDATE combat_losses SET personnel_killed=?, personnel_wounded=?, civilian_killed=?, civilian_wounded=?,
|
"""UPDATE combat_losses SET personnel_killed=?, personnel_wounded=?, civilian_killed=?, civilian_wounded=?,
|
||||||
bases_destroyed=?, bases_damaged=?, aircraft=?, warships=?, armor=?, vehicles=?,
|
bases_destroyed=?, bases_damaged=?, aircraft=?, warships=?, armor=?, vehicles=?,
|
||||||
drones=?, missiles=?, helicopters=?, submarines=?, updated_at=? WHERE side=?""",
|
drones=?, missiles=?, helicopters=?, submarines=?, tanks=?, civilian_ships=?, airport_port=?, updated_at=? WHERE side=?""",
|
||||||
(pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, ts, side),
|
(pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, tk, cs, ap, ts, side),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"""INSERT OR REPLACE INTO combat_losses (side, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
|
"""INSERT OR REPLACE INTO combat_losses (side, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
|
||||||
bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles, drones, missiles, helicopters, submarines, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles, drones, missiles, helicopters, submarines, tanks, civilian_ships, airport_port, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||||
(side, pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, ts),
|
(side, pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, tk, cs, ap, ts),
|
||||||
)
|
)
|
||||||
if conn.total_changes > 0:
|
if conn.total_changes > 0:
|
||||||
updated = True
|
updated = True
|
||||||
|
|||||||
@@ -33,7 +33,8 @@ def _call_ollama_extract(text: str, timeout: int = 10) -> Optional[Dict[str, Any
|
|||||||
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged.
|
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged.
|
||||||
重要:bases_* 仅指已确认损毁/受损的基地数量;"军事目标"/targets 等泛指不是基地,若报道只说"X个军事目标遭袭"而无具体基地名,不填写 bases_*
|
重要:bases_* 仅指已确认损毁/受损的基地数量;"军事目标"/targets 等泛指不是基地,若报道只说"X个军事目标遭袭"而无具体基地名,不填写 bases_*
|
||||||
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
|
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
|
||||||
us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines
|
us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines,
|
||||||
|
us_tanks, iran_tanks, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
|
||||||
- retaliation_sentiment: 0-100,仅当新闻涉及伊朗报复情绪时
|
- retaliation_sentiment: 0-100,仅当新闻涉及伊朗报复情绪时
|
||||||
- wall_street_value: 0-100,仅当新闻涉及美股/市场反应时
|
- wall_street_value: 0-100,仅当新闻涉及美股/市场反应时
|
||||||
- key_location_updates: 当新闻提及具体基地/地点遭袭时,数组项 { "name_keywords": "asad|阿萨德|assad", "side": "us", "status": "attacked", "damage_level": 1-3 }
|
- key_location_updates: 当新闻提及具体基地/地点遭袭时,数组项 { "name_keywords": "asad|阿萨德|assad", "side": "us", "status": "attacked", "damage_level": 1-3 }
|
||||||
@@ -81,7 +82,7 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
|||||||
# combat_losses 增量(仅数字字段)
|
# combat_losses 增量(仅数字字段)
|
||||||
loss_us = {}
|
loss_us = {}
|
||||||
loss_ir = {}
|
loss_ir = {}
|
||||||
for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines"]:
|
for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "tanks", "civilian_ships", "airport_port"]:
|
||||||
uk = f"us_{k}"
|
uk = f"us_{k}"
|
||||||
ik = f"iran_{k}"
|
ik = f"iran_{k}"
|
||||||
if uk in parsed and isinstance(parsed[uk], (int, float)):
|
if uk in parsed and isinstance(parsed[uk], (int, float)):
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ def _call_dashscope_extract(text: str, timeout: int = 15) -> Optional[Dict[str,
|
|||||||
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged.
|
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged.
|
||||||
重要:bases_* 仅指已确认损毁/受损的基地数量;"军事目标"/"targets"等泛指不是基地,若报道只说"X个军事目标遭袭"而无具体基地名,不填写 bases_*
|
重要:bases_* 仅指已确认损毁/受损的基地数量;"军事目标"/"targets"等泛指不是基地,若报道只说"X个军事目标遭袭"而无具体基地名,不填写 bases_*
|
||||||
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
|
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
|
||||||
us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines
|
us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines,
|
||||||
|
us_tanks, iran_tanks, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
|
||||||
- retaliation_sentiment: 0-100,仅当新闻涉及伊朗报复/反击情绪时
|
- retaliation_sentiment: 0-100,仅当新闻涉及伊朗报复/反击情绪时
|
||||||
- wall_street_value: 0-100,仅当新闻涉及美股/市场反应时
|
- wall_street_value: 0-100,仅当新闻涉及美股/市场反应时
|
||||||
- key_location_updates: 当新闻提及具体基地/设施遭袭时必填,数组 [{{"name_keywords":"阿萨德|asad|assad|阿因","side":"us","status":"attacked","damage_level":1-3}}]。常用关键词:阿萨德|asad|巴格达|baghdad|乌代德|udeid|埃尔比勒|erbil|因吉尔利克|incirlik|德黑兰|tehran|阿巴斯|abbas|布什尔|bushehr|伊斯法罕|isfahan|纳坦兹|natanz
|
- key_location_updates: 当新闻提及具体基地/设施遭袭时必填,数组 [{{"name_keywords":"阿萨德|asad|assad|阿因","side":"us","status":"attacked","damage_level":1-3}}]。常用关键词:阿萨德|asad|巴格达|baghdad|乌代德|udeid|埃尔比勒|erbil|因吉尔利克|incirlik|德黑兰|tehran|阿巴斯|abbas|布什尔|bushehr|伊斯法罕|isfahan|纳坦兹|natanz
|
||||||
@@ -85,7 +86,7 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
|||||||
loss_ir = {}
|
loss_ir = {}
|
||||||
for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded",
|
for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded",
|
||||||
"bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles",
|
"bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles",
|
||||||
"drones", "missiles", "helicopters", "submarines"]:
|
"drones", "missiles", "helicopters", "submarines", "tanks", "civilian_ships", "airport_port"]:
|
||||||
uk, ik = f"us_{k}", f"iran_{k}"
|
uk, ik = f"us_{k}", f"iran_{k}"
|
||||||
if uk in parsed and isinstance(parsed[uk], (int, float)):
|
if uk in parsed and isinstance(parsed[uk], (int, float)):
|
||||||
loss_us[k] = max(0, int(parsed[uk]))
|
loss_us[k] = max(0, int(parsed[uk]))
|
||||||
|
|||||||
@@ -172,6 +172,34 @@ def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, A
|
|||||||
else:
|
else:
|
||||||
loss_us["submarines"] = v
|
loss_us["submarines"] = v
|
||||||
|
|
||||||
|
# 坦克 tank / 坦克
|
||||||
|
v = _first_int(t, r"(\d+)[\s\w]*(?:tank|坦克)[\s\w]*(?:destroyed|damaged|lost|hit|摧毁|损毁|击毁)")
|
||||||
|
if v is not None:
|
||||||
|
if "iran" in t or "iranian" in t:
|
||||||
|
loss_ir["tanks"] = v
|
||||||
|
else:
|
||||||
|
loss_us["tanks"] = v
|
||||||
|
|
||||||
|
# 民船 civilian ship / 商船 / 民船
|
||||||
|
v = _first_int(t, r"(\d+)[\s\w]*(?:civilian\s*ship|merchant|商船|民船)[\s\w]*(?:sunk|damaged|hit|击沉|受损)")
|
||||||
|
if v is None:
|
||||||
|
v = _first_int(text or t, r"(?:民船|商船|货船)[\s\w]*(\d+)[\s\w]*(?:艘)?[\s\w]*(?:击沉|受损|袭击)")
|
||||||
|
if v is not None:
|
||||||
|
if "iran" in t or "iranian" in t or "伊朗" in (text or ""):
|
||||||
|
loss_ir["civilian_ships"] = v
|
||||||
|
else:
|
||||||
|
loss_us["civilian_ships"] = v
|
||||||
|
|
||||||
|
# 机/港 airport / port / 机场 / 港口
|
||||||
|
v = _first_int(t, r"(\d+)[\s\w]*(?:airport|port|机场|港口)[\s\w]*(?:destroyed|damaged|hit|struck|摧毁|受损|袭击)")
|
||||||
|
if v is None:
|
||||||
|
v = _first_int(text or t, r"(?:机场|港口)[\s\w]*(\d+)[\s\w]*(?:处|个)?[\s\w]*(?:受损|袭击|摧毁)")
|
||||||
|
if v is not None:
|
||||||
|
if "iran" in t or "iranian" in t or "伊朗" in (text or ""):
|
||||||
|
loss_ir["airport_port"] = v
|
||||||
|
else:
|
||||||
|
loss_us["airport_port"] = v
|
||||||
|
|
||||||
if loss_us:
|
if loss_us:
|
||||||
out.setdefault("combat_losses_delta", {})["us"] = loss_us
|
out.setdefault("combat_losses_delta", {})["us"] = loss_us
|
||||||
if loss_ir:
|
if loss_ir:
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ TimeSeriesPoint = Tuple[str, int] # (ISO time, value)
|
|||||||
# AI 可从新闻中提取的字段
|
# AI 可从新闻中提取的字段
|
||||||
EXTRACTABLE_FIELDS = {
|
EXTRACTABLE_FIELDS = {
|
||||||
"situation_update": ["summary", "category", "severity", "timestamp"],
|
"situation_update": ["summary", "category", "severity", "timestamp"],
|
||||||
"combat_losses": ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines"],
|
"combat_losses": ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "tanks", "civilian_ships", "airport_port"],
|
||||||
"retaliation": ["value"], # 0-100
|
"retaliation": ["value"], # 0-100
|
||||||
"wall_street_trend": ["time", "value"], # 0-100
|
"wall_street_trend": ["time", "value"], # 0-100
|
||||||
"conflict_stats": ["estimated_casualties", "estimated_strike_count"],
|
"conflict_stats": ["estimated_casualties", "estimated_strike_count"],
|
||||||
|
|||||||
137
docs/CRAWLER_LOGIC.md
Normal file
137
docs/CRAWLER_LOGIC.md
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
# 爬虫逻辑梳理与数据校验
|
||||||
|
|
||||||
|
## 一、两条入口,数据流不同
|
||||||
|
|
||||||
|
### 1. 入口 A:`npm run crawler`(main.py)
|
||||||
|
|
||||||
|
- **流程**:RSS 抓取 → 关键词过滤 → 分类/严重度 → **直接写 situation_update** → 通知 API
|
||||||
|
- **不经过**:翻译、news_content、AI 提取(战损/基地等)
|
||||||
|
- **写入表**:`situation_update`、`situation.updated_at`
|
||||||
|
- **用途**:轻量、只给「事件脉络」喂新条目,不更新战损/基地/报复指数
|
||||||
|
|
||||||
|
```
|
||||||
|
RSS_FEEDS → fetch_all() → KEYWORDS 过滤 → parser_ai.classify_and_severity
|
||||||
|
→ write_updates(items) → situation_update INSERT + situation 表 touch
|
||||||
|
→ notify_api()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. 入口 B:`npm run gdelt`(realtime_conflict_service.py)
|
||||||
|
|
||||||
|
- **流程**:RSS 抓取 → 翻译 → 清洗 → **news_content 去重** → situation_update → **AI 提取 → db_merge** → GDELT 事件(可选)→ 通知 API
|
||||||
|
- **写入表**:`news_content`、`situation_update`、`situation`;提取后还有 `combat_losses`、`key_location`、`retaliation_*`、`wall_street_trend` 等
|
||||||
|
- **用途**:完整管线,前端「战损 / 军事基地 / 报复 / 美股」等数据都依赖这条
|
||||||
|
|
||||||
|
```
|
||||||
|
RSS → fetch_all() → translate_to_chinese → cleaner_ai → save_and_dedup → news_content
|
||||||
|
→ write_updates(new_items) → situation_update
|
||||||
|
→ _extract_and_merge_panel_data(new_items) → extract_from_news() → db_merge.merge()
|
||||||
|
→ (可选) fetch_gdelt_events() → gdelt_events, conflict_stats
|
||||||
|
→ _notify_node()
|
||||||
|
```
|
||||||
|
|
||||||
|
**结论**:要检查「抓回的数据是否有效」且包含战损/基地等,应跑 **入口 B**(gdelt 服务);若只关心事件脉络条数,可看入口 A。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、入口 B 逐步拆解(用于逐段校验)
|
||||||
|
|
||||||
|
### 2.1 RSS 抓取与过滤
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 源列表 | `config.RSS_FEEDS` | 多国媒体 RSS,见 config.py |
|
||||||
|
| 抓取 | `scrapers.rss_scraper.fetch_all()` | feedparser,单源超时 10s |
|
||||||
|
| 过滤 | `_matches_keywords(text)` | 标题+摘要 至少命中 `config.KEYWORDS` 中一个才保留 |
|
||||||
|
| 去重 | `(title[:80], link)` | 同一条不重复加入当次列表 |
|
||||||
|
| 分类 | `parser_ai.classify_and_severity(text)` | 得到 category、severity(Ollama 或规则) |
|
||||||
|
|
||||||
|
**校验**:`npm run crawler:test` 看本次抓到的条数;若为 0,查网络或放宽/检查 KEYWORDS。
|
||||||
|
|
||||||
|
### 2.2 翻译与清洗(仅入口 B)
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 翻译 | `translate_utils.translate_to_chinese()` | 标题/摘要译成中文(依赖配置) |
|
||||||
|
| 清洗 | `cleaner_ai.clean_news_for_panel()` | 截断、清理;`ensure_category` / `ensure_severity` 合法化 |
|
||||||
|
|
||||||
|
### 2.3 落库:news_content(去重)与 situation_update
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 去重 | `news_storage.save_and_dedup(items)` | 按 `content_hash(title, summary, url)` 判重,只插入新记录 |
|
||||||
|
| 表 | `news_content` | id, content_hash, title, summary, url, source, published_at, category, severity |
|
||||||
|
| 表 | `situation_update` | 仅对 **去重后的 new_items** 调用 `write_updates()`,供前端「事件脉络」 |
|
||||||
|
|
||||||
|
**校验**:
|
||||||
|
|
||||||
|
- `news_content`:`SELECT COUNT(*), MAX(published_at) FROM news_content`
|
||||||
|
- `situation_update`:`SELECT COUNT(*), MAX(timestamp) FROM situation_update`
|
||||||
|
- 服务状态:`GET http://localhost:8000/crawler/status` 看 `last_fetch_items` / `last_fetch_inserted` / `last_fetch_error`
|
||||||
|
|
||||||
|
### 2.4 AI 提取与 db_merge(战损 / 基地 / 报复等)
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 输入 | `_extract_and_merge_panel_data(new_items)` | 仅处理本次 **新增** 的 new_items,前 limit 条(DashScope 10 条,规则 25 条,Ollama 10 条) |
|
||||||
|
| 文本 | — | 每条 `title + " " + summary`,长度 < 20 跳过 |
|
||||||
|
| 提取器选择 | 环境变量 | `DASHSCOPE_API_KEY` → extractor_dashscope;`CLEANER_AI_DISABLED=1` → extractor_rules;否则 extractor_ai(Ollama) |
|
||||||
|
| 输出结构 | 见 panel_schema / 各 extractor | `situation_update?`, `combat_losses_delta?`, `retaliation?`, `wall_street?`, `key_location_updates?` |
|
||||||
|
| 合并 | `db_merge.merge(extracted)` | 见下表 |
|
||||||
|
|
||||||
|
**merge 映射概要**:
|
||||||
|
|
||||||
|
| 提取字段 | 写入表/逻辑 |
|
||||||
|
|----------|-------------|
|
||||||
|
| situation_update | situation_update 表 INSERT(id 为 hash) |
|
||||||
|
| combat_losses_delta | combat_losses 表,按 side 增量叠加 |
|
||||||
|
| retaliation | retaliation_current 替换 + retaliation_history 追加 |
|
||||||
|
| wall_street | wall_street_trend 表 INSERT |
|
||||||
|
| key_location_updates | key_location 表 UPDATE status/damage_level(name LIKE 关键词) |
|
||||||
|
|
||||||
|
**校验**:
|
||||||
|
|
||||||
|
- 战损:`SELECT * FROM combat_losses`
|
||||||
|
- 基地:`SELECT id, name, side, status, damage_level FROM key_location WHERE status != 'operational' OR damage_level > 0`
|
||||||
|
- 报复:`SELECT * FROM retaliation_current` 与 `retaliation_history` 最近几条
|
||||||
|
- 事件脉络:`SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 20`
|
||||||
|
|
||||||
|
### 2.5 GDELT(可选)
|
||||||
|
|
||||||
|
- `GDELT_DISABLED=1` 时跳过 GDELT,仅用 RSS;可用 `_rss_to_gdelt_fallback()` 用 RSS 标题生成 gdelt_events。
|
||||||
|
- 未禁用时:`fetch_gdelt_events()` 拉 GDELT → 写 `gdelt_events`、`conflict_stats`。
|
||||||
|
|
||||||
|
**校验**:`SELECT COUNT(*), MAX(event_time) FROM gdelt_events`;`SELECT * FROM conflict_stats WHERE id=1`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、如何检查「抓回的数据是否有效」
|
||||||
|
|
||||||
|
1. **确认跑的入口**
|
||||||
|
- 只跑 `npm run crawler`:只有 situation_update 会有新数据,战损/基地不会变。
|
||||||
|
- 跑 `npm run gdelt` 且服务常驻:才会既有 situation_update,又有 combat_losses、key_location 等。
|
||||||
|
|
||||||
|
2. **看 DB 与 API**
|
||||||
|
- 按第二节各小节的「校验」语句,查 `news_content`、`situation_update`、`combat_losses`、`key_location`、`retaliation_*`、`gdelt_events`、`conflict_stats`。
|
||||||
|
- 前端数据来源:`GET /api/situation`(见 server/situationData.js),对照上述表即可。
|
||||||
|
|
||||||
|
3. **看提取是否触发**
|
||||||
|
- 若 `combat_losses` / `key_location` 一直不更新:确认是入口 B、有 new_items、提取器未报错;可对单条新闻跑 `extract_from_news(text)` 看是否产出 combat_losses_delta / key_location_updates。
|
||||||
|
|
||||||
|
4. **重跑历史提取(补数据)**
|
||||||
|
- `POST http://localhost:8000/crawler/backfill`:用当前 situation_update 最近 50 条重新做一次提取并 merge,可用来修历史未提取的数据。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、配置与环境变量(与数据有效性相关)
|
||||||
|
|
||||||
|
| 变量 | 作用 |
|
||||||
|
|------|------|
|
||||||
|
| DB_PATH | 与 server 共用的 SQLite 路径,必须一致 |
|
||||||
|
| API_BASE | 通知 Node 的地址,merge 后通知前端 |
|
||||||
|
| DASHSCOPE_API_KEY | 有则用 DashScope 提取;无则用 Ollama 或规则 |
|
||||||
|
| CLEANER_AI_DISABLED=1 | 用规则提取(extractor_rules),不用 Ollama |
|
||||||
|
| GDELT_DISABLED=1 | 不用 GDELT,仅 RSS;RSS 可转 gdelt_events 占位 |
|
||||||
|
| CRAWL_INTERVAL | main.py 抓取间隔(秒) |
|
||||||
|
| RSS_INTERVAL_SEC / FETCH_INTERVAL_SEC | realtime 服务里 RSS / GDELT 间隔 |
|
||||||
|
|
||||||
|
按上述顺序对照「入口 → RSS → 去重 → situation_update → 提取 → merge → 表」即可逐段检查爬虫抓回的数据是否有效。
|
||||||
Binary file not shown.
Binary file not shown.
@@ -149,6 +149,9 @@ try {
|
|||||||
if (!lossNames.includes('missiles')) db.exec('ALTER TABLE combat_losses ADD COLUMN missiles INTEGER NOT NULL DEFAULT 0')
|
if (!lossNames.includes('missiles')) db.exec('ALTER TABLE combat_losses ADD COLUMN missiles INTEGER NOT NULL DEFAULT 0')
|
||||||
if (!lossNames.includes('helicopters')) db.exec('ALTER TABLE combat_losses ADD COLUMN helicopters INTEGER NOT NULL DEFAULT 0')
|
if (!lossNames.includes('helicopters')) db.exec('ALTER TABLE combat_losses ADD COLUMN helicopters INTEGER NOT NULL DEFAULT 0')
|
||||||
if (!lossNames.includes('submarines')) db.exec('ALTER TABLE combat_losses ADD COLUMN submarines INTEGER NOT NULL DEFAULT 0')
|
if (!lossNames.includes('submarines')) db.exec('ALTER TABLE combat_losses ADD COLUMN submarines INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('tanks')) db.exec('ALTER TABLE combat_losses ADD COLUMN tanks INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('civilian_ships')) db.exec('ALTER TABLE combat_losses ADD COLUMN civilian_ships INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('airport_port')) db.exec('ALTER TABLE combat_losses ADD COLUMN airport_port INTEGER NOT NULL DEFAULT 0')
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
|
|
||||||
// 迁移:所有表添加 updated_at 用于数据回放
|
// 迁移:所有表添加 updated_at 用于数据回放
|
||||||
|
|||||||
@@ -149,9 +149,9 @@ function seed() {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
db.exec(`
|
db.exec(`
|
||||||
INSERT OR REPLACE INTO combat_losses (side, bases_destroyed, bases_damaged, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded, aircraft, warships, armor, vehicles, drones, missiles, helicopters, submarines) VALUES
|
INSERT OR REPLACE INTO combat_losses (side, bases_destroyed, bases_damaged, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded, aircraft, warships, armor, vehicles, drones, missiles, helicopters, submarines, tanks, civilian_ships, airport_port) VALUES
|
||||||
('us', 0, 27, 127, 384, 18, 52, 2, 0, 0, 8, 4, 12, 1, 0),
|
('us', 0, 27, 127, 384, 18, 52, 2, 0, 0, 8, 4, 12, 1, 0, 0, 0, 0),
|
||||||
('iran', 3, 8, 2847, 5620, 412, 1203, 24, 12, 18, 42, 28, 156, 8, 2);
|
('iran', 3, 8, 2847, 5620, 412, 1203, 24, 12, 18, 42, 28, 156, 8, 2, 0, 0, 0);
|
||||||
`)
|
`)
|
||||||
} catch (_) {
|
} catch (_) {
|
||||||
db.exec(`
|
db.exec(`
|
||||||
|
|||||||
@@ -24,6 +24,9 @@ function toLosses(row) {
|
|||||||
missiles: row.missiles ?? 0,
|
missiles: row.missiles ?? 0,
|
||||||
helicopters: row.helicopters ?? 0,
|
helicopters: row.helicopters ?? 0,
|
||||||
submarines: row.submarines ?? 0,
|
submarines: row.submarines ?? 0,
|
||||||
|
tanks: row.tanks ?? 0,
|
||||||
|
civilianShips: row.civilian_ships ?? 0,
|
||||||
|
airportPort: row.airport_port ?? 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,6 +42,9 @@ const defaultLosses = {
|
|||||||
missiles: 0,
|
missiles: 0,
|
||||||
helicopters: 0,
|
helicopters: 0,
|
||||||
submarines: 0,
|
submarines: 0,
|
||||||
|
tanks: 0,
|
||||||
|
civilianShips: 0,
|
||||||
|
airportPort: 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
function getSituation() {
|
function getSituation() {
|
||||||
|
|||||||
@@ -9,6 +9,9 @@ import {
|
|||||||
Rocket,
|
Rocket,
|
||||||
Asterisk,
|
Asterisk,
|
||||||
Amphora,
|
Amphora,
|
||||||
|
Layers,
|
||||||
|
Sailboat,
|
||||||
|
Warehouse,
|
||||||
} from 'lucide-react'
|
} from 'lucide-react'
|
||||||
import type { CombatLosses } from '@/data/mockData'
|
import type { CombatLosses } from '@/data/mockData'
|
||||||
|
|
||||||
@@ -25,6 +28,9 @@ export function CombatLossesOtherPanel({ usLosses, iranLosses, className = '' }:
|
|||||||
{ label: '战舰', icon: Ship, iconColor: 'text-blue-500', us: usLosses.warships, ir: iranLosses.warships },
|
{ label: '战舰', icon: Ship, iconColor: 'text-blue-500', us: usLosses.warships, ir: iranLosses.warships },
|
||||||
{ label: '装甲', icon: Shield, iconColor: 'text-emerald-500', us: usLosses.armor, ir: iranLosses.armor },
|
{ label: '装甲', icon: Shield, iconColor: 'text-emerald-500', us: usLosses.armor, ir: iranLosses.armor },
|
||||||
{ label: '车辆', icon: Car, iconColor: 'text-slate-400', us: usLosses.vehicles, ir: iranLosses.vehicles },
|
{ label: '车辆', icon: Car, iconColor: 'text-slate-400', us: usLosses.vehicles, ir: iranLosses.vehicles },
|
||||||
|
{ label: '坦克', icon: Layers, iconColor: 'text-amber-600', us: usLosses.tanks ?? 0, ir: iranLosses.tanks ?? 0 },
|
||||||
|
{ label: '民船', icon: Sailboat, iconColor: 'text-cyan-400', us: usLosses.civilianShips ?? 0, ir: iranLosses.civilianShips ?? 0 },
|
||||||
|
{ label: '机/港', icon: Warehouse, iconColor: 'text-orange-400', us: usLosses.airportPort ?? 0, ir: iranLosses.airportPort ?? 0 },
|
||||||
{ label: '无人机', icon: Drone, iconColor: 'text-violet-400', us: usLosses.drones ?? 0, ir: iranLosses.drones ?? 0 },
|
{ label: '无人机', icon: Drone, iconColor: 'text-violet-400', us: usLosses.drones ?? 0, ir: iranLosses.drones ?? 0 },
|
||||||
{ label: '导弹', icon: Rocket, iconColor: 'text-orange-500', us: usLosses.missiles ?? 0, ir: iranLosses.missiles ?? 0 },
|
{ label: '导弹', icon: Rocket, iconColor: 'text-orange-500', us: usLosses.missiles ?? 0, ir: iranLosses.missiles ?? 0 },
|
||||||
{ label: '直升机', icon: Asterisk, iconColor: 'text-teal-400', us: usLosses.helicopters ?? 0, ir: iranLosses.helicopters ?? 0 },
|
{ label: '直升机', icon: Asterisk, iconColor: 'text-teal-400', us: usLosses.helicopters ?? 0, ir: iranLosses.helicopters ?? 0 },
|
||||||
|
|||||||
@@ -127,11 +127,25 @@ function toFeature(loc: KeyLoc, side: 'us' | 'iran', status?: BaseStatus) {
|
|||||||
|
|
||||||
const FLIGHT_DURATION_MS = 2500 // 光点飞行单程时间
|
const FLIGHT_DURATION_MS = 2500 // 光点飞行单程时间
|
||||||
|
|
||||||
|
/** 移动端/小屏降低动画更新频率以减轻卡顿;返回最小间隔 ms */
|
||||||
|
function getAnimIntervalMs(): number {
|
||||||
|
try {
|
||||||
|
if (typeof window === 'undefined') return 33
|
||||||
|
const reducedMotion =
|
||||||
|
window.matchMedia('(prefers-reduced-motion: reduce)').matches
|
||||||
|
if (reducedMotion) return 100 // 约 10fps,兼顾可访问性
|
||||||
|
return window.innerWidth <= 768 ? 50 : 33 // 移动端约 20fps,桌面约 30fps
|
||||||
|
} catch {
|
||||||
|
return 33
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export function WarMap() {
|
export function WarMap() {
|
||||||
const mapRef = useRef<MapRef>(null)
|
const mapRef = useRef<MapRef>(null)
|
||||||
const containerRef = useRef<HTMLDivElement>(null)
|
const containerRef = useRef<HTMLDivElement>(null)
|
||||||
const animRef = useRef<number>(0)
|
const animRef = useRef<number>(0)
|
||||||
const startRef = useRef<number>(0)
|
const startRef = useRef<number>(0)
|
||||||
|
const lastAnimUpdateRef = useRef<number>(0)
|
||||||
const attackPathsRef = useRef<[number, number][][]>([])
|
const attackPathsRef = useRef<[number, number][][]>([])
|
||||||
const lincolnPathsRef = useRef<[number, number][][]>([])
|
const lincolnPathsRef = useRef<[number, number][][]>([])
|
||||||
const fordPathsRef = useRef<[number, number][][]>([])
|
const fordPathsRef = useRef<[number, number][][]>([])
|
||||||
@@ -293,6 +307,11 @@ export function WarMap() {
|
|||||||
|
|
||||||
const tick = (t: number) => {
|
const tick = (t: number) => {
|
||||||
const elapsed = t - startRef.current
|
const elapsed = t - startRef.current
|
||||||
|
const intervalMs = getAnimIntervalMs()
|
||||||
|
const shouldUpdate = t - lastAnimUpdateRef.current >= intervalMs
|
||||||
|
if (shouldUpdate) lastAnimUpdateRef.current = t
|
||||||
|
|
||||||
|
if (shouldUpdate) {
|
||||||
const zoom = map.getZoom()
|
const zoom = map.getZoom()
|
||||||
const zoomScale = Math.max(0.5, zoom / 4.2) // 随镜头缩放:放大变大、缩小变小(4.2 为默认 zoom)
|
const zoomScale = Math.max(0.5, zoom / 4.2) // 随镜头缩放:放大变大、缩小变小(4.2 为默认 zoom)
|
||||||
try {
|
try {
|
||||||
@@ -409,6 +428,7 @@ export function WarMap() {
|
|||||||
map.setPaintProperty('gdelt-events-red-pulse', 'circle-opacity', opacity)
|
map.setPaintProperty('gdelt-events-red-pulse', 'circle-opacity', opacity)
|
||||||
}
|
}
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
|
}
|
||||||
animRef.current = requestAnimationFrame(tick)
|
animRef.current = requestAnimationFrame(tick)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -42,6 +42,12 @@ export interface CombatLosses {
|
|||||||
missiles?: number
|
missiles?: number
|
||||||
helicopters?: number
|
helicopters?: number
|
||||||
submarines?: number
|
submarines?: number
|
||||||
|
/** 坦克 */
|
||||||
|
tanks?: number
|
||||||
|
/** 民船 */
|
||||||
|
civilianShips?: number
|
||||||
|
/** 机/港(机场/港口) */
|
||||||
|
airportPort?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SituationUpdate {
|
export interface SituationUpdate {
|
||||||
@@ -161,6 +167,9 @@ export const INITIAL_MOCK_DATA: MilitarySituation = {
|
|||||||
missiles: 12,
|
missiles: 12,
|
||||||
helicopters: 1,
|
helicopters: 1,
|
||||||
submarines: 0,
|
submarines: 0,
|
||||||
|
tanks: 0,
|
||||||
|
civilianShips: 0,
|
||||||
|
airportPort: 0,
|
||||||
},
|
},
|
||||||
wallStreetInvestmentTrend: [
|
wallStreetInvestmentTrend: [
|
||||||
{ time: '2025-03-01T00:00:00', value: 82 },
|
{ time: '2025-03-01T00:00:00', value: 82 },
|
||||||
@@ -215,6 +224,9 @@ export const INITIAL_MOCK_DATA: MilitarySituation = {
|
|||||||
missiles: 156,
|
missiles: 156,
|
||||||
helicopters: 8,
|
helicopters: 8,
|
||||||
submarines: 2,
|
submarines: 2,
|
||||||
|
tanks: 0,
|
||||||
|
civilianShips: 0,
|
||||||
|
airportPort: 0,
|
||||||
},
|
},
|
||||||
retaliationSentiment: 78,
|
retaliationSentiment: 78,
|
||||||
retaliationSentimentHistory: [
|
retaliationSentimentHistory: [
|
||||||
|
|||||||
@@ -80,6 +80,9 @@ export function useReplaySituation(): MilitarySituation {
|
|||||||
missiles: lerp(0, usLoss.missiles ?? 0),
|
missiles: lerp(0, usLoss.missiles ?? 0),
|
||||||
helicopters: lerp(0, usLoss.helicopters ?? 0),
|
helicopters: lerp(0, usLoss.helicopters ?? 0),
|
||||||
submarines: lerp(0, usLoss.submarines ?? 0),
|
submarines: lerp(0, usLoss.submarines ?? 0),
|
||||||
|
tanks: lerp(0, usLoss.tanks ?? 0),
|
||||||
|
civilianShips: lerp(0, usLoss.civilianShips ?? 0),
|
||||||
|
airportPort: lerp(0, usLoss.airportPort ?? 0),
|
||||||
}
|
}
|
||||||
const irLossesAt = {
|
const irLossesAt = {
|
||||||
bases: {
|
bases: {
|
||||||
@@ -99,6 +102,9 @@ export function useReplaySituation(): MilitarySituation {
|
|||||||
missiles: lerp(0, irLoss.missiles ?? 0),
|
missiles: lerp(0, irLoss.missiles ?? 0),
|
||||||
helicopters: lerp(0, irLoss.helicopters ?? 0),
|
helicopters: lerp(0, irLoss.helicopters ?? 0),
|
||||||
submarines: lerp(0, irLoss.submarines ?? 0),
|
submarines: lerp(0, irLoss.submarines ?? 0),
|
||||||
|
tanks: lerp(0, irLoss.tanks ?? 0),
|
||||||
|
civilianShips: lerp(0, irLoss.civilianShips ?? 0),
|
||||||
|
airportPort: lerp(0, irLoss.airportPort ?? 0),
|
||||||
}
|
}
|
||||||
|
|
||||||
// 被袭基地:按 damage_level 排序,高损毁先出现;根据 progress 决定显示哪些为 attacked
|
// 被袭基地:按 damage_level 排序,高损毁先出现;根据 progress 决定显示哪些为 attacked
|
||||||
|
|||||||
Reference in New Issue
Block a user