diff --git a/README.md b/README.md index 3f1a64b..0c3c129 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,13 @@ npm run api:seed npm run api ``` -开发时需同时运行前端与 API: +开发时可用一键启动(推荐): + +```bash +npm start +``` + +或分终端分别运行: ```bash # 终端 1 @@ -53,6 +59,13 @@ npm run dev API 会由 Vite 代理到 `/api`,前端通过 `/api/situation` 获取完整态势数据。数据库文件位于 `server/data.db`,可通过修改表数据实现动态调整。 +### 爬虫不生效时 + +1. 测试 RSS 抓取:`npm run crawler:test`(需网络,返回抓取条数) +2. 单独启动爬虫查看日志:`npm run gdelt`(另开终端) +3. 查看爬虫状态:`curl http://localhost:8000/crawler/status`(需爬虫服务已启动) +4. 数据库面板 `/db` 每 30 秒自动刷新,可观察 situation_update 条数是否增加 + ## Development ```bash diff --git a/crawler/README.md b/crawler/README.md index d392455..0280af6 100644 --- a/crawler/README.md +++ b/crawler/README.md @@ -18,12 +18,19 @@ | 项目 | 说明 | |------|------| -| 源 | Reuters、BBC World/MiddleEast、Al Jazeera、NYT World | +| 源 | 多国主流媒体:美(Reuters/NYT)、英(BBC)、法(France 24)、俄(TASS/RT)、中(Xinhua/CGTN)、伊(Press TV)、卡塔尔(Al Jazeera) | | 过滤 | 标题/摘要需含 `KEYWORDS` 之一(iran、usa、strike、military 等) | | 更新 | 爬虫 45 秒拉一次(`RSS_INTERVAL_SEC`),优先保证事件脉络 | | 优先级 | 启动时先拉 RSS,再拉 GDELT | -**GDELT 无法访问时**:设置 `GDELT_DISABLED=1`,仅用 RSS 新闻即可维持事件脉络。 +**GDELT 无法访问时**:设置 `GDELT_DISABLED=1`,仅用 RSS 新闻即可维持事件脉络。部分境外源可能受网络限制。 + +### 3. 
AI 新闻清洗与分类(可选) + +- **清洗**:`cleaner_ai.py` 用 Ollama 提炼新闻为简洁摘要,供面板展示 +- **分类**:`parser_ai.py` 用 Ollama 替代规则做 category/severity 判定 +- 需先安装并运行 Ollama:`ollama run llama3.1` +- 环境变量:`OLLAMA_MODEL=llama3.1`、`PARSER_AI_DISABLED=1`、`CLEANER_AI_DISABLED=1`(禁用对应 AI) --- @@ -69,6 +76,9 @@ GDELT API → 抓取(60s) → SQLite (gdelt_events, conflict_stats) → POST /ap - `GDELT_DISABLED`: 设为 `1` 则跳过 GDELT,仅用 RSS 新闻(GDELT 无法访问时用) - `FETCH_INTERVAL_SEC`: GDELT 抓取间隔(秒),默认 60 - `RSS_INTERVAL_SEC`: RSS 抓取间隔(秒),默认 45(优先保证事件脉络) +- `OLLAMA_MODEL`: AI 分类模型,默认 `llama3.1` +- `PARSER_AI_DISABLED`: 设为 `1` 则禁用 AI 分类,仅用规则 +- `CLEANER_AI_DISABLED`: 设为 `1` 则禁用 AI 清洗,仅用规则截断 ## 冲突强度 (impact_score) diff --git a/crawler/__pycache__/cleaner_ai.cpython-39.pyc b/crawler/__pycache__/cleaner_ai.cpython-39.pyc new file mode 100644 index 0000000..fe62db8 Binary files /dev/null and b/crawler/__pycache__/cleaner_ai.cpython-39.pyc differ diff --git a/crawler/__pycache__/config.cpython-39.pyc b/crawler/__pycache__/config.cpython-39.pyc index b48687b..88f7772 100644 Binary files a/crawler/__pycache__/config.cpython-39.pyc and b/crawler/__pycache__/config.cpython-39.pyc differ diff --git a/crawler/__pycache__/parser_ai.cpython-39.pyc b/crawler/__pycache__/parser_ai.cpython-39.pyc new file mode 100644 index 0000000..fc0c9df Binary files /dev/null and b/crawler/__pycache__/parser_ai.cpython-39.pyc differ diff --git a/crawler/__pycache__/realtime_conflict_service.cpython-39.pyc b/crawler/__pycache__/realtime_conflict_service.cpython-39.pyc index c22b35a..2eefef1 100644 Binary files a/crawler/__pycache__/realtime_conflict_service.cpython-39.pyc and b/crawler/__pycache__/realtime_conflict_service.cpython-39.pyc differ diff --git a/crawler/__pycache__/translate_utils.cpython-39.pyc b/crawler/__pycache__/translate_utils.cpython-39.pyc index a6fb40b..563a325 100644 Binary files a/crawler/__pycache__/translate_utils.cpython-39.pyc and b/crawler/__pycache__/translate_utils.cpython-39.pyc differ diff --git a/crawler/cleaner_ai.py 
# -*- coding: utf-8 -*-
"""
Clean news text so it satisfies the EventTimelinePanel field constraints.

The panel needs ``summary`` (single line, at most 120 characters) plus
``category`` and ``severity`` drawn from closed enumerations.  A local Ollama
model is used to condense text when available; every failure falls back to
rule-based cleaning.
"""
import os
import re
from typing import Optional

CLEANER_AI_DISABLED = os.environ.get("CLEANER_AI_DISABLED", "0") == "1"
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")

# Panel schema: must stay in sync with EventTimelinePanel / SituationUpdate.
SUMMARY_MAX_LEN = 120  # the panel shows the summary with line-clamp-2
CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
SEVERITIES = ("low", "medium", "high", "critical")

_WHITESPACE = re.compile(r"\s+")
_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
# A single leading list marker ("1. ", "2) ", "3、", "- ", "• ", "**").
# A bare leading number is NOT a marker: "12 soldiers killed" must keep "12".
_LIST_MARKER = re.compile(r"^\s*(?:\d+[.)、](?!\d)\s*|[-•]\s+|\*+\s*)")
_EDGE_QUOTES = re.compile(r"^['\"\s]+|['\"\s]+$")


def _ai_disabled() -> bool:
    """Return True when AI cleaning is switched off.

    Honors both the import-time constant and the current environment, so the
    flag can also be set after this module has been imported.
    """
    return CLEANER_AI_DISABLED or os.environ.get("CLEANER_AI_DISABLED", "0") == "1"


def _sanitize_summary(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Return *text* as single-line plain text of at most *max_len* characters.

    Whitespace runs become one space and the remaining control characters are
    removed.  Whitespace is collapsed a second time because deleting a control
    character that sat between two spaces would otherwise leave a double space.
    Non-string or empty input yields ``""``.
    """
    if not text or not isinstance(text, str):
        return ""
    collapsed = _WHITESPACE.sub(" ", text.strip())
    collapsed = _CONTROL_CHARS.sub("", collapsed)
    collapsed = _WHITESPACE.sub(" ", collapsed).strip()
    return collapsed[:max_len].rstrip()


def _rule_clean(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Rule-based cleaning: normalize whitespace, drop control chars, truncate."""
    return _sanitize_summary(text, max_len)


def _call_ollama_summary(text: str, max_len: int, timeout: int = 6) -> Optional[str]:
    """Ask the local Ollama chat endpoint for a short plain-text summary.

    Returns the sanitized summary, or ``None`` when AI cleaning is disabled,
    the input is shorter than 5 characters, the HTTP status is not 200, the
    reply is too short, or any exception occurs.  Swallowing every exception
    is deliberate: the caller falls back to rule-based cleaning.
    """
    if _ai_disabled() or not text or len(str(text).strip()) < 5:
        return None
    try:
        import requests
        prompt = f"""将新闻提炼为1-2句简洁中文事实,直接输出纯文本,不要标号、引号、解释。限{max_len}字内。

原文:{str(text)[:350]}

输出:"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "options": {"num_predict": 150},
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        out = (r.json().get("message", {}).get("content", "") or "").strip()
        out = _LIST_MARKER.sub("", out)  # drop a list marker, keep real numbers
        out = _EDGE_QUOTES.sub("", out)
        out = _sanitize_summary(out, max_len)
        if out and len(out) > 3:
            return out
        return None
    except Exception:
        return None


def clean_news_for_panel(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Clean a summary for EventTimelinePanel.

    Tries the Ollama summary first and falls back to rule-based cleaning.
    The result is always plain text of at most *max_len* characters; empty or
    non-string input yields ``""``.
    """
    if not text or not isinstance(text, str):
        return ""
    stripped = text.strip()
    if not stripped:
        return ""
    ai_summary = _call_ollama_summary(stripped, max_len, timeout=6)
    if ai_summary:
        return ai_summary
    return _rule_clean(stripped, max_len)


def ensure_category(cat: str) -> str:
    """Return *cat* if it is a known panel category, otherwise ``"other"``."""
    return cat if cat in CATEGORIES else "other"


def ensure_severity(sev: str) -> str:
    """Return *sev* if it is a known panel severity, otherwise ``"medium"``."""
    return sev if sev in SEVERITIES else "medium"
import os
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional

PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))

# Numeric combat_losses columns that a delta may increment.
_LOSS_FIELDS = (
    "personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded",
    "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles",
)


def _utc_timestamp() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.000Z``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")


def _table_columns(conn: sqlite3.Connection, table: str) -> set:
    """Return the column names of *table* (second field of PRAGMA table_info)."""
    return {row[1] for row in conn.execute(f"PRAGMA table_info({table})")}


def _add_column(conn: sqlite3.Connection, table: str, column: str, decl: str,
                fallback_decl: Optional[str] = None) -> None:
    """Add *column* to *table* unless it already exists (best effort).

    SQLite refuses ``ALTER TABLE ... ADD COLUMN`` with a parenthesized
    (non-constant) default on a table that already holds rows, so callers may
    pass a constant *fallback_decl* that is tried when *decl* is rejected.
    """
    if column in _table_columns(conn, table):
        return
    for candidate in (decl, fallback_decl):
        if candidate is None:
            continue
        try:
            conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {candidate}")
            return
        except sqlite3.OperationalError:
            continue


def _ensure_tables(conn: sqlite3.Connection) -> None:
    """Create the tables and columns this module writes to (mirrors db.js)."""
    conn.execute("""
        CREATE TABLE IF NOT EXISTS situation_update (
            id TEXT PRIMARY KEY, timestamp TEXT NOT NULL, category TEXT NOT NULL,
            summary TEXT NOT NULL, severity TEXT NOT NULL
        )
    """)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS combat_losses (
            side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
            bases_destroyed INTEGER NOT NULL, bases_damaged INTEGER NOT NULL,
            personnel_killed INTEGER NOT NULL, personnel_wounded INTEGER NOT NULL,
            aircraft INTEGER NOT NULL, warships INTEGER NOT NULL, armor INTEGER NOT NULL, vehicles INTEGER NOT NULL
        )
    """)
    _add_column(conn, "combat_losses", "civilian_killed", "INTEGER NOT NULL DEFAULT 0")
    _add_column(conn, "combat_losses", "civilian_wounded", "INTEGER NOT NULL DEFAULT 0")
    _add_column(conn, "combat_losses", "updated_at", "TEXT DEFAULT (datetime('now'))", fallback_decl="TEXT")
    conn.execute("CREATE TABLE IF NOT EXISTS wall_street_trend (id INTEGER PRIMARY KEY AUTOINCREMENT, time TEXT NOT NULL, value INTEGER NOT NULL)")
    conn.execute("CREATE TABLE IF NOT EXISTS retaliation_current (id INTEGER PRIMARY KEY CHECK (id = 1), value INTEGER NOT NULL)")
    conn.execute("CREATE TABLE IF NOT EXISTS retaliation_history (id INTEGER PRIMARY KEY AUTOINCREMENT, time TEXT NOT NULL, value INTEGER NOT NULL)")
    conn.execute("CREATE TABLE IF NOT EXISTS situation (id INTEGER PRIMARY KEY CHECK (id = 1), data TEXT NOT NULL, updated_at TEXT NOT NULL)")
    conn.commit()


def _int_delta(value: Any) -> int:
    """Coerce a delta value to int; anything that is not a finite number is 0."""
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return 0
    try:
        return int(value)
    except (ValueError, OverflowError):  # NaN / infinity
        return 0


def _apply_loss_delta(conn: sqlite3.Connection, side: str, delta: Dict[str, Any]) -> bool:
    """Add *delta* to the stored combat_losses row of *side*.

    Returns True when a row was changed.  Nothing is written when the row is
    missing or every increment is zero.  Totals never drop below 0.
    """
    columns = _table_columns(conn, "combat_losses")
    fields = [f for f in _LOSS_FIELDS if f in columns]
    increments = {f: _int_delta(delta.get(f)) for f in fields}
    if not any(increments.values()):
        return False
    # Field names come from the fixed _LOSS_FIELDS tuple, never from input.
    row = conn.execute(
        f"SELECT {', '.join(fields)} FROM combat_losses WHERE side = ?", (side,)
    ).fetchone()
    if row is None:
        return False
    assignments = [f"{f}=?" for f in fields]
    params = [max(0, (current or 0) + increments[f]) for f, current in zip(fields, row)]
    if "updated_at" in columns:
        assignments.append("updated_at=?")
        params.append(_utc_timestamp())
    params.append(side)
    cur = conn.execute(f"UPDATE combat_losses SET {', '.join(assignments)} WHERE side=?", params)
    return cur.rowcount > 0


def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
    """Merge extracted panel data into the SQLite database.

    Args:
        extracted: may contain ``situation_update``, ``combat_losses_delta``,
            ``retaliation`` and ``wall_street`` entries; unknown keys are ignored.
        db_path: database file; defaults to ``DB_PATH``.

    Returns:
        True when at least one row was inserted or changed.  False when the
        database file does not exist or nothing changed.

    Raises:
        Any exception from the database is re-raised after a rollback, except
        failures while applying one side's combat-loss delta, which are
        reported and skipped (best effort).
    """
    import hashlib

    path = db_path or DB_PATH
    if not extracted or not os.path.exists(path):
        return False
    conn = sqlite3.connect(path, timeout=10)
    try:
        _ensure_tables(conn)
        updated = False

        update = extracted.get("situation_update")
        if update:
            summary = update.get("summary", "") or ""
            timestamp = update.get("timestamp", "") or ""
            # hash() of a str is salted per process, which made the id change on
            # every restart and defeated INSERT OR IGNORE; a digest is stable.
            digest = hashlib.sha1((summary + timestamp).encode("utf-8")).hexdigest()[:16]
            cur = conn.execute(
                "INSERT OR IGNORE INTO situation_update (id, timestamp, category, summary, severity) VALUES (?, ?, ?, ?, ?)",
                (f"ai_{digest}", timestamp, update.get("category", "other"), summary[:500], update.get("severity", "medium")),
            )
            # rowcount is per statement; conn.total_changes is cumulative.
            if cur.rowcount > 0:
                updated = True

        for side, delta in (extracted.get("combat_losses_delta") or {}).items():
            if side not in ("us", "iran") or not isinstance(delta, dict):
                continue
            try:
                if _apply_loss_delta(conn, side, delta):
                    updated = True
            except sqlite3.Error as exc:
                print(f"  [warn] combat_losses merge skipped for {side}: {exc}")

        retaliation = extracted.get("retaliation")
        if retaliation and retaliation.get("value") is not None and retaliation.get("time"):
            conn.execute("INSERT OR REPLACE INTO retaliation_current (id, value) VALUES (1, ?)", (retaliation["value"],))
            conn.execute("INSERT INTO retaliation_history (time, value) VALUES (?, ?)", (retaliation["time"], retaliation["value"]))
            updated = True

        wall_street = extracted.get("wall_street")
        if wall_street and wall_street.get("value") is not None and wall_street.get("time"):
            conn.execute("INSERT INTO wall_street_trend (time, value) VALUES (?, ?)", (wall_street["time"], wall_street["value"]))
            updated = True

        if updated:
            # Bump the timestamp only; INSERT OR REPLACE would overwrite `data`.
            now = _utc_timestamp()
            cur = conn.execute("UPDATE situation SET updated_at = ? WHERE id = 1", (now,))
            if cur.rowcount == 0:
                conn.execute("INSERT INTO situation (id, data, updated_at) VALUES (1, '{}', ?)", (now,))
        conn.commit()
        return updated
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
def _to_count(value: Any) -> Optional[int]:
    """Return *value* as a non-negative int, or None if it is not a usable number.

    Booleans are rejected even though ``bool`` is a subclass of ``int``; NaN
    and infinity are rejected because ``int()`` cannot convert them.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    try:
        return max(0, int(value))
    except (ValueError, OverflowError):
        return None


def _to_score(value: Any) -> Optional[int]:
    """Return *value* as an int in 0..100, or None when it is out of range or not numeric."""
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    if not 0 <= value <= 100:  # also False for NaN
        return None
    return int(value)


def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """Extract panel-schema data from news text via the Ollama extractor.

    Args:
        text: news text handed to ``_call_ollama_extract``.
        timestamp: ISO timestamp recorded on the extracted entries; defaults
            to the current UTC time.

    Returns:
        A dict with any of ``situation_update``, ``combat_losses_delta``,
        ``retaliation`` and ``wall_street``.  It is empty when the model
        returned nothing usable (including a JSON reply that is not an object).
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    out: Dict[str, Any] = {}
    parsed = _call_ollama_extract(text)
    # The model may legally answer with a JSON list or scalar; only objects are usable.
    if not parsed or not isinstance(parsed, dict):
        return out

    # situation_update
    if parsed.get("summary"):
        summary = validate_summary(str(parsed["summary"]), 120)
        if summary:
            out["situation_update"] = {
                "summary": summary,
                "category": validate_category(str(parsed.get("category", "other")).lower()),
                "severity": validate_severity(str(parsed.get("severity", "medium")).lower()),
                "timestamp": ts,
            }

    # combat_losses increments (numeric fields only)
    losses: Dict[str, Dict[str, int]] = {"us": {}, "iran": {}}
    for field in ("personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded",
                  "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles"):
        for side in ("us", "iran"):
            count = _to_count(parsed.get(f"{side}_{field}"))
            if count is not None:
                losses[side][field] = count
    delta = {side: fields for side, fields in losses.items() if fields}
    if delta:
        out["combat_losses_delta"] = delta

    # retaliation
    score = _to_score(parsed.get("retaliation_sentiment"))
    if score is not None:
        out["retaliation"] = {"value": score, "time": ts}

    # wall_street
    score = _to_score(parsed.get("wall_street_value"))
    if score is not None:
        out["wall_street"] = {"time": ts, "value": score}
    return out
# A whole number, optionally with thousands separators.  The look-behind stops
# a match from starting in the middle of a longer number.
_NUMBER = r"(?<![\d,])(\d{1,3}(?:,\d{3})+|\d+)"
_CASUALTY = r"(?:troop|soldier|killed|dead)"
# Side names as whole words, so "us" no longer matches inside "russia" or "thus".
_SIDE_TOKENS = {
    "us": r"(?:u\.?s\.?(?![a-z])|americans?\b)",
    "iran": r"(?:iranians?|iran)\b",
}


def _casualty_patterns(side: str):
    """Return the (side-first, number-first) casualty patterns for one side.

    The gaps are lazy: a greedy ``[\\s\\w]*`` in front of the number group would
    swallow the leading digits and capture only the last one.
    """
    return (
        re.compile(rf"\b{side}[\s\w]*?(?:say|report)[\s\w]*?{_NUMBER}[\s\w]*?{_CASUALTY}"),
        re.compile(rf"{_NUMBER}[\s\w]*?\b{side}[\s\w]*?{_CASUALTY}"),
    )


_PERSONNEL_PATTERNS = {name: _casualty_patterns(token) for name, token in _SIDE_TOKENS.items()}

# Fixed heuristic scores written when the matching keywords appear.
_RETALIATION_SCORE = 75
_WALL_STREET_SCORE = 55


def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """Rule-based extraction of panel data from news text (no Ollama needed).

    Args:
        text: news text; matching is done on its lower-cased form.
        timestamp: ISO timestamp recorded on the entries; defaults to the
            current UTC time.

    Returns:
        A dict with any of ``combat_losses_delta`` (``personnel_killed`` per
        side), ``retaliation`` and ``wall_street``; empty when nothing matches.
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    out: Dict[str, Any] = {}
    t = (text or "").lower()

    losses: Dict[str, Dict[str, int]] = {}
    for side, patterns in _PERSONNEL_PATTERNS.items():
        # When both patterns match, the later (number-first) one wins.
        for pattern in patterns:
            match = pattern.search(t)
            if match:
                losses.setdefault(side, {})["personnel_killed"] = int(match.group(1).replace(",", ""))
    if losses:
        out["combat_losses_delta"] = losses

    if "retaliat" in t or "revenge" in t or "报复" in t:
        out["retaliation"] = {"value": _RETALIATION_SCORE, "time": ts}
    if "wall street" in t or " dow " in t or "s&p" in t or "market slump" in t or "stock fall" in t or "美股" in t:
        out["wall_street"] = {"time": ts, "value": _WALL_STREET_SCORE}

    return out
SUMMARY_MAX_LEN = 120


def validate_summary(s: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Return *s* as single-line plain text of at most *max_len* characters.

    Whitespace runs become one space and control characters (0x00-0x1f) are
    removed.  Cleaning happens before truncation so that removed characters do
    not use up the length budget, and whitespace is collapsed again afterwards
    so that a control character between two spaces leaves no double space.
    Empty or non-string input yields ``""``.
    """
    import re
    if not s or not isinstance(s, str):
        return ""
    t = re.sub(r"\s+", " ", s.strip())
    t = re.sub(r"[\x00-\x1f]", "", t)
    t = re.sub(r"\s+", " ", t).strip()
    return t[:max_len].rstrip()
def classify_and_severity(text: str) -> Tuple[Category, Severity]:
    """Return ``(category, severity)`` for *text* with at most one AI call.

    The Ollama result is used when AI parsing is enabled and the call
    succeeds.  Otherwise the rule-based classifier runs once and its category
    is passed on to the rule-based severity check.
    """
    if not PARSER_AI_DISABLED:
        ai_result = _call_ollama(text)
        if ai_result:
            return ai_result
    category = _rule_classify(text)
    return category, _rule_severity(text, category)
a/crawler/realtime_conflict_service.py b/crawler/realtime_conflict_service.py index de2b11c..0410f24 100644 --- a/crawler/realtime_conflict_service.py +++ b/crawler/realtime_conflict_service.py @@ -14,11 +14,13 @@ from datetime import datetime from pathlib import Path from typing import List, Optional +import logging import requests from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from apscheduler.schedulers.background import BackgroundScheduler +logging.getLogger("apscheduler.scheduler").setLevel(logging.ERROR) app = FastAPI(title="GDELT Conflict Service") app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"]) @@ -29,7 +31,7 @@ API_BASE = os.environ.get("API_BASE", "http://localhost:3001") QUERY = os.environ.get("GDELT_QUERY", "United States Iran military") MAX_RECORDS = int(os.environ.get("GDELT_MAX_RECORDS", "30")) FETCH_INTERVAL_SEC = int(os.environ.get("FETCH_INTERVAL_SEC", "60")) -RSS_INTERVAL_SEC = int(os.environ.get("RSS_INTERVAL_SEC", "45")) # 新闻抓取更频繁,优先保证事件脉络 +RSS_INTERVAL_SEC = int(os.environ.get("RSS_INTERVAL_SEC", "60")) # 每分钟抓取世界主流媒体 # 时间范围:1h=1小时 1d=1天 1week=1周;不设则默认 3 个月(易返回旧文) GDELT_TIMESPAN = os.environ.get("GDELT_TIMESPAN", "1d") # 设为 1 则跳过 GDELT,仅用 RSS 新闻作为事件脉络(GDELT 国外可能无法访问) @@ -77,7 +79,9 @@ def _parse_article(article: dict) -> Optional[dict]: if not title_raw: return None from translate_utils import translate_to_chinese + from cleaner_ai import clean_news_for_panel title = translate_to_chinese(str(title_raw)[:500]) + title = clean_news_for_panel(title, max_len=150) url = article.get("url") or article.get("socialimage") or "" seendate = article.get("seendate") or datetime.utcnow().isoformat() lat = article.get("lat") @@ -134,8 +138,8 @@ def fetch_gdelt_events() -> None: _write_to_db(new_events) _notify_node() print(f"[{datetime.now().strftime('%H:%M:%S')}] GDELT 更新 {len(new_events)} 条事件") - except Exception as e: - print(f"GDELT 抓取失败: {e}") + except Exception: + pass def _ensure_table(conn: 
sqlite3.Connection) -> None: @@ -213,38 +217,115 @@ def _notify_node() -> None: # ========================== -# RSS 新闻抓取(补充 situation_update) +# RSS 新闻抓取(补充 situation_update + AI 提取面板数据) # ========================== +LAST_FETCH = {"items": 0, "inserted": 0, "error": None} + + def fetch_news() -> None: try: from scrapers.rss_scraper import fetch_all from db_writer import write_updates from translate_utils import translate_to_chinese + from cleaner_ai import clean_news_for_panel + from cleaner_ai import ensure_category, ensure_severity + LAST_FETCH["error"] = None items = fetch_all() for it in items: - it["title"] = translate_to_chinese(it.get("title", "") or "") - it["summary"] = translate_to_chinese(it.get("summary", "") or it.get("title", "")) + raw_title = translate_to_chinese(it.get("title", "") or "") + raw_summary = translate_to_chinese(it.get("summary", "") or it.get("title", "")) + it["title"] = clean_news_for_panel(raw_title, max_len=80) + it["summary"] = clean_news_for_panel(raw_summary or raw_title, max_len=120) + it["category"] = ensure_category(it.get("category", "other")) + it["severity"] = ensure_severity(it.get("severity", "medium")) + n = write_updates(items) if items else 0 + LAST_FETCH["items"] = len(items) + LAST_FETCH["inserted"] = n if items: - n = write_updates(items) + _extract_and_merge_panel_data(items) if n > 0: _notify_node() - print(f"[{datetime.now().strftime('%H:%M:%S')}] 新闻入库 {n} 条") + print(f"[{datetime.now().strftime('%H:%M:%S')}] RSS 抓取 {len(items)} 条,新增入库 {n} 条") except Exception as e: - print(f"新闻抓取失败: {e}") + LAST_FETCH["error"] = str(e) + print(f"[{datetime.now().strftime('%H:%M:%S')}] 新闻抓取失败: {e}") + + +def _extract_and_merge_panel_data(items: list) -> None: + """对新闻做 AI/规则 提取,合并到 combat_losses / retaliation / wall_street_trend 等表""" + if not items or not os.path.exists(DB_PATH): + return + try: + from db_merge import merge + if os.environ.get("CLEANER_AI_DISABLED", "0") == "1": + from extractor_rules import extract_from_news 
@app.get("/crawler/status")
def crawler_status():
    """Report crawler health, for debugging the data-update pipeline.

    Returns the database path, whether the file exists, the number of rows in
    ``situation_update`` (0 when the database cannot be queried) and the
    counters and error recorded by the last RSS fetch.
    """
    db_ok = os.path.exists(DB_PATH)
    total = 0
    if db_ok:
        try:
            conn = sqlite3.connect(DB_PATH, timeout=3)
            try:
                total = conn.execute("SELECT COUNT(*) FROM situation_update").fetchone()[0]
            finally:
                # Close even when the query fails (e.g. the table does not exist yet).
                conn.close()
        except sqlite3.Error:
            # Status endpoint is best effort: an unreadable DB is reported as 0 rows.
            total = 0
    return {
        "db_path": DB_PATH,
        "db_exists": db_ok,
        "situation_update_count": total,
        "last_fetch_items": LAST_FETCH.get("items", 0),
        "last_fetch_inserted": LAST_FETCH.get("inserted", 0),
        "last_fetch_error": LAST_FETCH.get("error"),
    }
def translate_to_chinese(text: str) -> str:
    """Translate *text* to Simplified Chinese, returning the input on any failure.

    The input is returned unchanged when it is empty, blank or not a string,
    when ``TRANSLATE_DISABLED=1`` is set, when it is already mostly Chinese,
    or when every provider fails or returns the text unmodified.  Google is
    tried first, then MyMemory.  Only the first 2000 characters are translated.
    """
    if not isinstance(text, str) or not text.strip():
        return text
    if os.environ.get("TRANSLATE_DISABLED", "0") == "1":
        return text
    s = text.strip()[:2000]
    if _is_mostly_chinese(s):
        return text

    import logging
    log = logging.getLogger(__name__)
    for provider in ("google", "mymemory"):
        try:
            if provider == "google":
                from deep_translator import GoogleTranslator
                out = GoogleTranslator(source="auto", target="zh-CN").translate(s)
            else:
                from deep_translator import MyMemoryTranslator
                out = MyMemoryTranslator(source="auto", target="zh-CN").translate(s)
        except Exception as exc:
            # Best effort: record why this provider failed, then try the next one.
            log.debug("translation via %s failed: %s", provider, exc)
            continue
        if out and out.strip() and out != s:
            return out
    return text
"node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmmirror.com/read-cache/-/read-cache-1.0.0.tgz", @@ -4645,6 +4694,11 @@ "node": ">= 0.8.0" } }, + "node_modules/set-cookie-parser": { + "version": "2.7.2", + "resolved": "https://registry.npmmirror.com/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz", + "integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==" + }, "node_modules/set-value": { "version": "2.0.1", "resolved": "https://registry.npmmirror.com/set-value/-/set-value-2.0.1.tgz", diff --git a/package.json b/package.json index 887f2ae..3190439 100644 --- a/package.json +++ b/package.json @@ -4,11 +4,13 @@ "version": "1.0.0", "type": "module", "scripts": { + "start": "./start.sh", "dev": "vite", "api": "node server/index.js", "api:seed": "node server/seed.js", "crawler": "cd crawler && python main.py", "gdelt": "cd crawler && uvicorn realtime_conflict_service:app --host 0.0.0.0 --port 8000", + "crawler:test": "cd crawler && python3 -c \"import sys; sys.path.insert(0,'.'); from scrapers.rss_scraper import fetch_all; n=len(fetch_all()); print('RSS 抓取:', n, '条' if n else '(0 条,检查网络或关键词过滤)')\"", "build": "vite build", "typecheck": "tsc --noEmit", "lint": "eslint .", @@ -25,6 +27,7 @@ "react": "^18.3.1", "react-dom": "^18.3.1", "react-map-gl": "^7.1.7", + "react-router-dom": "^7.13.1", "ws": "^8.19.0", "zustand": "^5.0.0" }, diff --git a/server/data.db-shm b/server/data.db-shm index 5be23b9..49b8a41 100644 Binary files a/server/data.db-shm and b/server/data.db-shm differ diff --git a/server/data.db-wal b/server/data.db-wal index 5fac6c7..61b6075 100644 Binary files a/server/data.db-wal and b/server/data.db-wal differ diff --git a/server/db.js b/server/db.js index e9e8af6..63b924a 100644 --- a/server/db.js +++ b/server/db.js @@ -123,12 +123,28 @@ try { if (!names.includes('status')) db.exec('ALTER TABLE key_location ADD COLUMN status TEXT DEFAULT "operational"') if 
(!names.includes('damage_level')) db.exec('ALTER TABLE key_location ADD COLUMN damage_level INTEGER') } catch (_) {} -// 迁移:combat_losses 添加平民伤亡 +// 迁移:combat_losses 添加平民伤亡、updated_at try { const lossCols = db.prepare('PRAGMA table_info(combat_losses)').all() const lossNames = lossCols.map((c) => c.name) if (!lossNames.includes('civilian_killed')) db.exec('ALTER TABLE combat_losses ADD COLUMN civilian_killed INTEGER NOT NULL DEFAULT 0') if (!lossNames.includes('civilian_wounded')) db.exec('ALTER TABLE combat_losses ADD COLUMN civilian_wounded INTEGER NOT NULL DEFAULT 0') + if (!lossNames.includes('updated_at')) db.exec('ALTER TABLE combat_losses ADD COLUMN updated_at TEXT DEFAULT (datetime("now"))') } catch (_) {} +// 迁移:所有表添加 updated_at 用于数据回放 +const addUpdatedAt = (table) => { + try { + const cols = db.prepare(`PRAGMA table_info(${table})`).all() + if (!cols.some((c) => c.name === 'updated_at')) { + db.exec(`ALTER TABLE ${table} ADD COLUMN updated_at TEXT DEFAULT (datetime("now"))`) + } + } catch (_) {} +} +addUpdatedAt('force_summary') +addUpdatedAt('power_index') +addUpdatedAt('force_asset') +addUpdatedAt('key_location') +addUpdatedAt('retaliation_current') + module.exports = db diff --git a/server/index.js b/server/index.js index dff9335..cd8638b 100644 --- a/server/index.js +++ b/server/index.js @@ -31,7 +31,7 @@ function broadcastSituation() { }) } catch (_) {} } -setInterval(broadcastSituation, 5000) +setInterval(broadcastSituation, 3000) // 供爬虫调用:更新 situation.updated_at 并立即广播 function notifyCrawlerUpdate() { diff --git a/server/routes.js b/server/routes.js index b4cab2a..05c07a2 100644 --- a/server/routes.js +++ b/server/routes.js @@ -1,8 +1,58 @@ const express = require('express') const { getSituation } = require('./situationData') +const db = require('./db') const router = express.Router() +// 数据库 Dashboard:返回各表原始数据 +router.get('/db/dashboard', (req, res) => { + try { + const tables = [ + 'situation', + 'force_summary', + 'power_index', + 'force_asset', 
+ 'key_location', + 'combat_losses', + 'wall_street_trend', + 'retaliation_current', + 'retaliation_history', + 'situation_update', + 'gdelt_events', + 'conflict_stats', + ] + const data = {} + const timeSort = { + situation: 'updated_at DESC', + situation_update: 'timestamp DESC', + gdelt_events: 'event_time DESC', + wall_street_trend: 'time DESC', + retaliation_history: 'time DESC', + conflict_stats: 'updated_at DESC', + } + for (const name of tables) { + try { + const order = timeSort[name] + let rows + try { + rows = order + ? db.prepare(`SELECT * FROM ${name} ORDER BY ${order}`).all() + : db.prepare(`SELECT * FROM ${name}`).all() + } catch (qerr) { + rows = db.prepare(`SELECT * FROM ${name}`).all() + } + data[name] = rows + } catch (e) { + data[name] = { error: e.message } + } + } + res.json(data) + } catch (err) { + console.error(err) + res.status(500).json({ error: err.message }) + } +}) + router.get('/situation', (req, res) => { try { res.json(getSituation()) diff --git a/server/situationData.js b/server/situationData.js index 770f556..d23b54a 100644 --- a/server/situationData.js +++ b/server/situationData.js @@ -58,18 +58,23 @@ function getSituation() { if (statsRow) conflictStats = statsRow } catch (_) {} - // 根据爬虫 conflict_stats 实时合并平民伤亡估算(GDELT 数据) + // 平民伤亡:合计显示,不区分阵营 + const civUsK = lossesUs?.civilian_killed ?? 0 + const civUsW = lossesUs?.civilian_wounded ?? 0 + const civIrK = lossesIr?.civilian_killed ?? 0 + const civIrW = lossesIr?.civilian_wounded ?? 0 + const dbKilled = civUsK + civIrK + const dbWounded = civUsW + civIrW + const est = conflictStats.estimated_casualties || 0 + const civilianCasualtiesTotal = { + killed: est > 0 ? Math.max(dbKilled, est) : dbKilled, + wounded: dbWounded, + } + const usLossesBase = lossesUs ? toLosses(lossesUs) : defaultLosses const irLossesBase = lossesIr ? 
toLosses(lossesIr) : defaultLosses - const est = conflictStats.estimated_casualties || 0 - const mergeCivilian = (base, share) => { - if (est <= 0) return base.civilianCasualties || { killed: 0, wounded: 0 } - const gdeltKilled = Math.round(est * share) - const cur = base.civilianCasualties || { killed: 0, wounded: 0 } - return { killed: Math.max(cur.killed, gdeltKilled), wounded: cur.wounded } - } - const usLosses = { ...usLossesBase, civilianCasualties: mergeCivilian(usLossesBase, 0.35) } - const irLosses = { ...irLossesBase, civilianCasualties: mergeCivilian(irLossesBase, 0.65) } + const usLosses = { ...usLossesBase, civilianCasualties: { killed: 0, wounded: 0 } } + const irLosses = { ...irLossesBase, civilianCasualties: { killed: 0, wounded: 0 } } return { lastUpdated: meta?.updated_at || new Date().toISOString(), @@ -135,6 +140,7 @@ function getSituation() { url: e.url, })), conflictStats, + civilianCasualtiesTotal, } } diff --git a/src/App.tsx b/src/App.tsx index 5262267..eb7c87d 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -1,4 +1,6 @@ +import { Routes, Route } from 'react-router-dom' import { Dashboard } from '@/pages/Dashboard' +import { DbDashboard } from '@/pages/DbDashboard' function App() { return ( @@ -6,7 +8,10 @@ function App() { className="min-h-screen w-full bg-military-dark overflow-hidden" style={{ background: '#0A0F1C' }} > - + + } /> + } /> + ) } diff --git a/src/api/situation.ts b/src/api/situation.ts index 8905775..e68ef83 100644 --- a/src/api/situation.ts +++ b/src/api/situation.ts @@ -1,7 +1,7 @@ import type { MilitarySituation } from '@/data/mockData' export async function fetchSituation(): Promise { - const res = await fetch('/api/situation') + const res = await fetch(`/api/situation?t=${Date.now()}`, { cache: 'no-store' }) if (!res.ok) throw new Error(`API error: ${res.status}`) return res.json() } diff --git a/src/components/CombatLossesPanel.tsx b/src/components/CombatLossesPanel.tsx index a693ccd..7c9d66a 100644 --- 
a/src/components/CombatLossesPanel.tsx +++ b/src/components/CombatLossesPanel.tsx @@ -17,16 +17,15 @@ interface CombatLossesPanelProps { usLosses: CombatLosses iranLosses: CombatLosses conflictStats?: ConflictStats | null + /** 平民伤亡合计(不区分阵营) */ + civilianTotal?: { killed: number; wounded: number } className?: string } -export function CombatLossesPanel({ usLosses, iranLosses, conflictStats, className = '' }: CombatLossesPanelProps) { - const civUs = usLosses.civilianCasualties ?? { killed: 0, wounded: 0 } - const civIr = iranLosses.civilianCasualties ?? { killed: 0, wounded: 0 } - const civTotal = { killed: (civUs.killed ?? 0) + (civIr.killed ?? 0), wounded: (civUs.wounded ?? 0) + (civIr.wounded ?? 0) } +export function CombatLossesPanel({ usLosses, iranLosses, conflictStats, civilianTotal, className = '' }: CombatLossesPanelProps) { + const civ = civilianTotal ?? { killed: 0, wounded: 0 } const otherRows = [ - { label: '平民', icon: UserCircle, iconColor: 'text-amber-400', value: `${formatMillions(civTotal.killed)} / ${formatMillions(civTotal.wounded)}`, noSide: true }, { label: '基地', icon: Building2, iconColor: 'text-amber-500', us: `${usLosses.bases.destroyed}/${usLosses.bases.damaged}`, ir: `${iranLosses.bases.destroyed}/${iranLosses.bases.damaged}` }, { label: '战机', icon: Plane, iconColor: 'text-sky-400', us: usLosses.aircraft, ir: iranLosses.aircraft }, { label: '战舰', icon: Ship, iconColor: 'text-blue-500', us: usLosses.warships, ir: iranLosses.warships }, @@ -70,6 +69,25 @@ export function CombatLossesPanel({ usLosses, iranLosses, conflictStats, classNa + {/* 平民伤亡:合计显示,不区分阵营 */} +
+
+ + 平民伤亡(合计) +
+
+ + + {formatMillions(civ.killed)} + + / + + + {formatMillions(civ.wounded)} + +
+
+ {/* 其它 - 标签+图标+数字,单独容器 */}
美:伊
diff --git a/src/components/EventTimelinePanel.tsx b/src/components/EventTimelinePanel.tsx index 26533a5..d00ae79 100644 --- a/src/components/EventTimelinePanel.tsx +++ b/src/components/EventTimelinePanel.tsx @@ -1,5 +1,7 @@ +import * as React from 'react' import type { SituationUpdate, ConflictEvent } from '@/data/mockData' -import { History } from 'lucide-react' +import { History, RefreshCw } from 'lucide-react' +import { fetchAndSetSituation } from '@/store/situationStore' interface EventTimelinePanelProps { updates: SituationUpdate[] @@ -29,6 +31,11 @@ type TimelineItem = { } export function EventTimelinePanel({ updates = [], conflictEvents = [], className = '' }: EventTimelinePanelProps) { + const [refreshing, setRefreshing] = React.useState(false) + const handleRefresh = React.useCallback(async () => { + setRefreshing(true) + await fetchAndSetSituation().finally(() => setRefreshing(false)) + }, []) // 合并 GDELT + RSS,按时间倒序(最新在前) const merged: TimelineItem[] = [ ...(conflictEvents || []).map((e) => ({ @@ -49,7 +56,7 @@ export function EventTimelinePanel({ updates = [], conflictEvents = [], classNam })), ] .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime()) - .slice(0, 6) + .slice(0, 8) return (
@@ -58,7 +65,16 @@ export function EventTimelinePanel({ updates = [], conflictEvents = [], classNam 事件脉络 - GDELT · Reuters · BBC · Al Jazeera · NYT +
{merged.length === 0 ? ( diff --git a/src/components/HeaderPanel.tsx b/src/components/HeaderPanel.tsx index ce85945..67e491a 100644 --- a/src/components/HeaderPanel.tsx +++ b/src/components/HeaderPanel.tsx @@ -1,9 +1,10 @@ import { useState, useEffect } from 'react' +import { Link } from 'react-router-dom' import { StatCard } from './StatCard' import { useSituationStore } from '@/store/situationStore' import { useReplaySituation } from '@/hooks/useReplaySituation' import { usePlaybackStore } from '@/store/playbackStore' -import { Wifi, WifiOff, Clock } from 'lucide-react' +import { Wifi, WifiOff, Clock, Database } from 'lucide-react' export function HeaderPanel() { const situation = useReplaySituation() @@ -58,7 +59,14 @@ export function HeaderPanel() { )}
-
+
+ + + 数据库 + {isConnected ? ( <> diff --git a/src/data/mockData.ts b/src/data/mockData.ts index 6e7885e..e0c9ae0 100644 --- a/src/data/mockData.ts +++ b/src/data/mockData.ts @@ -109,6 +109,8 @@ export interface MilitarySituation { conflictEvents?: ConflictEvent[] /** 战损统计(展示用) */ conflictStats?: ConflictStats + /** 平民伤亡合计(不区分阵营) */ + civilianCasualtiesTotal?: { killed: number; wounded: number } } export const INITIAL_MOCK_DATA: MilitarySituation = { @@ -246,4 +248,5 @@ export const INITIAL_MOCK_DATA: MilitarySituation = { ], conflictEvents: [], conflictStats: { total_events: 0, high_impact_events: 0, estimated_casualties: 0, estimated_strike_count: 0 }, + civilianCasualtiesTotal: { killed: 430, wounded: 1255 }, } diff --git a/src/hooks/useReplaySituation.ts b/src/hooks/useReplaySituation.ts index 5ab9095..c73dcc9 100644 --- a/src/hooks/useReplaySituation.ts +++ b/src/hooks/useReplaySituation.ts @@ -61,8 +61,7 @@ export function useReplaySituation(): MilitarySituation { const lerp = (a: number, b: number) => Math.round(a + progress * (b - a)) const usLoss = situation.usForces.combatLosses const irLoss = situation.iranForces.combatLosses - const civUs = usLoss.civilianCasualties ?? { killed: 0, wounded: 0 } - const civIr = irLoss.civilianCasualties ?? { killed: 0, wounded: 0 } + const civTotal = situation.civilianCasualtiesTotal ?? 
{ killed: 0, wounded: 0 } const usLossesAt = { bases: { destroyed: lerp(0, usLoss.bases.destroyed), @@ -72,7 +71,7 @@ export function useReplaySituation(): MilitarySituation { killed: lerp(0, usLoss.personnelCasualties.killed), wounded: lerp(0, usLoss.personnelCasualties.wounded), }, - civilianCasualties: { killed: lerp(0, civUs.killed), wounded: lerp(0, civUs.wounded) }, + civilianCasualties: { killed: 0, wounded: 0 }, aircraft: lerp(0, usLoss.aircraft), warships: lerp(0, usLoss.warships), armor: lerp(0, usLoss.armor), @@ -87,7 +86,7 @@ export function useReplaySituation(): MilitarySituation { killed: lerp(0, irLoss.personnelCasualties.killed), wounded: lerp(0, irLoss.personnelCasualties.wounded), }, - civilianCasualties: { killed: lerp(0, civIr.killed), wounded: lerp(0, civIr.wounded) }, + civilianCasualties: { killed: 0, wounded: 0 }, aircraft: lerp(0, irLoss.aircraft), warships: lerp(0, irLoss.warships), armor: lerp(0, irLoss.armor), @@ -115,6 +114,10 @@ export function useReplaySituation(): MilitarySituation { return { ...situation, lastUpdated: playbackTime, + civilianCasualtiesTotal: { + killed: lerp(0, civTotal.killed), + wounded: lerp(0, civTotal.wounded), + }, usForces: { ...situation.usForces, keyLocations: usLocsAt, diff --git a/src/index.css b/src/index.css index 1dcaed8..b1607aa 100644 --- a/src/index.css +++ b/src/index.css @@ -31,6 +31,11 @@ body, font-family: 'Orbitron', sans-serif; } +/* 数据库面板:易读字体 */ +.font-db { + font-family: 'Noto Sans SC', system-ui, -apple-system, sans-serif; +} + /* Tabular numbers for aligned stat display */ .tabular-nums { font-variant-numeric: tabular-nums; diff --git a/src/main.tsx b/src/main.tsx index 234dd89..6610f4b 100644 --- a/src/main.tsx +++ b/src/main.tsx @@ -1,10 +1,13 @@ import { StrictMode } from 'react' import { createRoot } from 'react-dom/client' +import { BrowserRouter } from 'react-router-dom' import App from './App.tsx' import './index.css' createRoot(document.getElementById('root')!).render( - + + + ) 
diff --git a/src/pages/Dashboard.tsx b/src/pages/Dashboard.tsx index e3d9779..72eea8c 100644 --- a/src/pages/Dashboard.tsx +++ b/src/pages/Dashboard.tsx @@ -68,6 +68,7 @@ export function Dashboard() { usLosses={situation.usForces.combatLosses} iranLosses={situation.iranForces.combatLosses} conflictStats={situation.conflictStats} + civilianTotal={situation.civilianCasualtiesTotal} className="min-w-0 flex-1 py-1" /> diff --git a/src/pages/DbDashboard.tsx b/src/pages/DbDashboard.tsx new file mode 100644 index 0000000..1c4f8d7 --- /dev/null +++ b/src/pages/DbDashboard.tsx @@ -0,0 +1,161 @@ +import { useEffect, useState } from 'react' +import { Database, Table, ArrowLeft, RefreshCw } from 'lucide-react' +import { Link } from 'react-router-dom' + +interface TableData { + [table: string]: Record[] | { error: string } +} + +export function DbDashboard() { + const [data, setData] = useState(null) + const [loading, setLoading] = useState(true) + const [error, setError] = useState(null) + const [expanded, setExpanded] = useState>(new Set(['situation_update', 'combat_losses', 'conflict_stats'])) + + useEffect(() => { + fetchData() + const t = setInterval(fetchData, 30000) + return () => clearInterval(t) + }, []) + + const fetchData = async () => { + setLoading(true) + setError(null) + try { + const res = await fetch('/api/db/dashboard') + if (!res.ok) throw new Error(res.statusText) + const json = await res.json() + setData(json) + } catch (e) { + setError(e instanceof Error ? e.message : '加载失败') + } finally { + setLoading(false) + } + } + + const toggle = (name: string) => { + setExpanded((s) => { + const next = new Set(s) + if (next.has(name)) next.delete(name) + else next.add(name) + return next + }) + } + + if (loading && !data) { + return ( +
+ +
+ ) + } + + return ( +
+
+
+ + + 返回主面板 + + + + 数据库内容 + +
+ +
+ + {error && ( +
+ {error}(请确保 API 已启动:npm run api) +
+ )} + +
+ {data && + Object.entries(data).map(([name, rows]) => { + const isExpanded = expanded.has(name) + const isError = rows && typeof rows === 'object' && 'error' in rows + const arr = Array.isArray(rows) ? rows : [] + return ( +
+
+ )} + + ) + })} + +
+ ) +} diff --git a/src/store/situationStore.ts b/src/store/situationStore.ts index a3c2d00..902dcc4 100644 --- a/src/store/situationStore.ts +++ b/src/store/situationStore.ts @@ -47,20 +47,36 @@ export function fetchAndSetSituation(): Promise { } let disconnectWs: (() => void) | null = null +let pollInterval: ReturnType | null = null + +const POLL_INTERVAL_MS = 5000 + +function pollSituation() { + fetchSituation() + .then((situation) => useSituationStore.getState().setSituation(situation)) + .catch(() => {}) +} export function startSituationWebSocket(): () => void { - useSituationStore.getState().setConnected(true) useSituationStore.getState().setLastError(null) disconnectWs = connectSituationWebSocket((data) => { + useSituationStore.getState().setConnected(true) useSituationStore.getState().setSituation(data as MilitarySituation) }) + pollSituation() + pollInterval = setInterval(pollSituation, POLL_INTERVAL_MS) + return stopSituationWebSocket } export function stopSituationWebSocket(): void { disconnectWs?.() disconnectWs = null + if (pollInterval) { + clearInterval(pollInterval) + pollInterval = null + } useSituationStore.getState().setConnected(false) } diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..7ed6c5b --- /dev/null +++ b/start.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# 一键启动 US-Iran 态势面板:API + 前端 + 爬虫服务 +set -e +cd "$(dirname "$0")" + +# 无 Ollama 时禁用 AI;GDELT 国内常超时,仅用 RSS 更新 +export CLEANER_AI_DISABLED=1 +export PARSER_AI_DISABLED=1 +export GDELT_DISABLED=1 +export RSS_INTERVAL_SEC=60 + +echo "==> Checking dependencies..." +[ ! -d node_modules ] && npm install + +echo "==> Checking crawler Python deps..." +pip install -q -r crawler/requirements.txt 2>/dev/null || true + +echo "==> Seeding database (if needed)..." +[ ! -f server/data.db ] && npm run api:seed + +echo "==> Starting API (http://localhost:3001)..." +npm run api & +API_PID=$! + +# 等待 API 就绪后再启动爬虫 +sleep 2 + +echo "==> Starting GDELT/RSS crawler (http://localhost:8000)..." 
+npm run gdelt & +GDELT_PID=$! + +echo "==> Starting frontend (Vite dev server)..." +npm run dev & +DEV_PID=$! + +cleanup() { + echo "" + echo "==> Shutting down..." + kill $API_PID $GDELT_PID $DEV_PID 2>/dev/null || true + exit 0 +} +trap cleanup SIGINT SIGTERM + +echo "" +echo "==> All services running. Frontend: http://localhost:5173 | API: http://localhost:3001" +echo " Press Ctrl+C to stop all." +echo "" +wait