Compare commits

...

53 Commits

Author SHA1 Message Date
Daniel
89145a6743 fix:修复启动文件 2026-03-05 20:22:15 +08:00
Daniel
07454b73c2 fix:优化爬虫配置,单独使用docker容器运行 2026-03-05 20:19:24 +08:00
Daniel
bbb9a5e1e1 fix:修复依赖文件报错 2026-03-05 20:00:15 +08:00
Daniel
98d928f457 fix:优化pm2配置项 2026-03-05 19:53:05 +08:00
Daniel
004b03b374 fix:优化爬虫链路 2026-03-05 19:18:45 +08:00
Daniel
475097d372 fix: 优化页面效果 2026-03-05 16:11:33 +08:00
Daniel
af59d6367f fix: 新增态 效果 2026-03-05 15:53:10 +08:00
Daniel
a3bf8abda5 fix:对齐生产环境的数据字段 2026-03-04 19:19:50 +08:00
Daniel
26938449f0 fix: bug 2026-03-04 16:48:17 +08:00
Daniel
64f4c438c3 fix: code update 2026-03-04 09:56:23 +08:00
Daniel
88c37408e8 fix: 化本 2026-03-04 09:43:21 +08:00
Daniel
3264b3252a fix: bug 2026-03-04 00:39:01 +08:00
Daniel
95e2fe1c41 fix: 2026-03-04 00:07:14 +08:00
Daniel
ac24c528f3 Merge branch; keep full EditDashboard 2026-03-03 22:44:24 +08:00
Daniel
86e50debec fix:增面 2026-03-03 22:42:21 +08:00
张成
8696549ba1 1 2026-03-03 20:34:26 +08:00
Daniel
09ec2e3a69 fix: bug 2026-03-03 20:17:38 +08:00
Daniel
034c088bac chore: stop tracking server/data.db, keep in .gitignore 2026-03-03 18:08:34 +08:00
Daniel
15800b1dad fix: update code for db file 2026-03-03 18:06:17 +08:00
Daniel
0cbeaf59a5 fix: update code 2026-03-03 17:54:43 +08:00
Daniel
1764a44eb3 fix: update 2026-03-03 17:27:55 +08:00
Daniel
29c921f498 fix: 更改数据库包 2026-03-03 14:49:02 +08:00
Daniel
85dea726e9 fix: 处理数据问题 2026-03-03 13:03:11 +08:00
Daniel
fa6f7407f0 fix: 优化后端数据更新机制 2026-03-03 13:02:28 +08:00
Daniel
7284a1a60d fix: 修复移动端报错 2026-03-03 11:14:34 +08:00
Daniel
4dd1f7e7dc fix:优化数据样式 2026-03-03 10:35:11 +08:00
Daniel
92914e6522 fix: 更新数据面板的驱动方式 2026-03-02 23:21:07 +08:00
Daniel
ef60f18cb0 fix: meger code 2026-03-02 21:51:18 +08:00
张成
c3ec459671 1 2026-03-02 21:43:36 +08:00
Daniel
75c58eecfc fix:优化界面布局 2026-03-02 19:32:56 +08:00
Daniel
3c55485648 fix: 优化留言和分享数据 2026-03-02 19:07:51 +08:00
Daniel
13ca470cad fix: 优化自适应界面 2026-03-02 18:39:29 +08:00
Daniel
4e91018752 fix: 修复移动端自适应问题 2026-03-02 17:48:12 +08:00
Daniel
55c030e3f5 fix: 修复自适应问题 2026-03-02 17:32:55 +08:00
Daniel
0027074b8b fix: 修复爬虫问题 2026-03-02 17:20:31 +08:00
Daniel
33e4786cd0 feat: 完成合并 2026-03-02 16:43:29 +08:00
Daniel
d646a93dcf Merge branch 'master' of https://git.bimwe.com/Daniel/usa 2026-03-02 16:42:55 +08:00
Daniel
af577400fb fix:移除繁体转简体,移除资讯 2026-03-02 16:42:35 +08:00
Daniel
84656f4a11 fix: 移除繁体转简体和资讯 2026-03-02 16:41:57 +08:00
张成
aa630aa479 Merge branch 'master' of https://git.bimwe.com/Daniel/usa 2026-03-02 16:38:40 +08:00
张成
ffcce0ad81 1 2026-03-02 16:38:39 +08:00
Daniel
ad73305ed1 fix: 更新token 2026-03-02 16:36:49 +08:00
Daniel
a9caf6e7c0 fix: 优化后端数据 2026-03-02 16:29:11 +08:00
Daniel
81628a136a fix: 优化后台数据 2026-03-02 15:35:40 +08:00
Daniel
84e97f3370 fix: 优化了代码 2026-03-02 14:32:32 +08:00
Daniel
049276fedd fix: 上传原始数据 2026-03-02 14:27:30 +08:00
Daniel
5460e806b6 fix: 优化git配置文件 2026-03-02 14:25:44 +08:00
Daniel
2d800094b1 fix:优化docker p配置 2026-03-02 14:23:36 +08:00
Daniel
36576592a2 fix: 优化docker 镜像 2026-03-02 14:10:43 +08:00
Daniel
783a69dad1 fix: 修复数据报错 2026-03-02 11:50:35 +08:00
Daniel
004d10b283 fix: 优化数据 2026-03-02 11:28:13 +08:00
Daniel
4a8fff5a00 fix:优化数据来源 2026-03-02 01:00:04 +08:00
Daniel
91d9e48e1e fix:优化整个大屏界面 2026-03-02 00:59:40 +08:00
141 changed files with 12038 additions and 987 deletions

13
.dockerignore Normal file
View File

@@ -0,0 +1,13 @@
node_modules
.git
.env
.env.local
*.log
dist
server/data.db
.DS_Store
*.md
.cursor
.venv
__pycache__
*.pyc

3
.env Normal file
View File

@@ -0,0 +1,3 @@
# Mapbox 地图令牌
VITE_MAPBOX_ACCESS_TOKEN=pk.eyJ1IjoiZDI5cTAiLCJhIjoiY21tYWQyOXI3MGFrZzJwcjJmZGltODI4ZCJ9.0jW_aK91VJExw6ffKGqWIA
DASHSCOPE_API_KEY=sk-029a4c4d761d49b99cfe6073234ac443

View File

@@ -1,46 +1,8 @@
# Mapbox 地图令牌 (波斯湾区域展示)
# Mapbox 地图令牌(仅在此或 .env 中配置,勿写进源码;若曾泄漏请到 Mapbox 控制台轮换)
# 免费申请: https://account.mapbox.com/access-tokens/
# 复制本文件为 .env 并填入你的 token
VITE_MAPBOX_ACCESS_TOKEN=your_mapbox_public_token_here
27 个基地完整 JSON 数据
[
{ "id": 1, "name": "Al Udeid Air Base", "country": "Qatar", "lat": 25.117, "lng": 51.314 },
{ "id": 2, "name": "Camp As Sayliyah", "country": "Qatar", "lat": 25.275, "lng": 51.520 },
{ "id": 3, "name": "Naval Support Activity Bahrain", "country": "Bahrain", "lat": 26.236, "lng": 50.608 },
{ "id": 4, "name": "Camp Arifjan", "country": "Kuwait", "lat": 28.832, "lng": 47.799 },
{ "id": 5, "name": "Ali Al Salem Air Base", "country": "Kuwait", "lat": 29.346, "lng": 47.520 },
{ "id": 6, "name": "Camp Buehring", "country": "Kuwait", "lat": 29.603, "lng": 47.456 },
{ "id": 7, "name": "Al Dhafra Air Base", "country": "UAE", "lat": 24.248, "lng": 54.547 },
{ "id": 8, "name": "Prince Sultan Air Base", "country": "Saudi Arabia", "lat": 24.062, "lng": 47.580 },
{ "id": 9, "name": "Eskan Village", "country": "Saudi Arabia", "lat": 24.774, "lng": 46.738 },
{ "id": 10, "name": "Al Asad Airbase", "country": "Iraq", "lat": 33.785, "lng": 42.441 },
{ "id": 11, "name": "Erbil Air Base", "country": "Iraq", "lat": 36.237, "lng": 43.963 },
{ "id": 12, "name": "Baghdad Diplomatic Support Center", "country": "Iraq", "lat": 33.315, "lng": 44.366 },
{ "id": 13, "name": "Camp Taji", "country": "Iraq", "lat": 33.556, "lng": 44.256 },
{ "id": 14, "name": "Ain al-Asad", "country": "Iraq", "lat": 33.800, "lng": 42.450 },
{ "id": 15, "name": "Al-Tanf Garrison", "country": "Syria", "lat": 33.490, "lng": 38.618 },
{ "id": 16, "name": "Rmelan Landing Zone", "country": "Syria", "lat": 37.015, "lng": 41.885 },
{ "id": 17, "name": "Shaddadi Base", "country": "Syria", "lat": 36.058, "lng": 40.730 },
{ "id": 18, "name": "Conoco Gas Field Base", "country": "Syria", "lat": 35.336, "lng": 40.295 },
{ "id": 19, "name": "Muwaffaq Salti Air Base", "country": "Jordan", "lat": 32.356, "lng": 36.259 },
{ "id": 20, "name": "Incirlik Air Base", "country": "Turkey", "lat": 37.002, "lng": 35.425 },
{ "id": 21, "name": "Kurecik Radar Station", "country": "Turkey", "lat": 38.354, "lng": 37.794 },
{ "id": 22, "name": "Nevatim Air Base", "country": "Israel", "lat": 31.208, "lng": 35.012 },
{ "id": 23, "name": "Ramon Air Base", "country": "Israel", "lat": 30.776, "lng": 34.666 },
{ "id": 24, "name": "Thumrait Air Base", "country": "Oman", "lat": 17.666, "lng": 54.024 },
{ "id": 25, "name": "Masirah Air Base", "country": "Oman", "lat": 20.675, "lng": 58.890 },
{ "id": 26, "name": "West Cairo Air Base", "country": "Egypt", "lat": 30.915, "lng": 30.298 },
{ "id": 27, "name": "Camp Lemonnier", "country": "Djibouti", "lat": 11.547, "lng": 43.159 }
]
# 阿里云 DashScope API Key爬虫 AI 提取用,不设则用规则或 Ollama
# 在 crawler 目录或系统环境变量中设置,例如:
# export DASHSCOPE_API_KEY=sk-xxx
DASHSCOPE_API_KEY=

3
.env的副本 Normal file
View File

@@ -0,0 +1,3 @@
# Mapbox 地图令牌
VITE_MAPBOX_ACCESS_TOKEN=pk.eyJ1IjoiZDI5cTAiLCJhIjoiY21tYWQyOXI3MGFrZzJwcjJmZGltODI4ZCJ9.0jW_aK91VJExw6ffKGqWIA
DASHSCOPE_API_KEY=sk-029a4c4d761d49b99cfe6073234ac443

5
.gitignore vendored
View File

@@ -23,10 +23,11 @@ dist-ssr
*.sln
*.sw?
# API database
# API databaseSQLite 文件,部署时应挂载卷持久化,勿提交)
server/data.db
# Env
# Env(含 token勿提交
.env
.env.local
.env.*.local
.pyc

0
=1.11.0 Normal file
View File

168
DEPLOY.md Normal file
View File

@@ -0,0 +1,168 @@
# Docker 部署到服务器
将 US-Iran 态势面板打包成 Docker 镜像,便于移植到任意服务器。
## 架构
| 服务 | 端口 | 说明 |
|--------|------|--------------------------|
| api | 3001 | 前端静态 + REST API + WebSocket |
| crawler| 8000 | RSS 爬虫 + GDELT内部服务 |
- 数据库SQLite挂载到 `app-data` volume`/data/data.db`
- 前端与 API 合并到同一镜像,构建时执行 `npm run build` 生成 dist含修订页 `/edit`),访问 `http://主机:3001` 即可
## 快速部署
```bash
# 1. 克隆项目
git clone <repo> usa-dashboard && cd usa-dashboard
# 2. 构建并启动(需先配置 Mapbox Token见下方
docker compose up -d --build
# 3. 访问
# 前端 + API: http://localhost:3001
# 爬虫状态: http://localhost:8000/crawler/status
```
## Mapbox Token地图展示
构建时需将 Token 传入前端,否则地图为占位模式:
```bash
# 方式 1.env 文件
echo "VITE_MAPBOX_ACCESS_TOKEN=pk.xxx" > .env
docker compose up -d --build
# 方式 2环境变量
VITE_MAPBOX_ACCESS_TOKEN=pk.xxx docker compose up -d --build
```
## 推送到私有仓库并移植
```bash
# 1. 打标签(替换为你的仓库地址)
docker compose build
docker tag usa-dashboard-api your-registry/usa-dashboard-api:latest
docker tag usa-dashboard-crawler your-registry/usa-dashboard-crawler:latest
# 2. 推送
docker push your-registry/usa-dashboard-api:latest
docker push your-registry/usa-dashboard-crawler:latest
# 3. 在目标服务器拉取并启动
docker pull your-registry/usa-dashboard-api:latest
docker pull your-registry/usa-dashboard-crawler:latest
# 需准备 docker-compose.yml 或等效编排,见下方
```
## 仅用镜像启动(无 compose
```bash
# 1. 创建网络与数据卷
docker network create usa-net
docker volume create usa-data
# 2. 启动 API前端+接口)
docker run -d --name api --network usa-net \
-p 3001:3001 \
-v usa-data:/data \
-e DB_PATH=/data/data.db \
usa-dashboard-api
# 3. 启动爬虫(通过 usa-net 访问 api
docker run -d --name crawler --network usa-net \
-v usa-data:/data \
-e DB_PATH=/data/data.db \
-e API_BASE=http://api:3001 \
-e CLEANER_AI_DISABLED=1 \
-e GDELT_DISABLED=1 \
usa-dashboard-crawler
```
爬虫通过 `API_BASE` 调用 Node 的 `/api/crawler/notify`,两容器需在同一网络内。
## 国内服务器 / 镜像加速
拉取 `node``python` 等基础镜像慢时:
1. **Docker 镜像加速**:见 [docs/DOCKER_MIRROR.md](docs/DOCKER_MIRROR.md)
2. **构建时使用国内镜像源**
```bash
docker compose build --build-arg REGISTRY=docker.m.daocloud.io/library/
docker compose up -d
```
## 常用操作
```bash
# 查看日志
docker compose logs -f
# 重启
docker compose restart
# 停止并删除容器(数据卷保留)
docker compose down
# 回填战损数据(从 situation_update 重新提取)
curl -X POST http://localhost:8000/crawler/backfill
```
## 服务器直接部署(不用 Docker
若在服务器上直接跑 Node不用 Docker要能访问修订页 `/edit`,需保证:
1. **先构建、再启动**:在项目根目录执行 `npm run build`,再启动 API如 `npm run api` 或 `node server/index.js`)。
未构建时没有 `dist` 目录,启动会打日志:`dist 目录不存在,前端页面(含 /edit 修订页)不可用`。
2. **若前面有 Nginx**`curl http://127.0.0.1:3001/edit` 已是 200 但浏览器访问 `/edit` 仍 404说明 Nginx 没有把前端路由交给后端或没做 SPA fallback。二选一即可
**方式 ANginx 只反代,所有页面由 Node 提供(推荐)**
```nginx
server {
listen 80;
server_name 你的域名;
location / {
proxy_pass http://127.0.0.1:3001;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location /ws {
proxy_pass http://127.0.0.1:3001;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
}
```
**方式 BNginx 提供 dist 静态,仅 /api、/ws 反代**
```nginx
server {
listen 80;
server_name 你的域名;
root /path/to/项目根目录/dist; # 改成实际路径
index index.html;
location / {
try_files $uri $uri/ /index.html;
}
location /api {
proxy_pass http://127.0.0.1:3001;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
location /ws {
proxy_pass http://127.0.0.1:3001;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
}
```
修改后执行 `sudo nginx -t` 检查配置,再 `sudo systemctl reload nginx`(或 `sudo nginx -s reload`)。

41
Dockerfile Normal file
View File

@@ -0,0 +1,41 @@
# 前端 + 后端合一镜像:构建阶段产出 dist运行阶段提供静态与 API含修订页 /edit
# 国内服务器拉取慢时,可加 --build-arg REGISTRY=docker.m.daocloud.io/library
ARG REGISTRY=
# ---------- 阶段 1构建前端 ----------
FROM ${REGISTRY}node:20-slim AS frontend-builder
WORKDIR /app
RUN npm config set registry https://registry.npmmirror.com
COPY package*.json ./
RUN npm ci
COPY vite.config.ts index.html tsconfig.json tsconfig.app.json ./
COPY postcss.config.js tailwind.config.js ./
COPY src ./src
RUN npm run build
# ---------- 阶段 2运行API + 静态) ----------
FROM ${REGISTRY}node:20-slim
RUN npm config set registry https://registry.npmmirror.com
RUN rm -f /etc/apt/sources.list.d/debian.sources && \
echo 'deb http://mirrors.aliyun.com/debian bookworm main' > /etc/apt/sources.list && \
echo 'deb http://mirrors.aliyun.com/debian bookworm-updates main' >> /etc/apt/sources.list && \
echo 'deb http://mirrors.aliyun.com/debian-security bookworm-security main' >> /etc/apt/sources.list && \
apt-get update && apt-get install -y --no-install-recommends python3 make g++ && \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY package*.json ./
RUN npm ci --omit=dev
COPY server ./server
COPY --from=frontend-builder /app/dist ./dist
ENV NODE_ENV=production
ENV API_PORT=3001
ENV DB_PATH=/data/data.db
EXPOSE 3001
COPY docker-entrypoint.sh ./
RUN chmod +x docker-entrypoint.sh
ENTRYPOINT ["./docker-entrypoint.sh"]

18
Dockerfile.crawler Normal file
View File

@@ -0,0 +1,18 @@
# Python 3.11+ 爬虫服务(与 requirements.txt / pyproject.toml 一致)
# 国内服务器可加 --build-arg REGISTRY=docker.m.daocloud.io/library
ARG REGISTRY=
FROM ${REGISTRY}python:3.11-slim
WORKDIR /app
COPY crawler/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY crawler ./
ENV DB_PATH=/data/data.db
ENV API_BASE=http://api:3001
ENV GDELT_DISABLED=1
ENV RSS_INTERVAL_SEC=60
EXPOSE 8000
CMD ["uvicorn", "realtime_conflict_service:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -41,7 +41,13 @@ npm run api:seed
npm run api
```
开发时需同时运行前端与 API
开发时可用一键启动(推荐)
```bash
npm start
```
或分终端分别运行:
```bash
# 终端 1
@@ -53,6 +59,19 @@ npm run dev
API 会由 Vite 代理到 `/api`,前端通过 `/api/situation` 获取完整态势数据。数据库文件位于 `server/data.db`,可通过修改表数据实现动态调整。
### 爬虫不生效时
1. 测试 RSS 抓取:`npm run crawler:test`(需网络,返回抓取条数)
2. 单独启动爬虫查看日志:`npm run gdelt`(另开终端)
3. 查看爬虫状态:`curl http://localhost:8000/crawler/status`(需爬虫服务已启动)
4. 数据库面板 `/db` 每 30 秒自动刷新,可观察 situation_update 条数是否增加
### 面板数据 / 地图 / 战损不更新时
- **确保 API 与爬虫共用同一数据库**本地开发时Node 默认用 `server/data.db`,爬虫默认用 `../server/data.db`(同文件)。若 Node 在本地、爬虫在 Docker则数据库不同面板不会更新。
- **Docker 部署**`GDELT_DISABLED=1` 时,地图冲突点由 RSS 新闻填充;战损与基地状态由规则/AI 提取后写入 `combat_losses``key_location`
- **排查**:访问 `/db``situation_update``gdelt_events``combat_losses` 是否在增长;确认 API 已启动且前端能访问 `/api/situation`
## Development
```bash
@@ -65,6 +84,37 @@ npm run dev
npm run build
```
## Docker 部署
```bash
# 构建并启动(需 .env 中配置 VITE_MAPBOX_ACCESS_TOKEN 以启用地图)
docker compose up -d
# 访问前端http://localhost:3001
# 数据库与爬虫共享 volume首次启动自动 seed
```
**迁移到服务器**:见 [DEPLOY.md](DEPLOY.md)(构建、推送、单机/多机部署说明)
**拉取镜像超时?** 在 Docker Desktop 配置镜像加速,见 [docs/DOCKER_MIRROR.md](docs/DOCKER_MIRROR.md)
**开发时无需每次 rebuild**:使用开发模式挂载源码 + 热重载:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
```
- API`node --watch` 监听 `server/` 变更并自动重启
- 爬虫:`uvicorn --reload` 监听 `crawler/` 变更并自动重启
- 修改 `server/``crawler/` 后,服务会自动重载,无需重新 build
环境变量(可选,在 .env 或 docker-compose.yml 中配置):
- `VITE_MAPBOX_ACCESS_TOKEN`Mapbox 令牌,构建时注入
- `DB_PATH`:数据库路径(默认 /data/data.db
- `CLEANER_AI_DISABLED=1`:爬虫默认禁用 Ollama
- `GDELT_DISABLED=1`:爬虫默认禁用 GDELT国内易超时
## Project Structure
```
@@ -91,3 +141,6 @@ server/
├── seed.js # 数据库种子脚本
└── data.db # SQLite 数据库(运行 seed 后生成)
```
https://git.bimwe.com/Daniel/usa.git

459
crawler/README.md Normal file
View File

@@ -0,0 +1,459 @@
# GDELT 实时冲突服务 + 新闻爬虫
## 数据来源梳理
### 1. GDELT Project (gdelt_events)
| 项目 | 说明 |
|------|------|
| API | `https://api.gdeltproject.org/api/v2/doc/doc` |
| 查询 | `query=United States Iran military`(可配 `GDELT_QUERY` |
| 模式 | `mode=ArtList``format=json``maxrecords=30` |
| 时间范围 | **未指定时默认最近 3 个月**,按相关性排序,易返回较旧文章 |
| 更新频率 | GDELT 约 15 分钟级,爬虫 60 秒拉一次 |
**数据偏老原因**:未传 `timespan``sort=datedesc`API 返回 3 个月内“最相关”文章,不保证最新。
### 2. RSS 新闻 → 看板实时数据(主输出)+ 事件脉络
| 项目 | 说明 |
|------|------|
| **主输出** | **看板实时数据**战损combat_losses、据点状态key_location、冲突事件gdelt_events、统计conflict_stats供前端战损/基地/地图等面板展示。 |
| 辅助输出 | 事件脉络situation_update时间线摘要非主展示目标。 |
| 源 | 多国主流媒体:美/英/法/俄/中/伊/卡塔尔等(见 `config.RSS_FEEDS` |
| 过滤 | 标题/摘要需含 `KEYWORDS` 之一iran、usa、strike、military 等) |
| 更新 | 爬虫按 `RSS_INTERVAL_SEC` 拉取;每 `BACKFILL_CYCLES` 轮会从近期事件回填一次战损/据点,保证面板数据与最新内容一致。 |
**GDELT 无法访问时**:设置 `GDELT_DISABLED=1`,仅用 RSS部分境外源可能需代理。
### 3. AI 新闻清洗与分类(可选)
- **清洗**`cleaner_ai.py` 用 Ollama 提炼新闻为简洁摘要,供面板展示
- **分类**`parser_ai.py` 用 Ollama 替代规则做 category/severity 判定
- 需先安装并运行 Ollama`ollama run llama3.1`
- 环境变量:`OLLAMA_MODEL=llama3.1``PARSER_AI_DISABLED=1``CLEANER_AI_DISABLED=1`(禁用对应 AI
---
**看板实时数据更新**:爬虫抓取 → 提取战损/据点等 → 写入 combat_losses、key_location 等 → 调用 Node 通知 → WebSocket 广播 → 前端战损/基地/地图等面板刷新。事件脉络(时间线)为同一流水线的辅助输出。
## 依赖
- **Python 3.11+**(推荐 3.11 或 3.12
```bash
pip install -r requirements.txt
```
或使用 pyproject`pip install -e crawler/`(在项目根目录)。
- `deep-translator`GDELT 与 RSS 新闻入库前自动翻译为中文。
- `dashscope`:可选,配置 `DASHSCOPE_API_KEY` 后启用通义提取/清洗。
## 运行(需同时启动 3 个服务)
| 终端 | 命令 | 说明 |
|------|------|------|
| 1 | `npm run api` | Node API + WebSocket必须 |
| 2 | `npm run gdelt` | GDELT + RSS 爬虫(**事件脉络数据来源** |
| 3 | `npm run dev` | 前端开发 |
**事件脉络不更新时**:多半是未启动 `npm run gdelt`。只跑 `npm run api` 时,事件脉络会显示空或仅有缓存。
## 如何检查爬虫是否工作正常
按下面顺序做即可确认整条链路(爬虫 → 数据库 → Node 重载 → API/WebSocket正常。
### 1. 一键验证(推荐)
先启动 API再执行验证脚本可选是否顺带启动爬虫
```bash
# 终端 1必须
npm run api
# 终端 2执行验证不启动爬虫只检查当前状态
./scripts/verify-pipeline.sh
# 或:顺带启动爬虫并等首次抓取后再验证
./scripts/verify-pipeline.sh --start-crawler
```
脚本会检查API 健康、态势数据含 `lastUpdated`、爬虫服务是否可达、`news_content`/situation_update、战损字段、`POST /api/crawler/notify` 是否可用。
### 2. 手动快速检查
| 步骤 | 命令 / 操作 | 正常表现 |
|-----|-------------|----------|
| API 是否在跑 | `curl -s http://localhost:3001/api/health` | 返回 `{"ok":true}` |
| 态势是否可读 | `curl -s http://localhost:3001/api/situation \| head -c 300` | 含 `lastUpdated``usForces``recentUpdates` |
| RSS 能否抓到 | `npm run crawler:test` | 输出「RSS 抓取: N 条」N>0 表示有命中 |
| 爬虫服务gdelt | `curl -s http://localhost:8000/crawler/status` | 返回 JSON`db_path`/`db_exists` 等 |
| 库里有无爬虫数据 | `sqlite3 server/data.db "SELECT COUNT(*) FROM situation_update; SELECT COUNT(*) FROM news_content;"` 或访问 `http://localhost:3001/api/db/dashboard` | situation_update、news_content 条数 > 0跑过流水线后 |
| 通知后是否重载 | 爬虫写库后会 POST `/api/crawler/notify`Node 会 `reloadFromFile` 再广播 | 前端/`/api/situation``lastUpdated` 和内容会更新 |
### 3. 跑一轮流水线(不常驻爬虫时)
不启动 gdelt 时,可单次跑完整流水线(抓取 → 去重 → 写表 → notify
```bash
npm run api # 保持运行
cd crawler && python3 -c "
from pipeline import run_full_pipeline
from config import DB_PATH, API_BASE
n_fetched, n_news, n_panel = run_full_pipeline(db_path=DB_PATH, api_base=API_BASE, notify=True)
print('抓取:', n_fetched, '去重新增:', n_news, '面板写入:', n_panel)
"
```
有网络且有关键词命中时,应看到非零数字;再查 `curl -s http://localhost:3001/api/situation` 或前端事件脉络是否出现新数据。
**按时间范围测试(例如 2 月 28 日 0 时至今)**RSS 流水线支持只保留指定起始时间之后的条目,便于测试「从某日 0 点到现在」的数据。
```bash
# 默认从 2026-02-28 0:00 到现在
npm run crawler:once:range
# 或指定起始时间
./scripts/run-crawler-range.sh 2026-02-28T00:00:00
```
需设置环境变量 `CRAWL_START_DATE`ISO 时间,如 `2026-02-28T00:00:00`。GDELT 时间范围在启动 gdelt 服务时设置,例如:`GDELT_TIMESPAN=3d npm run gdelt`(最近 3 天)。
### 4. 仅测提取逻辑(不写库)
```bash
npm run crawler:test:extraction # 规则/db_merge 测试
# 或按 README「快速自测命令」用示例文本调 extract_from_news 看 combat_losses_delta / key_location_updates
```
**常见现象**:抓取 0 条 → 网络/RSS 被墙或关键词未命中situation_update 为空 → 未跑流水线或去重后无新增;前端不刷新 → 未开 `npm run api` 或未开爬虫gdelt
### 5. 爬虫与面板是否联通
专门检查「爬虫写库」与「面板展示」是否一致:
```bash
./scripts/check-crawler-panel-connectivity.sh
```
会对比:爬虫侧的 `situation_update` 条数 vs 面板 API 返回的 `recentUpdates` 条数,并说明为何战损/基地等不一定随每条新闻变化。
## 爬虫与面板数据联动说明
| 面板展示 | 数据来源(表/接口) | 是否由爬虫更新 | 说明 |
|----------|---------------------|----------------|------|
| **事件脉络** (recentUpdates) | situation_update → getSituation() | ✅ 是 | 每条去重后的新闻会写入 situation_updateNode 收到 notify 后重载 DB 再广播 |
| **地图冲突点** (conflictEvents) | gdelt_events 或 RSS→gdelt 回填 | ✅ 是 | GDELT 或 GDELT 禁用时由 situation_update 同步到 gdelt_events |
| **战损/装备毁伤** (combatLosses) | combat_losses | ⚠️ 有条件 | 仅当 AI/规则从新闻中提取到数字如「2 名美军死亡」merge 才写入增量 |
| **基地/地点状态** (keyLocations) | key_location | ⚠️ 有条件 | 仅当提取到 key_location_updates如某基地遭袭时更新 |
| **地图打击/攻击动画** (mapData.strikeSources, strikeLines) | map_strike_source, map_strike_line | ⚠️ 有条件 | 仅当提取到 map_strike_sources / map_strike_lines 时写入;格式见下「地图打击数据」 |
| **力量摘要/指数/资产** (summary, powerIndex, assets) | force_summary, power_index, force_asset | ❌ 否 | 仅 seed 初始化,爬虫不写 |
| **华尔街/报复情绪** (wallStreet, retaliation) | wall_street_trend, retaliation_* | ⚠️ 有条件 | 仅当提取器输出对应字段时更新 |
因此:**新闻很多、但战损/基地数字不动**是正常现象——多数标题不含可解析的伤亡/基地数字只有事件脉络recentUpdates和地图冲突点会随每条新闻增加。若**事件脉络也不更新**,请确认 Node 终端在爬虫每轮抓取后是否出现 `[crawler/notify] DB 已重载`;若无,检查爬虫的 `API_BASE` 是否指向当前 API默认 `http://localhost:3001`)。
## 写库流水线(与 server/README 第五节一致)
RSS 与主入口均走统一流水线 `pipeline.run_full_pipeline`
1. **抓取** → 2. **AI 清洗**(标题/摘要/分类)→ 3. **去重**news_content.content_hash→ 4. **映射到前端库字段**situation_update、combat_losses、key_location 等)→ 5. **更新表** → 6. **有新增时 POST /api/crawler/notify**
- `npm run crawler`main.py`npm run gdelt`realtime_conflict_service的 RSS 分支都调用该流水线。
- 实现见 `crawler/pipeline.py`
## 数据流
```
GDELT API → 抓取(60s) → SQLite (gdelt_events, conflict_stats) → POST /api/crawler/notify
RSS → 抓取 → 清洗 → 去重 → 写 news_content / situation_update / 战损等 → POST /api/crawler/notify
Node 更新 situation.updated_at + WebSocket 广播
前端实时展示
```
## 配置
环境变量:
- `DB_PATH`: SQLite 路径,默认 `../server/data.db`
- `API_BASE`: Node API 地址,默认 `http://localhost:3001`
- **`DASHSCOPE_API_KEY`**阿里云通义DashScopeAPI Key。**设置后全程使用商业模型,无需本机安装 Ollama**(适合 Mac 版本较低无法跑 Ollama 的情况)。获取: [阿里云百炼 / DashScope](https://dashscope.console.aliyun.com/) → 创建 API-KEY复制到环境变量或项目根目录 `.env``DASHSCOPE_API_KEY=sk-xxx`。摘要、分类、战损/基地提取均走通义。
- `GDELT_QUERY`: 搜索关键词,默认 `United States Iran military`
- `GDELT_MAX_RECORDS`: 最大条数,默认 30
- `GDELT_TIMESPAN`: 时间范围,`1h` / `1d` / `1week`,默认 `1d`(近日资讯)
- `GDELT_DISABLED`: 设为 `1` 则跳过 GDELT仅用 RSS 新闻GDELT 无法访问时用)
- `FETCH_INTERVAL_SEC`: GDELT 抓取间隔(秒),默认 60
- `RSS_INTERVAL_SEC`: RSS 抓取间隔(秒),默认 45优先保证事件脉络
- `OLLAMA_MODEL`: AI 分类模型,默认 `llama3.1`
- `PARSER_AI_DISABLED`: 设为 `1` 则禁用 AI 分类,仅用规则
- `CLEANER_AI_DISABLED`: 设为 `1` 则禁用 AI 清洗,仅用规则截断
- `FETCH_FULL_ARTICLE`: 设为 `0` 则不再抓取正文,仅用标题+摘要做 AI 提取(默认 `1` 抓取正文)
- `ARTICLE_FETCH_LIMIT`: 每轮为多少条新资讯抓取正文,默认 10
- `ARTICLE_FETCH_TIMEOUT`: 单篇正文请求超时(秒),默认 12
- `ARTICLE_MAX_BODY_CHARS`: 正文最大字符数,默认 6000
- `EXTRACT_TEXT_MAX_LEN`: 送入 AI 提取的原文最大长度,默认 4000
**增量与地点**:战损一律按**增量**处理——AI 只填本则报道的「本次/此次」新增数,不填累计总数;合并时与库内当前值叠加。双方攻击地点通过 `key_location_updates` 更新(美军基地被打击 side=us伊朗设施被打击 side=iran会写入 `key_location` 的 status/damage_level。
---
## 主要新闻资讯来源RSS
配置在 `crawler/config.py``RSS_FEEDS`,当前包含:
| 来源 | URL / 说明 |
|------|------------|
| **美国** | Reuters Top News、NYT World |
| **英国** | BBC World、BBC Middle East、The Guardian World |
| **法国** | France 24 |
| **德国** | DW World |
| **俄罗斯** | TASS、RT |
| **中国** | Xinhua World、CGTN World |
| **凤凰** | 凤凰军事、凤凰国际feedx.net 镜像) |
| **伊朗** | Press TV |
| **卡塔尔/中东** | Al Jazeera All、Al Jazeera Middle East |
单源超时由 `FEED_TIMEOUT`(默认 12 秒)控制;某源失败不影响其他源。
**过滤**:每条条目的标题+摘要必须命中 `config.KEYWORDS` 中至少一个关键词才会进入流水线(伊朗/美国/中东/军事/基地/霍尔木兹等,见 `config.KEYWORDS`)。
### 境内可访问情况(仅供参考,以实际网络为准)
| 通常境内可直接访问 | 说明 |
|-------------------|------|
| **新华网** `english.news.cn/rss/world.xml` | 中国官方外文社 |
| **CGTN** `cgtn.com/rss/world` | 中国国际台 |
| **凤凰** `feedx.net/rss/ifengmil.xml``ifengworld.xml` | 第三方 RSS 镜像,中文军事/国际 |
| **人民网** `people.com.cn/rss/military.xml``world.xml` | 军事、国际 |
| **新浪** `rss.sina.com.cn` 军事/新闻 | 新浪军事、新浪新闻滚动 |
| **中国日报** `chinadaily.com.cn/rss/world_rss.xml` | 国际新闻 |
| **中国军网** `english.chinamil.com.cn/rss.xml` | 解放军报英文 |
| **俄通社 TASS** `tass.com/rss/v2.xml` | 俄罗斯官媒 |
| **RT** `rt.com/rss/` | 俄罗斯今日俄罗斯 |
| **DW** `rss.dw.com/xml/rss-en-world` | 德国之声,部分地区/时段可访问 |
**境内常需代理**Reuters、NYT、BBC、Guardian、France 24、Al Jazeera、Press TV 等境外主站 RSS直连易超时或被墙。境内部署建议`CRAWLER_USE_PROXY=1` 并配置代理,或仅保留上表源(可在 `config.py` 中注释掉不可达的 URL减少超时等待
**国内其他媒体(今日头条、网易、腾讯、新浪微博等)**:今日头条、腾讯新闻、新浪微博等多为 App/信息流产品,**无官方公开 RSS**。如需接入可考虑:第三方 RSS 聚合(如 FeedX、RSSHub 等若有对应频道)、或平台开放 API若有且合规使用。当前爬虫已加入新浪rss.sina.com.cn、人民网、中国日报、中国军网等有明确 RSS 的境内源;网易新闻曾有 RSS 中心页,具体栏目 XML 需在其订阅页查找后加入 `config.py`
---
## 为什么爬虫一直抓不到有效信息0 条)
常见原因与应对如下。
| 原因 | 说明 | 建议 |
|------|------|------|
| **RSS 源在国内不可达** | 多数源为境外站Reuters、BBC、NYT、Guardian、France24、DW、TASS、RT、Al Jazeera、Press TV 等),国内直连易超时或被墙。 | 使用代理:设 `CRAWLER_USE_PROXY=1` 并配置系统/环境 HTTP(S) 代理,或部署到海外服务器再跑爬虫。 |
| **关键词无一命中** | 只有标题或摘要里包含 `KEYWORDS` 中至少一个词才会保留(如 iran、usa、middle east、strike、基地 等)。若当前头条都不涉及美伊/中东,整轮会 0 条。 | 先跑 `npm run crawler:test` 看是否 0 条;若长期为 0 且网络正常,可在 `config.py` 中适当放宽或增加 `KEYWORDS`(如增加通用词做测试)。 |
| **单源超时导致整轮无结果** | 若所有源都在 `FEED_TIMEOUT` 内未返回,则每源返回空列表,汇总仍为 0 条。 | 增大 `FEED_TIMEOUT`(如 20或先单独用浏览器/curl 测某条 RSS URL 是否可访问;国内建议代理后再试。 |
| **分类/清洗依赖 AI 且失败** | 每条命中关键词的条目会调 `classify_and_severity`Ollama 或 DashScope。若本机未起 Ollama、未设 DashScope且规则兜底异常可能影响该条。 | 设 `PARSER_AI_DISABLED=1` 使用纯规则分类,避免依赖 Ollama/DashScope或配置好 `DASHSCOPE_API_KEY` / 本地 Ollama 再跑。 |
| **去重后无新增** | 抓到的条数 >0但经 `news_content` 的 content_hash 去重后「新增」为 0则不会写 `situation_update`,事件脉络不增加。 | 属正常:同一批新闻再次抓取不会重复写入。等有新头条命中关键词后才会出现新条目。 |
**快速自检**
```bash
npm run crawler:test
```
输出「RSS 抓取: N 条」。若始终为 0优先检查网络/代理与 `KEYWORDS`;若 N>0 但面板无新事件,多为去重后无新增或未调 `POST /api/crawler/notify`
---
## 数据流与 AI 自检
**完整链路**RSS 抓取 → 关键词过滤 → 翻译/清洗 → 去重news_content→ 写 situation_update → 正文抓取(可选)→ **AI 提取**(战损/基地等)→ db_merge 写 combat_losses/key_location 等 → POST /api/crawler/notify → Node 重载并广播。
| 环节 | 说明 | 自检 |
|------|------|------|
| 抓取 | `scrapers/rss_scraper.fetch_all()`,按 KEYWORDS 过滤 | `npm run crawler:test` 看条数 |
| 去重 | `news_storage.save_and_dedup()`content_hash 落库 news_content | 查 `news_content` 表条数 |
| 事件脉络 | `db_writer.write_updates()` 写 situation_update与 pipeline 使用同一 db_path | 查 `situation_update` 表 |
| AI 提取 | 战损/基地等:**有 DASHSCOPE_API_KEY 用通义****否则 CLEANER_AI_DISABLED=1 用规则**,否则用 **Ollama**extractor_ai | 见下 |
| 分类/严重度 | 每条 RSS 的 category/severity**PARSER_AI_DISABLED=1 用规则**,否则 DashScope 或 Ollama | 无 AI 时设 `PARSER_AI_DISABLED=1` 可正常跑 |
**如何保证「面板实时数据」有更新**(战损、据点等):
- **推荐**:设 `CLEANER_AI_DISABLED=1` → 使用 `extractor_rules`(纯规则),无需 Ollama/通义,即可从新闻中提取战损/基地并写入 combat_losses、key_location。
- 或设 `DASHSCOPE_API_KEY` → 用通义做更细的提取。
- 否则用 `extractor_ai`(需本机 `ollama run llama3.1`),未起则提取静默失败、面板数字不更新。
- 服务会每 `BACKFILL_CYCLES` 轮(默认 2 轮)从近期事件再跑一次提取并合并,保证战损/据点与最新内容一致。
**常见 bug 与修复**
- **事件脉络有、战损/基地不更新**:多为 AI 未跑通Ollama 未起且未设 DashScope、未设 CLEANER_AI_DISABLED。可设 `CLEANER_AI_DISABLED=1` 用规则提取,或起 Ollama / 配置 DashScope。
- **多 DB 路径不一致**pipeline 已统一 `db_path``write_updates``save_and_dedup``merge` 均使用同一 path`config.DB_PATH`)。
---
## 优化后验证效果示例
以下为「正文抓取 + AI 精确提取 + 增量与地点更新」优化后,单条新闻从输入到前端展示的完整示例,便于对照验证。
### 1. 示例输入(新闻摘要/全文片段)
```
伊朗向伊拉克阿萨德空军基地发射 12 枚弹道导弹,造成此次袭击中 2 名美军人员死亡、14 人受伤,
另有 1 架战机在跑道受损。乌代德基地未遭直接命中。同日以色列对伊朗伊斯法罕一处设施发动打击。
```
### 2. AI 提取输出(增量 + 攻击地点)
```json
{
"summary": "伊朗导弹袭击伊拉克阿萨德基地致美军 2 死 14 伤1 架战机受损;以军打击伊斯法罕。",
"category": "alert",
"severity": "high",
"us_personnel_killed": 2,
"us_personnel_wounded": 14,
"us_aircraft": 1,
"us_bases_damaged": 1,
"key_location_updates": [
{ "name_keywords": "阿萨德|asad|al-asad", "side": "us", "status": "attacked", "damage_level": 2 },
{ "name_keywords": "伊斯法罕|isfahan", "side": "iran", "status": "attacked", "damage_level": 1 }
]
}
```
说明:战损为**本则报道的新增数**(此次 2 死、14 伤、1 架战机),不是累计总数;地点为双方遭袭设施(美军基地 side=us伊朗设施 side=iran
### 3. 合并后数据库变化
| 表/字段 | 合并前 | 本则增量 | 合并后 |
|--------|--------|----------|--------|
| combat_losses.us.personnel_killed | 127 | +2 | 129 |
| combat_losses.us.personnel_wounded | 384 | +14 | 398 |
| combat_losses.us.aircraft | 2 | +1 | 3 |
| combat_losses.us.bases_damaged | 27 | +1 | 28 |
| key_locationname 含「阿萨德」) | status=operational | — | status=attacked, damage_level=2 |
| key_locationname 含「伊斯法罕」) | status=operational | — | status=attacked, damage_level=1 |
若 AI 误提「累计 2847 人丧生」并填成 personnel_killed=2847单次合并会被上限截断如最多 +500避免一次写入导致数据剧增。
### 4. 前端验证效果
- **事件脉络**出现一条新条目summary 为上述 12 句概括category=alert、severity=high。
- **装备毁伤面板**:美军「阵亡」+2、「受伤」+14、「战机」+1基地毁/损数字随 bases_damaged +1 更新。
- **地图**:阿萨德基地、伊斯法罕对应点位显示为「遭袭」状态(脉冲/标色随现有地图逻辑)。
- **API**`GET /api/situation``usForces.combatLosses``usForces.keyLocations`(含 status/damage_level为更新后值`lastUpdated` 为合并后时间。
### 5. 快速自测命令
```bash
# 仅测提取逻辑(不写库):用示例文本调 AI 提取,看是否得到增量 + key_location_updates
cd crawler && python3 -c "
from extractor_ai import extract_from_news
text = '''伊朗向伊拉克阿萨德空军基地发射导弹,此次袭击造成 2 名美军死亡、14 人受伤1 架战机受损。'''
out = extract_from_news(text)
print('combat_losses_delta:', out.get('combat_losses_delta'))
print('key_location_updates:', out.get('key_location_updates'))
"
```
期望:`combat_losses_delta.us` 含 personnel_killed=2、personnel_wounded=14、aircraft=1 等增量;`key_location_updates` 含阿萨德 side=us 等条目。
### 地图打击数据(与前端攻击动画统一格式)
爬虫/AI 若输出以下字段,`db_merge` 会写入 `map_strike_source``map_strike_line``GET /api/situation``mapData.strikeSources` / `mapData.strikeLines` 会更新,前端可直接追加打击线与飞行动画。
- **map_strike_sources**(可选):`[{ "id": "israel"|"lincoln"|"ford", "name": "显示名", "lng": 经度, "lat": 纬度 }]`,与 seed 中打击源 id 一致时可覆盖位置。
- **map_strike_lines**(可选):`[{ "source_id": "israel"|"lincoln"|"ford", "target_lng", "target_lat", "target_name": "目标名", "struck_at": "ISO 时间" }]`,每条追加一条打击线(不删已有),便于按时间回放。
示例:`{ "map_strike_lines": [{ "source_id": "israel", "target_lng": 51.916, "target_lat": 33.666, "target_name": "纳坦兹", "struck_at": "2026-03-01T02:04:00.000Z" }] }`
---
## 冲突强度 (impact_score)
| 分数 | 地图效果 |
|------|------------|
| 13 | 绿色点 |
| 46 | 橙色闪烁 |
| 710 | 红色脉冲扩散 |
## API
- `GET http://localhost:8000/events`返回事件列表与冲突统计Python 服务直连)
- `GET http://localhost:3001/api/events`:从 Node 读取(推荐,含 WebSocket 同步)
## 本地验证链路
按下面任选一种方式,确认「抓取 → 清洗 → 去重 → 映射 → 写表 → 通知」整条链路正常。
### 方式一:最小验证(不启动前端)
1. **启动 API必须**
```bash
npm run api
```
保持运行,默认 `http://localhost:3001`。
2. **安装爬虫依赖并跑一轮流水线**
```bash
cd crawler && pip install -r requirements.txt
python -c "
from pipeline import run_full_pipeline
from config import DB_PATH, API_BASE
n_fetched, n_news, n_panel = run_full_pipeline(db_path=DB_PATH, api_base=API_BASE, translate=True, notify=True)
print('抓取:', n_fetched, '去重新增:', n_news, '面板写入:', n_panel)
"
```
- 有网络且有关键词命中时,应看到非零数字;无网络或全被过滤则为 `0 0 0`。
- 若报错 `module 'socket' has no attribute 'settimeout'`,已修复为 `setdefaulttimeout`,请拉取最新代码。
3. **查库确认**
```bash
sqlite3 server/data.db "SELECT COUNT(*) FROM situation_update; SELECT COUNT(*) FROM news_content;"
```
或浏览器打开 `http://localhost:3001/api/db/dashboard`,看 `situation_update`、`news_content` 是否有数据。
4. **确认态势接口**
```bash
curl -s http://localhost:3001/api/situation | head -c 500
```
应包含 `lastUpdated`、`recentUpdates` 等。
### 方式二:用现有验证脚本(推荐)
1. 终端 1`npm run api`
2. 终端 2可选`npm run gdelt`(会定时跑 RSS + GDELT
3. 执行验证脚本:
```bash
./scripts/verify-pipeline.sh
```
若爬虫未启动想一并测爬虫,可:
```bash
./scripts/verify-pipeline.sh --start-crawler
```
脚本会检查API 健康、态势数据、爬虫状态、资讯表、战损字段、通知接口。
### 方式三:只测 RSS 抓取(不写库)
```bash
npm run crawler:test
```
输出为「RSS 抓取: N 条」。0 条时检查网络或 `config.py` 里 `RSS_FEEDS` / `KEYWORDS`。
### 常见问题
| 现象 | 可能原因 |
|------|----------|
| 抓取 0 条 | 网络不通、RSS 被墙、关键词无一命中 |
| `situation_update` 为空 | 去重后无新增,或未跑流水线(只跑了 `fetch_all` 未跑 `run_full_pipeline` |
| 前端事件脉络不刷新 | 未启动 `npm run api` 或 WebSocket 未连上(需通过 Vite 代理访问前端) |
| 翻译/AI 清洗很慢或报错 | 设 `TRANSLATE_DISABLED=1` 或 `CLEANER_AI_DISABLED=1` 可跳过,用规则兜底 |
---
## 故障排查
| 现象 | 可能原因 | 排查 |
|------|----------|------|
| 事件脉络始终为空 | 未启动 GDELT 爬虫 | 另开终端运行 `npm run gdelt`,观察是否有 `GDELT 更新 X 条事件` 输出 |
| 事件脉络不刷新 | WebSocket 未连上 | 确认 `npm run api` 已启动,前端需通过 `npm run dev` 访问Vite 会代理 /ws |
| GDELT 抓取失败 | 系统代理超时 / ProxyError | 爬虫默认直连,不走代理;若需代理请设 `CRAWLER_USE_PROXY=1` |
| GDELT 抓取失败 | 网络 / GDELT API 限流 | 检查 Python 终端报错GDELT 在国外,国内网络可能较慢或超时 |
| 新闻条数为 0 | RSS 源被墙或关键词不匹配 | 检查 crawler/config.py 中 RSS_FEEDS、KEYWORDS国内需代理 |
| **返回数据偏老** | GDELT 默认 3 个月内按相关性 | 设置 `GDELT_TIMESPAN=1d` 限制为近日;加 `sort=datedesc` 最新优先 |

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
"""
Fetch full article bodies from source URLs so the AI extractor has precise data.

RSS feeds only carry a title and a short summary; the full body provides the
concrete numbers and facts (casualties, unit designations, locations, etc.)
that downstream extraction needs.
"""
import os
import re
from typing import Optional

# Per-page request timeout in seconds (env ARTICLE_FETCH_TIMEOUT, default 12).
FETCH_TIMEOUT = int(os.environ.get("ARTICLE_FETCH_TIMEOUT", "12"))
# Maximum number of body characters kept, to avoid over-long AI inputs.
MAX_BODY_CHARS = int(os.environ.get("ARTICLE_MAX_BODY_CHARS", "6000"))
# Whether body fetching is enabled at all (set FETCH_FULL_ARTICLE=0 to use
# only the title + summary from the feed).
FETCH_FULL_ARTICLE = os.environ.get("FETCH_FULL_ARTICLE", "1") == "1"
def _strip_html(html: str) -> str:
"""简单去除 HTML 标签与多余空白"""
if not html:
return ""
text = re.sub(r"<script[^>]*>[\s\S]*?</script>", " ", html, flags=re.I)
text = re.sub(r"<style[^>]*>[\s\S]*?</style>", " ", text, flags=re.I)
text = re.sub(r"<[^>]+>", " ", text)
text = re.sub(r"\s+", " ", text).strip()
return text
def fetch_article_body(url: str, timeout: int = FETCH_TIMEOUT) -> Optional[str]:
    """
    Fetch an article URL and return its body as plain text, or None.

    Returns None when: the URL is empty / not http(s), the request fails,
    the Content-Type is neither HTML nor XML, or the payload is shorter
    than 200 characters. Prefers BeautifulSoup to locate article/main or
    common content containers; falls back to regex tag stripping when bs4
    is unavailable or no container yields enough text. Output is capped
    at MAX_BODY_CHARS.
    """
    if not url or not url.strip().startswith("http"):
        return None
    try:
        import requests
        headers = {"User-Agent": "US-Iran-Dashboard/1.0 (News Aggregator)"}
        # Bypass any system proxy by default (a proxy can time out);
        # set CRAWLER_USE_PROXY=1 to honor environment proxy settings.
        proxies = {"http": None, "https": None} if os.environ.get("CRAWLER_USE_PROXY") != "1" else None
        r = requests.get(url, headers=headers, timeout=timeout, proxies=proxies)
        r.raise_for_status()
        ct = (r.headers.get("Content-Type") or "").lower()
        if "html" not in ct and "xml" not in ct:
            return None
        html = r.text
        if not html or len(html) < 200:
            return None
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            # bs4 not installed: degrade to plain regex tag stripping.
            return _strip_html(html)[:MAX_BODY_CHARS]
        try:
            soup = BeautifulSoup(html, "html.parser")
            # Try common article containers first; entries starting with
            # '.' or '[' are CSS selectors, the rest are tag names.
            for tag in ("article", "main", "[role='main']", ".article-body", ".post-content", ".entry-content", ".content"):
                if tag.startswith((".", "[")):
                    node = soup.select_one(tag)
                else:
                    node = soup.find(tag)
                if node:
                    body = node.get_text(separator=" ", strip=True)
                    # Only accept a container with substantial text.
                    if len(body) > 300:
                        return _strip_html(body)[:MAX_BODY_CHARS]
            body = soup.body.get_text(separator=" ", strip=True) if soup.body else ""
            if len(body) > 300:
                return _strip_html(body)[:MAX_BODY_CHARS]
        except Exception:
            pass
        # Last resort: strip tags from the raw HTML.
        return _strip_html(html)[:MAX_BODY_CHARS]
    except Exception:
        return None
def enrich_item_with_body(item: dict, max_chars: int = MAX_BODY_CHARS) -> None:
    """Fetch the article body for item["url"] and store the combined
    title/summary/body text in item["full_text"], truncated to max_chars.
    Mutates item in place; no-op when fetching is disabled, the URL is
    empty, the item already has full_text, or the fetch yields nothing.
    Gives the AI extractor more context than the RSS snippet alone.
    """
    if not FETCH_FULL_ARTICLE:
        return
    link = (item.get("url") or "").strip()
    if not link or item.get("full_text"):
        return
    body_text = fetch_article_body(link)
    if not body_text:
        return
    parts = [(item.get("title") or "").strip()]
    abstract = (item.get("summary") or "").strip()
    if abstract:
        parts.append(abstract)
    parts.append(body_text)
    item["full_text"] = "\n".join(parts)[:max_chars]

125
crawler/cleaner_ai.py Normal file
View File

@@ -0,0 +1,125 @@
# -*- coding: utf-8 -*-
"""
AI 清洗新闻数据,严格按面板字段约束输出
面板 EventTimelinePanel 所需summary(≤120字)、category(枚举)、severity(枚举)
优先使用 DASHSCOPE_API_KEY通义无需 Ollama否则 Ollama最后规则兜底
"""
import os
import re
from typing import Optional
CLEANER_AI_DISABLED = os.environ.get("CLEANER_AI_DISABLED", "0") == "1"
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "").strip()
# 面板 schema必须与 EventTimelinePanel / SituationUpdate 一致
SUMMARY_MAX_LEN = 120 # 面板 line-clamp-2 展示
CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
SEVERITIES = ("low", "medium", "high", "critical")
def _sanitize_summary(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
"""确保 summary 符合面板:纯文本、无换行、限制长度"""
if not text or not isinstance(text, str):
return ""
s = re.sub(r"\s+", " ", str(text).strip())
s = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", s) # 去除控制字符
return s[:max_len].rstrip()
def _rule_clean(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
"""规则清洗:去空白、去控制符、截断"""
return _sanitize_summary(text, max_len)
def _call_dashscope_summary(text: str, max_len: int, timeout: int = 8) -> Optional[str]:
    """Summarize news text via Alibaba DashScope (qwen-turbo); no Ollama needed.

    Requires DASHSCOPE_API_KEY. Returns a sanitized plain-text summary of at
    most max_len characters, or None when disabled, the input is too short,
    the API call fails, or the model output is unusable (callers then fall
    back to Ollama or rule cleaning).
    NOTE(review): `timeout` is not forwarded to the SDK call — confirm
    whether dashscope.Generation.call supports a timeout parameter.
    """
    if not DASHSCOPE_API_KEY or CLEANER_AI_DISABLED or not text or len(str(text).strip()) < 5:
        return None
    try:
        import dashscope
        from http import HTTPStatus
        dashscope.api_key = DASHSCOPE_API_KEY
        # Bug fix: the length limit in the prompt was a garbled literal
        # ("限69,684字内"); it now states the actual max_len constraint.
        prompt = f"""将新闻提炼为1-2句简洁中文事实直接输出纯文本不要标号、引号、解释。限{max_len}字内。
原文:{str(text)[:350]}
输出:"""
        r = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=150,
        )
        if r.status_code != HTTPStatus.OK:
            return None
        out = (r.output.get("choices", [{}])[0].get("message", {}).get("content", "") or "").strip()
        out = re.sub(r"^[\d\.\-\*\s]+", "", out)  # drop leading list numbering
        out = re.sub(r"^['\"\s]+|['\"\s]+$", "", out)  # drop wrapping quotes
        out = _sanitize_summary(out, max_len)
        if out and len(out) > 3:
            return out
        return None
    except Exception:
        return None
def _call_ollama_summary(text: str, max_len: int, timeout: int = 6) -> Optional[str]:
    """Summarize news text via a local Ollama chat model.

    Returns a sanitized plain-text summary of at most max_len characters, or
    None when AI cleaning is disabled, the input is too short, the Ollama
    server is unreachable, or the model output is unusable (callers then
    fall back to rule cleaning).
    """
    if CLEANER_AI_DISABLED or not text or len(str(text).strip()) < 5:
        return None
    try:
        import requests
        # Bug fix: the length limit in the prompt was a garbled literal
        # ("限69,684字内"); it now states the actual max_len constraint.
        prompt = f"""将新闻提炼为1-2句简洁中文事实直接输出纯文本不要标号、引号、解释。限{max_len}字内。
原文:{str(text)[:350]}
输出:"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "options": {"num_predict": 150},
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        out = (r.json().get("message", {}).get("content", "") or "").strip()
        out = re.sub(r"^[\d\.\-\*\s]+", "", out)  # drop leading list numbering
        out = re.sub(r"^['\"\s]+|['\"\s]+$", "", out)  # drop wrapping quotes
        out = _sanitize_summary(out, max_len)
        if out and len(out) > 3:
            return out
        return None
    except Exception:
        return None
def clean_news_for_panel(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Clean a summary for EventTimelinePanel; always returns plain text of
    at most max_len characters. Order of preference: DashScope (when the
    API key is configured), Ollama, then the rule-based fallback."""
    if not text or not isinstance(text, str):
        return ""
    stripped = str(text).strip()
    if not stripped:
        return ""
    if DASHSCOPE_API_KEY:
        ai_summary = _call_dashscope_summary(stripped, max_len, timeout=8)
    else:
        ai_summary = _call_ollama_summary(stripped, max_len, timeout=6)
    return ai_summary if ai_summary else _rule_clean(stripped, max_len)
def ensure_category(cat: str) -> str:
    """Coerce a category to the panel enum; unknown values become "other"."""
    if cat in CATEGORIES:
        return cat
    return "other"
def ensure_severity(sev: str) -> str:
    """Coerce a severity to the panel enum; unknown values become "medium"."""
    if sev in SEVERITIES:
        return sev
    return "medium"

108
crawler/config.py Normal file
View File

@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
"""爬虫配置"""
import os
from pathlib import Path
# 数据库路径(与 server 共用 SQLite
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))
# Node API 地址(用于通知推送)
API_BASE = os.environ.get("API_BASE", "http://localhost:3001")
# 阿里云 DashScope API Key用于 AI 提取面板数据,不设则回退到规则/Ollama
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
# 抓取间隔(秒)
CRAWL_INTERVAL = int(os.environ.get("CRAWL_INTERVAL", "300"))
# 单源抓取超时(秒),避免某源卡住拖垮整轮
FEED_TIMEOUT = int(os.environ.get("FEED_TIMEOUT", "12"))
# RSS 源:世界主流媒体,覆盖美伊/中东多视角
# 每项为 URL 字符串,或 {"name": "显示名", "url": "..."} 便于日志与排查
RSS_FEEDS = [
    # United States
    # NOTE(review): feeds.reuters.com was reportedly retired by Reuters —
    # verify this endpoint still resolves and replace it if dead.
    "https://feeds.reuters.com/reuters/topNews",
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
    # United Kingdom
    "https://feeds.bbci.co.uk/news/world/rss.xml",
    "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml",
    "https://www.theguardian.com/world/rss",
    # France
    "https://www.france24.com/en/rss",
    # Germany
    "https://rss.dw.com/xml/rss-en-world",
    # Russia
    "https://tass.com/rss/v2.xml",
    "https://www.rt.com/rss/",
    # China (English-language)
    "https://english.news.cn/rss/world.xml",
    "https://www.cgtn.com/rss/world",
    # Phoenix Media (military + world; Chinese-language perspective)
    {"name": "凤凰军事", "url": "https://feedx.net/rss/ifengmil.xml"},
    {"name": "凤凰国际", "url": "https://feedx.net/rss/ifengworld.xml"},
    # Mainland outlets (reachable without a proxy; add Chinese coverage)
    {"name": "人民网军事", "url": "http://www.people.com.cn/rss/military.xml"},
    {"name": "人民网国际", "url": "http://www.people.com.cn/rss/world.xml"},
    {"name": "新浪军事", "url": "http://rss.sina.com.cn/rss/jczs/index.shtml"},
    {"name": "新浪新闻", "url": "http://rss.sina.com.cn/rss/roll/news.xml"},
    {"name": "中国日报国际", "url": "http://www.chinadaily.com.cn/rss/world_rss.xml"},
    {"name": "中国军网", "url": "https://english.chinamil.com.cn/rss.xml"},
    # Iran
    "https://www.presstv.ir/rss",
    # Qatar (Middle East)
    "https://www.aljazeera.com/xml/rss/all.xml",
    "https://www.aljazeera.com/xml/rss/middleeast.xml",
]
def get_feed_sources():
    """Return RSS_FEEDS normalized to [(name, url), ...].

    `name` is used for logging: dict entries use their "name" key, bare URL
    strings default to the URL's host; either falls back to "rss". Entries
    with an empty URL are dropped.
    """
    import urllib.parse
    sources = []
    for entry in RSS_FEEDS:
        if isinstance(entry, dict):
            label = entry.get("name") or "rss"
            link = entry.get("url", "").strip()
        else:
            link = (entry or "").strip()
            label = urllib.parse.urlparse(link).netloc or "rss"
        if link:
            sources.append((label, link))
    return sources
# Keyword filter: an item must match at least one keyword to be stored
# (keywords mirror the map regions: Iraq / Syria / Gulf / Red Sea / Mediterranean etc.).
KEYWORDS = [
    # Iran
    "iran", "iranian", "tehran", "德黑兰", "bushehr", "布什尔", "abbas", "阿巴斯",
    # Israel / Palestine
    "israel", "以色列", "hamas", "gaza", "加沙", "hezbollah", "真主党",
    # United States ("us " keeps its trailing space to avoid matching words like "using")
    "usa", "us ", "american", "美军", "美国", "pentagon",
    # Regions (map coverage)
    "middle east", "中东", "persian gulf", "波斯湾", "gulf of oman", "阿曼湾",
    "arabian sea", "阿拉伯海", "red sea", "红海", "mediterranean", "地中海",
    "strait of hormuz", "霍尔木兹",
    # Iraq / Syria
    "iraq", "伊拉克", "baghdad", "巴格达", "erbil", "埃尔比勒", "basra", "巴士拉",
    "syria", "叙利亚", "damascus", "大马士革", "deir", "代尔祖尔",
    # Gulf states
    "saudi", "沙特", "riyadh", "利雅得", "qatar", "卡塔尔", "doha", "多哈",
    "uae", "emirates", "阿联酋", "dubai", "迪拜", "abu dhabi",
    "bahrain", "巴林", "kuwait", "科威特", "oman", "阿曼", "yemen", "也门",
    # Jordan / Turkey / Egypt / Djibouti / Lebanon
    "jordan", "约旦", "amman", "安曼",
    "lebanon", "黎巴嫩",
    "turkey", "土耳其", "incirlik", "因吉尔利克",
    "egypt", "埃及", "cairo", "开罗", "sinai", "西奈",
    "djibouti", "吉布提",
    # Military / bases
    "al-asad", "al asad", "阿萨德", "al udeid", "乌代德", "incirlik",
    # Bug fix: an empty string "" sat between "missile" and "nuclear"
    # (apparently a Chinese term lost in an encoding pass). An empty keyword
    # matches EVERY text under substring filtering, defeating the whole
    # filter; restored as "导弹" (missile).
    "strike", "attack", "military", "missile", "导弹", "nuclear",
    "carrier", "航母", "drone", "uav", "无人机", "retaliation", "报复",
    "base", "基地", "troops", "troop", "soldier", "personnel",
    # Houthis / militias / forces
    "houthi", "胡塞", "houthis",
    "idf", "irgc", "革命卫队", "qassem soleimani", "苏莱曼尼",
]

282
crawler/db_merge.py Normal file
View File

@@ -0,0 +1,282 @@
# -*- coding: utf-8 -*-
"""
将 AI 提取的结构化数据合并到 SQLite
与 panel schema 及 situationData.getSituation 对齐,支持回放。
地图打击数据(与前端攻击动画一致):
- map_strike_sources: [{ "id": "israel"|"lincoln"|"ford", "name": "显示名", "lng", "lat" }] 写入 map_strike_source
- map_strike_lines: [{ "source_id", "target_lng", "target_lat", "target_name?", "struck_at?" }] 追加到 map_strike_line
爬虫/AI 可按此格式输出,落库后 GET /api/situation 的 mapData.strikeSources/strikeLines 会更新,前端直接追加攻击动画。
"""
import os
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))
# 单次合并时各字段增量的上限,防止误把「累计总数」当增量导致数据剧增(可选,设为 0 表示不设限)
MAX_DELTA_PER_MERGE = {
"personnel_killed": 500, "personnel_wounded": 1000, "civilian_killed": 300, "civilian_wounded": 500,
"bases_destroyed": 5, "bases_damaged": 10,
"aircraft": 50, "warships": 10, "armor": 30, "vehicles": 100,
"drones": 50, "missiles": 200, "helicopters": 20, "submarines": 5, "carriers": 2,
"civilian_ships": 20, "airport_port": 10,
}
def _clamp_delta(key: str, value: int) -> int:
"""单次增量上限,避免误提「累计」导致波动"""
cap = MAX_DELTA_PER_MERGE.get(key, 0)
if cap <= 0:
return max(0, value)
return max(0, min(value, cap))
def _ensure_tables(conn: sqlite3.Connection) -> None:
    """Create or migrate every table this module writes (schema mirrors server/db.js).

    Idempotent: tables use CREATE TABLE IF NOT EXISTS, and column additions
    via ALTER TABLE are wrapped in try/except sqlite3.OperationalError so
    they are no-ops when the column already exists.
    """
    conn.execute("""
    CREATE TABLE IF NOT EXISTS situation_update (
        id TEXT PRIMARY KEY, timestamp TEXT NOT NULL, category TEXT NOT NULL,
        summary TEXT NOT NULL, severity TEXT NOT NULL
    )
    """)
    conn.execute("""
    CREATE TABLE IF NOT EXISTS combat_losses (
        side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
        bases_destroyed INTEGER NOT NULL, bases_damaged INTEGER NOT NULL,
        personnel_killed INTEGER NOT NULL, personnel_wounded INTEGER NOT NULL,
        aircraft INTEGER NOT NULL, warships INTEGER NOT NULL, armor INTEGER NOT NULL, vehicles INTEGER NOT NULL
    )
    """)
    # Older databases may predate these columns; add them one at a time,
    # swallowing the "duplicate column" OperationalError.
    try:
        conn.execute("ALTER TABLE combat_losses ADD COLUMN civilian_killed INTEGER NOT NULL DEFAULT 0")
    except sqlite3.OperationalError:
        pass
    try:
        conn.execute("ALTER TABLE combat_losses ADD COLUMN civilian_wounded INTEGER NOT NULL DEFAULT 0")
    except sqlite3.OperationalError:
        pass
    try:
        conn.execute("ALTER TABLE combat_losses ADD COLUMN updated_at TEXT DEFAULT (datetime('now'))")
    except sqlite3.OperationalError:
        pass
    for col in ("drones", "missiles", "helicopters", "submarines", "tanks", "carriers", "civilian_ships", "airport_port"):
        try:
            conn.execute(f"ALTER TABLE combat_losses ADD COLUMN {col} INTEGER NOT NULL DEFAULT 0")
        except sqlite3.OperationalError:
            pass
    conn.execute("CREATE TABLE IF NOT EXISTS wall_street_trend (id INTEGER PRIMARY KEY AUTOINCREMENT, time TEXT NOT NULL, value INTEGER NOT NULL)")
    conn.execute("CREATE TABLE IF NOT EXISTS retaliation_current (id INTEGER PRIMARY KEY CHECK (id = 1), value INTEGER NOT NULL)")
    conn.execute("CREATE TABLE IF NOT EXISTS retaliation_history (id INTEGER PRIMARY KEY AUTOINCREMENT, time TEXT NOT NULL, value INTEGER NOT NULL)")
    conn.execute("CREATE TABLE IF NOT EXISTS situation (id INTEGER PRIMARY KEY CHECK (id = 1), data TEXT NOT NULL, updated_at TEXT NOT NULL)")
    # Map strike sources/lines (same schema as server/db.js); feed the
    # frontend attack animation via GET /api/situation mapData.
    conn.execute("""
    CREATE TABLE IF NOT EXISTS map_strike_source (
        id TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        lng REAL NOT NULL,
        lat REAL NOT NULL
    )
    """)
    conn.execute("""
    CREATE TABLE IF NOT EXISTS map_strike_line (
        source_id TEXT NOT NULL,
        target_lng REAL NOT NULL,
        target_lat REAL NOT NULL,
        target_name TEXT,
        struck_at TEXT,
        FOREIGN KEY (source_id) REFERENCES map_strike_source(id)
    )
    """)
    try:
        conn.execute("CREATE INDEX IF NOT EXISTS idx_map_strike_line_source ON map_strike_line(source_id)")
    except sqlite3.OperationalError:
        pass
    try:
        # PRAGMA-based column migration (ALTER ... ADD COLUMN has no IF NOT EXISTS).
        for col in ("struck_at",):
            cur = conn.execute("PRAGMA table_info(map_strike_line)")
            cols = [r[1] for r in cur.fetchall()]
            if col not in cols:
                conn.execute(f"ALTER TABLE map_strike_line ADD COLUMN {col} TEXT")
    except sqlite3.OperationalError:
        pass
    conn.commit()
def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
    """Merge AI-extracted structured data into SQLite; return True when any row changed.

    Recognized keys in `extracted` (all optional): situation_update,
    combat_losses_delta, force_summary_delta, retaliation, wall_street,
    key_location_updates, map_strike_sources, map_strike_lines (map shapes
    are documented in the module docstring). When anything changed,
    situation.updated_at is bumped so clients refresh. Returns False when
    the DB file does not exist; rolls back and re-raises on unexpected errors.

    Fixes over the previous revision:
      * situation_update ids are now a stable SHA-256 content hash; the old
        built-in hash() is salted per process (PYTHONHASHSEED), so identical
        items got fresh ids on every run and INSERT OR IGNORE never
        deduplicated across runs.
      * change detection compares conn.total_changes against a snapshot;
        conn.total_changes is cumulative for the connection, so the old bare
        `> 0` checks stayed true forever after the first successful write.
    """
    import hashlib  # local import keeps the module's import block unchanged
    path = db_path or DB_PATH
    if not os.path.exists(path):
        return False
    conn = sqlite3.connect(path, timeout=10)
    try:
        _ensure_tables(conn)
        updated = False
        # situation_update: dedupe on a stable content hash of summary+timestamp.
        if "situation_update" in extracted:
            u = extracted["situation_update"]
            digest = hashlib.sha256((u.get("summary", "") + u.get("timestamp", "")).encode("utf-8")).hexdigest()[:16]
            uid = f"ai_{digest}"
            mark = conn.total_changes
            conn.execute(
                "INSERT OR IGNORE INTO situation_update (id, timestamp, category, summary, severity) VALUES (?, ?, ?, ?, ?)",
                (uid, u.get("timestamp", ""), u.get("category", "other"), u.get("summary", "")[:500], u.get("severity", "medium")),
            )
            if conn.total_changes > mark:
                updated = True
        # combat_losses: strictly incremental. The AI reports per-article
        # deltas; each is clamped (_clamp_delta) and added onto the stored
        # totals so a mis-extracted cumulative figure cannot inflate them.
        if "combat_losses_delta" in extracted:
            for side, delta in extracted["combat_losses_delta"].items():
                if side not in ("us", "iran"):
                    continue
                try:
                    row = conn.execute(
                        "SELECT personnel_killed,personnel_wounded,civilian_killed,civilian_wounded,bases_destroyed,bases_damaged,aircraft,warships,armor,vehicles,drones,missiles,helicopters,submarines,tanks,carriers,civilian_ships,airport_port FROM combat_losses WHERE side = ?",
                        (side,),
                    ).fetchone()
                    cur = {"personnel_killed": 0, "personnel_wounded": 0, "civilian_killed": 0, "civilian_wounded": 0,
                           "bases_destroyed": 0, "bases_damaged": 0, "aircraft": 0, "warships": 0, "armor": 0, "vehicles": 0,
                           "drones": 0, "missiles": 0, "helicopters": 0, "submarines": 0, "tanks": 0, "carriers": 0, "civilian_ships": 0, "airport_port": 0}
                    if row:
                        # Defensive indexing: older rows may lack trailing columns.
                        cur = {
                            "personnel_killed": row[0], "personnel_wounded": row[1], "civilian_killed": row[2] or 0,
                            "civilian_wounded": row[3] or 0, "bases_destroyed": row[4], "bases_damaged": row[5],
                            "aircraft": row[6], "warships": row[7], "armor": row[8], "vehicles": row[9],
                            "drones": row[10] if len(row) > 10 else 0, "missiles": row[11] if len(row) > 11 else 0,
                            "helicopters": row[12] if len(row) > 12 else 0, "submarines": row[13] if len(row) > 13 else 0,
                            "tanks": row[14] if len(row) > 14 else 0, "carriers": row[15] if len(row) > 15 else (row[14] if len(row) > 14 else 0),
                            "civilian_ships": row[16] if len(row) > 16 else 0, "airport_port": row[17] if len(row) > 17 else 0,
                        }
                    pk = max(0, (cur["personnel_killed"] or 0) + _clamp_delta("personnel_killed", delta.get("personnel_killed", 0)))
                    pw = max(0, (cur["personnel_wounded"] or 0) + _clamp_delta("personnel_wounded", delta.get("personnel_wounded", 0)))
                    ck = max(0, (cur["civilian_killed"] or 0) + _clamp_delta("civilian_killed", delta.get("civilian_killed", 0)))
                    cw = max(0, (cur["civilian_wounded"] or 0) + _clamp_delta("civilian_wounded", delta.get("civilian_wounded", 0)))
                    bd = max(0, (cur["bases_destroyed"] or 0) + _clamp_delta("bases_destroyed", delta.get("bases_destroyed", 0)))
                    bm = max(0, (cur["bases_damaged"] or 0) + _clamp_delta("bases_damaged", delta.get("bases_damaged", 0)))
                    ac = max(0, (cur["aircraft"] or 0) + _clamp_delta("aircraft", delta.get("aircraft", 0)))
                    ws = max(0, (cur["warships"] or 0) + _clamp_delta("warships", delta.get("warships", 0)))
                    ar = max(0, (cur["armor"] or 0) + _clamp_delta("armor", delta.get("armor", 0)))
                    vh = max(0, (cur["vehicles"] or 0) + _clamp_delta("vehicles", delta.get("vehicles", 0)))
                    dr = max(0, (cur["drones"] or 0) + _clamp_delta("drones", delta.get("drones", 0)))
                    ms = max(0, (cur["missiles"] or 0) + _clamp_delta("missiles", delta.get("missiles", 0)))
                    hp = max(0, (cur["helicopters"] or 0) + _clamp_delta("helicopters", delta.get("helicopters", 0)))
                    sb = max(0, (cur["submarines"] or 0) + _clamp_delta("submarines", delta.get("submarines", 0)))
                    cr = max(0, (cur["carriers"] or 0) + _clamp_delta("carriers", delta.get("carriers", 0)))
                    cs = max(0, (cur["civilian_ships"] or 0) + _clamp_delta("civilian_ships", delta.get("civilian_ships", 0)))
                    ap = max(0, (cur["airport_port"] or 0) + _clamp_delta("airport_port", delta.get("airport_port", 0)))
                    ts = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z")
                    mark = conn.total_changes
                    if row:
                        conn.execute(
                            """UPDATE combat_losses SET personnel_killed=?, personnel_wounded=?, civilian_killed=?, civilian_wounded=?,
                            bases_destroyed=?, bases_damaged=?, aircraft=?, warships=?, armor=?, vehicles=?,
                            drones=?, missiles=?, helicopters=?, submarines=?, tanks=?, carriers=?, civilian_ships=?, airport_port=?, updated_at=? WHERE side=?""",
                            (pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, cur.get("tanks", 0), cr, cs, ap, ts, side),
                        )
                    else:
                        conn.execute(
                            """INSERT OR REPLACE INTO combat_losses (side, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
                            bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles, drones, missiles, helicopters, submarines, tanks, carriers, civilian_ships, airport_port, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                            (side, pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, 0, cr, cs, ap, ts),
                        )
                    if conn.total_changes > mark:
                        updated = True
                except Exception:
                    # Best-effort per side; a malformed delta must not abort the merge.
                    pass
        # force_summary deltas: missile consumption feeds the dashboard's
        # "missiles consumed" / "missile stock" figures; capped at 500/merge.
        if "force_summary_delta" in extracted:
            for side, delta in extracted["force_summary_delta"].items():
                if side not in ("us", "iran"):
                    continue
                mc = delta.get("missile_consumed")
                if mc is not None and isinstance(mc, (int, float)) and mc > 0:
                    mc = min(int(mc), 500)
                    try:
                        cur = conn.execute(
                            "UPDATE force_summary SET missile_consumed = missile_consumed + ?, missile_stock = max(0, missile_stock - ?) WHERE side = ?",
                            (mc, mc, side),
                        )
                        if cur.rowcount > 0:
                            updated = True
                    except Exception:
                        pass
        # retaliation index: replace the current value, append to history.
        if "retaliation" in extracted:
            r = extracted["retaliation"]
            conn.execute("INSERT OR REPLACE INTO retaliation_current (id, value) VALUES (1, ?)", (r["value"],))
            conn.execute("INSERT INTO retaliation_history (time, value) VALUES (?, ?)", (r["time"], r["value"]))
            updated = True
        # wall_street_trend: append-only time series.
        if "wall_street" in extracted:
            w = extracted["wall_street"]
            conn.execute("INSERT INTO wall_street_trend (time, value) VALUES (?, ?)", (w["time"], w["value"]))
            updated = True
        # key_location: update status/damage_level of struck locations
        # (side=us → US/allied bases hit; side=iran → Iranian sites hit).
        if "key_location_updates" in extracted:
            try:
                for u in extracted["key_location_updates"]:
                    kw_raw = (u.get("name_keywords") or "").strip()
                    if not kw_raw:
                        continue
                    # Accept "a|b|c" or "a b c" separators.
                    kw = [k.strip() for k in kw_raw.replace("|", " ").split() if k.strip()]
                    side = u.get("side")
                    status = (u.get("status") or "attacked")[:20]
                    dmg = u.get("damage_level", 2)
                    if not kw or side not in ("us", "iran"):
                        continue
                    # Simple matching: OR-join `name LIKE '%kw%'` per keyword (works for CJK and Latin).
                    conditions = " OR ".join("name LIKE ?" for _ in kw)
                    params = [status, dmg, side] + [f"%{k}%" for k in kw]
                    cur = conn.execute(
                        f"UPDATE key_location SET status=?, damage_level=? WHERE side=? AND ({conditions})",
                        params,
                    )
                    if cur.rowcount > 0:
                        updated = True
            except Exception:
                pass
        # map_strike_source: upsert strike origins (mirrors mapData.strikeSources).
        if "map_strike_sources" in extracted:
            try:
                mark = conn.total_changes
                for s in extracted["map_strike_sources"]:
                    sid = (s.get("id") or "").strip()
                    name = (s.get("name") or "").strip() or sid
                    lng = float(s.get("lng", 0))
                    lat = float(s.get("lat", 0))
                    if sid:
                        conn.execute(
                            "INSERT OR REPLACE INTO map_strike_source (id, name, lng, lat) VALUES (?, ?, ?, ?)",
                            (sid, name[:200], lng, lat),
                        )
                if conn.total_changes > mark:
                    updated = True
            except Exception:
                pass
        # map_strike_line: append strikes (mirrors mapData.strikeLines); the
        # frontend appends an attack animation for each new line.
        if "map_strike_lines" in extracted:
            try:
                mark = conn.total_changes
                for line in extracted["map_strike_lines"]:
                    source_id = (line.get("source_id") or "").strip()
                    target_lng = float(line.get("target_lng", 0))
                    target_lat = float(line.get("target_lat", 0))
                    target_name = (line.get("target_name") or "").strip()[:200] or None
                    struck_at = (line.get("struck_at") or "").strip() or None
                    if source_id:
                        conn.execute(
                            "INSERT INTO map_strike_line (source_id, target_lng, target_lat, target_name, struck_at) VALUES (?, ?, ?, ?, ?)",
                            (source_id, target_lng, target_lat, target_name, struck_at),
                        )
                if conn.total_changes > mark:
                    updated = True
            except Exception:
                pass
        if updated:
            # Bump updated_at so polling clients notice the change.
            conn.execute("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)", (datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z"),))
        conn.commit()
        return updated
    except Exception:
        conn.rollback()
        raise  # bare raise preserves the original traceback
    finally:
        conn.close()

126
crawler/db_writer.py Normal file
View File

@@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
"""写入 SQLite 并确保 situation_update 表存在"""
import sqlite3
import hashlib
import os
from datetime import datetime, timezone
from typing import List, Optional
from config import DB_PATH
CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
SEVERITIES = ("low", "medium", "high", "critical")
def _ensure_table(conn: sqlite3.Connection) -> None:
conn.execute("""
CREATE TABLE IF NOT EXISTS situation_update (
id TEXT PRIMARY KEY,
timestamp TEXT NOT NULL,
category TEXT NOT NULL,
summary TEXT NOT NULL,
severity TEXT NOT NULL
)
""")
conn.commit()
def _make_id(title: str, url: str, published: str) -> str:
raw = f"{title}|{url}|{published}"
return "nw_" + hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
def _to_utc_iso(dt: datetime) -> str:
if dt.tzinfo:
dt = dt.astimezone(timezone.utc)
return dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")
def insert_update(
    conn: sqlite3.Connection,
    title: str,
    summary: str,
    url: str,
    published: datetime,
    category: str = "other",
    severity: str = "medium",
) -> bool:
    """Insert one situation update; skip silently when the id already exists.

    category/severity outside the panel enums fall back to "other"/"medium";
    summary is truncated to 500 characters. Returns True only when a NEW row
    was inserted; False on duplicates or errors (errors roll back).
    """
    _ensure_table(conn)
    ts = _to_utc_iso(published)
    uid = _make_id(title, url, ts)
    if category not in CATEGORIES:
        category = "other"
    if severity not in SEVERITIES:
        severity = "medium"
    try:
        # Bug fix: conn.total_changes is cumulative for the connection, so
        # the old `return conn.total_changes > 0` reported True for duplicate
        # rows once ANY earlier insert on this connection had succeeded.
        # Compare against a snapshot taken before the INSERT instead.
        before = conn.total_changes
        conn.execute(
            "INSERT OR IGNORE INTO situation_update (id, timestamp, category, summary, severity) VALUES (?, ?, ?, ?, ?)",
            (uid, ts, category, summary[:500], severity),
        )
        conn.commit()
        return conn.total_changes > before
    except Exception:
        conn.rollback()
        return False
def touch_situation_updated_at(conn: sqlite3.Connection) -> None:
    """Upsert situation row 1 with an empty payload and the current UTC time
    (frontend uses updated_at as the "last crawl" timestamp)."""
    now_iso = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z")
    conn.execute(
        "INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)",
        (now_iso,),
    )
    conn.commit()
def touch_situation_updated_at_path(db_path: Optional[str] = None) -> bool:
    """Open the database at db_path (default: config.DB_PATH) and bump
    situation.updated_at so the frontend shows the latest crawl time.
    Returns False when the DB file does not exist, True otherwise."""
    target = db_path or DB_PATH
    if not os.path.exists(target):
        return False
    conn = sqlite3.connect(target, timeout=10)
    try:
        touch_situation_updated_at(conn)
        return True
    finally:
        conn.close()
def write_updates(updates: List[dict], db_path: Optional[str] = None) -> int:
    """
    Insert a batch of situation updates; return the number of NEW rows.

    updates: [{"title","summary","url","published","category","severity"}, ...]
      - published may be an ISO-8601 string (a trailing 'Z' is accepted),
        a datetime, or None (falls back to the current UTC time).
      - summary falls back to title when empty; title is truncated to 200 chars.
    db_path: same semantics as the pipeline; defaults to config.DB_PATH.
    Returns 0 without writing when the DB file does not exist.
    """
    path = db_path or DB_PATH
    if not os.path.exists(path):
        return 0
    conn = sqlite3.connect(path, timeout=10)
    try:
        count = 0
        for u in updates:
            pub = u.get("published")
            if isinstance(pub, str):
                try:
                    # fromisoformat rejects a bare 'Z'; normalize to +00:00.
                    pub = datetime.fromisoformat(pub.replace("Z", "+00:00"))
                except ValueError:
                    pub = datetime.utcnow()
            elif pub is None:
                pub = datetime.utcnow()
            ok = insert_update(
                conn,
                title=u.get("title", "")[:200],
                summary=u.get("summary", "") or u.get("title", ""),
                url=u.get("url", ""),
                published=pub,
                category=u.get("category", "other"),
                severity=u.get("severity", "medium"),
            )
            if ok:
                count += 1
        if count > 0:
            # Bump situation.updated_at only when something new landed.
            touch_situation_updated_at(conn)
        return count
    finally:
        conn.close()

144
crawler/extractor_ai.py Normal file
View File

@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
"""
从新闻文本中 AI 提取结构化数据,映射到面板 schema
输出符合 panel_schema 的字段,供 db_merge 写入
"""
import json
import os
import re
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from panel_schema import validate_category, validate_severity, validate_summary
CLEANER_AI_DISABLED = os.environ.get("CLEANER_AI_DISABLED", "0") == "1"
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
# 用于 AI 提取的原文最大长度(有正文时取更长以提取精确数据)
EXTRACT_TEXT_MAX_LEN = int(os.environ.get("EXTRACT_TEXT_MAX_LEN", "4000"))
def _call_ollama_extract(text: str, timeout: int = 15) -> Optional[Dict[str, Any]]:
    """Ask a local Ollama model to extract structured facts from a news item.

    Returns the parsed JSON dict, or None when AI cleaning is disabled, the
    input is shorter than 10 chars, Ollama is unreachable, or the response
    is not valid JSON. The prompt restricts the model to figures explicitly
    stated in the article, reported as per-article increments only.
    """
    if CLEANER_AI_DISABLED or not text or len(str(text).strip()) < 10:
        return None
    try:
        import requests
        raw = str(text).strip()[:EXTRACT_TEXT_MAX_LEN]
        prompt = f"""从以下美伊/中东新闻**全文或摘要**中,提取**报道明确给出的数字与事实**,输出 JSON。
输入说明:
- 原文可能是英文、中文或其他语言English / Chinese / Arabic / Persian 等),请先理解含义,再按要求输出。
规则:
1. 仅填写报道中**直接出现、可核对**的数据,不要推测或估算。
2. 无明确依据的字段**必须省略**,不要填 0 或猜。
3. **战损一律按增量**:只填本则报道中「本次/此次/今日/本轮」**新增**的伤亡或损毁数量。若报道只给「累计总数」「迄今共」「total so far」等**不要填写**该字段(避免与库内已有累计值重复叠加)。
4. **攻击地点**:提取双方遭袭的具体地点。美军/盟军基地被打击 → side=us伊朗/亲伊设施被打击 → side=iran。name_keywords 用「中文名|英文名」便于匹配,可填多处。
字段说明:
- summary: 1-2 句中文事实概括≤80 字
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
- 战损(**仅填本则报道的新增增量**,如「此次 5 人丧生」「今日又损 2 架」):
us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded,
us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded,
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged,
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines,
us_carriers, iran_carriers, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
- retaliation_sentiment: 0-100仅当报道涉及伊朗报复/反击情绪时)
- wall_street_value: 0-100仅当报道涉及美股/市场时)
- key_location_updates: **双方攻击地点**。每项 {{ "name_keywords": "阿萨德|asad|al-asad", "side": "us或iran被打击方", "status": "attacked", "damage_level": 1-3 }}。美军基地例:阿萨德|asad、乌代德|udeid、埃尔比勒|erbil、因吉尔利克|incirlik。伊朗例德黑兰|tehran、布什尔|bushehr、伊斯法罕|isfahan、阿巴斯|abbas、纳坦兹|natanz
- **导弹消耗增量**(仅当报道明确提到「发射/消耗 了 X 枚导弹」时填,用于看板导弹消耗累计): us_missile_consumed_delta, iran_missile_consumed_delta本则报道中该方新增消耗枚数整数
原文:
{raw}
直接输出 JSON 对象,不要解释,不要加反引号或代码块标记:"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "options": {"num_predict": 384},
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        # NOTE: `raw` is reused here for the model's response text.
        raw = (r.json().get("message", {}).get("content", "") or "").strip()
        # Strip an optional markdown code fence around the JSON.
        raw = re.sub(r"^```\w*\s*|\s*```$", "", raw)
        return json.loads(raw)
    except Exception:
        return None
def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """
    Extract structured panel data from news text, strictly matching the panel schema.

    Returns a dict with any of: situation_update, combat_losses_delta,
    retaliation, wall_street, key_location_updates, force_summary_delta.
    Returns an empty dict when the AI call fails or yields nothing usable.
    timestamp defaults to the current UTC time in the panel's ISO format.
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    out: Dict[str, Any] = {}
    parsed = _call_ollama_extract(text)
    if not parsed:
        return out
    # situation_update: summary/category/severity validated against panel enums.
    if parsed.get("summary"):
        out["situation_update"] = {
            "summary": validate_summary(str(parsed["summary"])[:120], 120),
            "category": validate_category(str(parsed.get("category", "other")).lower()),
            "severity": validate_severity(str(parsed.get("severity", "medium")).lower()),
            "timestamp": ts,
        }
    # combat_losses deltas: numeric fields only, negatives floored at 0.
    loss_us = {}
    loss_ir = {}
    for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "carriers", "civilian_ships", "airport_port"]:
        uk = f"us_{k}"
        ik = f"iran_{k}"
        if uk in parsed and isinstance(parsed[uk], (int, float)):
            loss_us[k] = max(0, int(parsed[uk]))
        if ik in parsed and isinstance(parsed[ik], (int, float)):
            loss_ir[k] = max(0, int(parsed[ik]))
    if loss_us or loss_ir:
        out["combat_losses_delta"] = {}
        if loss_us:
            out["combat_losses_delta"]["us"] = loss_us
        if loss_ir:
            out["combat_losses_delta"]["iran"] = loss_ir
    # retaliation sentiment: accepted only when inside 0-100.
    if "retaliation_sentiment" in parsed:
        v = parsed["retaliation_sentiment"]
        if isinstance(v, (int, float)) and 0 <= v <= 100:
            out["retaliation"] = {"value": int(v), "time": ts}
    # wall_street index: accepted only when inside 0-100.
    if "wall_street_value" in parsed:
        v = parsed["wall_street_value"]
        if isinstance(v, (int, float)) and 0 <= v <= 100:
            out["wall_street"] = {"time": ts, "value": int(v)}
    # key_location_updates: struck locations; damage_level coerced into 1-3
    # (defaults to 2 when missing or non-numeric), status capped at 20 chars.
    if "key_location_updates" in parsed and isinstance(parsed["key_location_updates"], list):
        valid = []
        for u in parsed["key_location_updates"]:
            if isinstance(u, dict) and u.get("name_keywords") and u.get("side") in ("us", "iran"):
                valid.append({
                    "name_keywords": str(u["name_keywords"]),
                    "side": u["side"],
                    "status": str(u.get("status", "attacked"))[:20],
                    "damage_level": min(3, max(1, int(u["damage_level"]))) if isinstance(u.get("damage_level"), (int, float)) else 2,
                })
        if valid:
            out["key_location_updates"] = valid
    # force_summary deltas: missile consumption (feeds the dashboard's
    # missile_consumed counter), capped at 500 per article.
    fs_delta = {}
    for side_key, side_val in [("us_missile_consumed_delta", "us"), ("iran_missile_consumed_delta", "iran")]:
        v = parsed.get(side_key)
        if isinstance(v, (int, float)) and v > 0:
            fs_delta[side_val] = {"missile_consumed": min(500, int(v))}
    if fs_delta:
        out["force_summary_delta"] = fs_delta
    return out

View File

@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
"""
阿里云 DashScope通义千问提取面板结构化数据
从新闻文本中提取战损、报复指数、基地状态等,供 db_merge 落库
API Key 通过环境变量 DASHSCOPE_API_KEY 配置
"""
import json
import os
import re
from datetime import datetime, timezone
from typing import Any, Dict, Optional
from panel_schema import validate_category, validate_severity, validate_summary
EXTRACT_TEXT_MAX_LEN = int(os.environ.get("EXTRACT_TEXT_MAX_LEN", "4000"))
def _call_dashscope_extract(text: str, timeout: int = 15) -> Optional[Dict[str, Any]]:
    """Extract structured facts from a news item via Alibaba DashScope (qwen-turbo).

    Requires the DASHSCOPE_API_KEY env var. Returns the parsed JSON dict, or
    None when the key is missing, the input is shorter than 10 chars, the API
    call fails, or the response is not valid JSON. The prompt restricts the
    model to explicitly reported figures, as per-article increments only.
    NOTE(review): `timeout` is not forwarded to the SDK call — confirm
    whether dashscope.Generation.call accepts a timeout parameter.
    """
    api_key = os.environ.get("DASHSCOPE_API_KEY", "").strip()
    if not api_key or not text or len(str(text).strip()) < 10:
        return None
    try:
        import dashscope
        from http import HTTPStatus
        dashscope.api_key = api_key
        raw = str(text).strip()[:EXTRACT_TEXT_MAX_LEN]
        prompt = f"""从以下美伊/中东新闻**全文或摘要**中,提取**报道明确给出的数字与事实**,输出 JSON。规则
1. 仅填写报道中**直接出现、可核对**的数据,不要推测或估算。
2. 无明确依据的字段**必须省略**,不要填 0 或猜。
3. **战损一律按增量**:只填本则报道中「本次/此次/今日」**新增**数量。报道若只给「累计总数」「迄今共」**不要填**该字段。
4. **攻击地点**:提取双方遭袭地点。美军/盟军基地被打击 → side=us伊朗/亲伊设施被打击 → side=iran。name_keywords 用「中文|英文」,可填多处。
字段:
- summary: 1-2 句中文事实概括≤80 字
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
- 战损(**仅填本则报道的新增增量**: us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded, us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded, us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged, us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles, us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines, us_carriers, iran_carriers, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
- retaliation_sentiment: 0-100仅当报道涉及伊朗报复情绪时
- wall_street_value: 0-100仅当报道涉及美股/市场时)
- key_location_updates: **双方攻击地点**。每项 {{"name_keywords":"阿萨德|asad","side":"us或iran被打击方","status":"attacked","damage_level":1-3}}。美军基地:阿萨德|asad、乌代德|udeid、埃尔比勒|erbil、因吉尔利克|incirlik。伊朗德黑兰|tehran、布什尔|bushehr、伊斯法罕|isfahan、阿巴斯|abbas、纳坦兹|natanz
- **导弹消耗增量**(仅当报道明确提到「发射/消耗 了 X 枚导弹」时填): us_missile_consumed_delta, iran_missile_consumed_delta本则该方新增消耗枚数整数
原文:
{raw}
直接输出 JSON不要其他解释"""
        response = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=512,
        )
        if response.status_code != HTTPStatus.OK:
            return None
        # NOTE: `raw` is reused here for the model's response text.
        raw = (response.output.get("choices", [{}])[0].get("message", {}).get("content", "") or "").strip()
        # Strip an optional markdown code fence around the JSON.
        raw = re.sub(r"^```\w*\s*|\s*```$", "", raw)
        return json.loads(raw)
    except Exception:
        return None
def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """Extract panel-schema structured data from news text via DashScope.

    Returns a dict containing any of: situation_update, combat_losses_delta,
    retaliation, wall_street, force_summary_delta, key_location_updates.
    An empty dict means the model produced nothing usable.
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    result: Dict[str, Any] = {}
    parsed = _call_dashscope_extract(text)
    if not parsed:
        return result

    # Event-timeline entry (only when the model produced a summary).
    if parsed.get("summary"):
        result["situation_update"] = {
            "summary": validate_summary(str(parsed["summary"])[:120], 120),
            "category": validate_category(str(parsed.get("category", "other")).lower()),
            "severity": validate_severity(str(parsed.get("severity", "medium")).lower()),
            "timestamp": ts,
        }

    # Combat-loss increments: the model emits flat "us_<field>"/"iran_<field>"
    # numeric keys; regroup them per side, clamping to non-negative ints.
    fields = ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded",
              "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles",
              "drones", "missiles", "helicopters", "submarines", "carriers", "civilian_ships", "airport_port"]
    losses: Dict[str, Dict[str, int]] = {}
    for side in ("us", "iran"):
        per_side: Dict[str, int] = {}
        for field in fields:
            value = parsed.get(f"{side}_{field}")
            if isinstance(value, (int, float)):
                per_side[field] = max(0, int(value))
        if per_side:
            losses[side] = per_side
    if losses:
        result["combat_losses_delta"] = losses

    # Retaliation sentiment / Wall Street gauge: both are 0-100 values.
    sentiment = parsed.get("retaliation_sentiment")
    if isinstance(sentiment, (int, float)) and 0 <= sentiment <= 100:
        result["retaliation"] = {"value": int(sentiment), "time": ts}
    market = parsed.get("wall_street_value")
    if isinstance(market, (int, float)) and 0 <= market <= 100:
        result["wall_street"] = {"time": ts, "value": int(market)}

    # force_summary increments: missile consumption (feeds the dashboard's
    # force_summary.missile_consumed counter), capped at 500 per report.
    consumed: Dict[str, Dict[str, int]] = {}
    for side, key in (("us", "us_missile_consumed_delta"), ("iran", "iran_missile_consumed_delta")):
        value = parsed.get(key)
        if isinstance(value, (int, float)) and value > 0:
            consumed[side] = {"missile_consumed": min(500, int(value))}
    if consumed:
        result["force_summary_delta"] = consumed

    # Attacked key locations: keep only well-formed entries; damage_level is
    # clamped to 1..3 and defaults to 2 when missing/non-numeric.
    raw_updates = parsed.get("key_location_updates")
    if isinstance(raw_updates, list):
        cleaned = []
        for entry in raw_updates:
            if not (isinstance(entry, dict) and entry.get("name_keywords") and entry.get("side") in ("us", "iran")):
                continue
            level = entry.get("damage_level")
            cleaned.append({
                "name_keywords": str(entry["name_keywords"]),
                "side": entry["side"],
                "status": str(entry.get("status", "attacked"))[:20],
                "damage_level": min(3, max(1, int(level))) if isinstance(level, (int, float)) else 2,
            })
        if cleaned:
            result["key_location_updates"] = cleaned
    return result

254
crawler/extractor_rules.py Normal file
View File

@@ -0,0 +1,254 @@
# -*- coding: utf-8 -*-
"""
基于规则的新闻数据提取(无需 Ollama
从新闻文本中提取战损、报复情绪等数值,供 db_merge 写入
"""
import re
from datetime import datetime, timezone
from typing import Any, Dict, Optional
def _first_int(text: str, pattern: str) -> Optional[int]:
m = re.search(pattern, text, re.I)
if m and m.group(1) and m.group(1).replace(",", "").isdigit():
return max(0, int(m.group(1).replace(",", "")))
return None
def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """
    Rule-based extraction (no Ollama/LLM needed): match number+keyword regex
    patterns in the news text and emit fields conforming to the panel schema
    (combat_losses_delta, retaliation, wall_street, key_location_updates).

    Note: `t` is the lowercased text used for most matching; some Chinese
    patterns deliberately run against the original-case `text`.
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    out: Dict[str, Any] = {}
    t = (text or "").lower()
    loss_us, loss_ir = {}, {}
    # US personnel casualties (Chinese patterns, matched first)
    v = _first_int(t, r"造成\s*(\d+)\s*名?\s*美军\s*伤亡")
    if v is not None:
        loss_us["personnel_killed"] = v
    # Looser pattern only if the stricter one above did not match.
    v = _first_int(t, r"(\d+)\s*名?\s*美军\s*伤亡") if loss_us.get("personnel_killed") is None else None
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*(?:美军|美国军队|美国)\s*(?:死亡|阵亡)")
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*(?:美军|美国)\s*受伤")
    # "另有 X 人受伤" is only attributed to the US when the text mentions the US.
    if v is None and ("美军" in (text or "") or "美国" in (text or "")):
        v = _first_int(text or t, r"另有\s*(\d+)\s*人\s*受伤")
    if v is not None:
        loss_us["personnel_wounded"] = v
    v = _first_int(t, r"美军\s*伤亡\s*(\d+)")
    if v is not None and loss_us.get("personnel_killed") is None:
        loss_us["personnel_killed"] = v
    # US personnel casualties (English)
    v = _first_int(t, r"(?:us|american|u\.?s\.?)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|military)[\s\w]*(?:killed|dead)")
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:us|american)[\s\w]*(?:troop|soldier|military)[\s\w]*(?:killed|dead)")
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(?:us|american)[\s\w]*(\d+)[\s\w]*(?:wounded|injured)")
    if v is not None:
        loss_us["personnel_wounded"] = v
    # Iranian personnel casualties (Chinese)
    v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*伤亡")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*(?:伊朗|伊朗军队)[\s\w]*(?:死亡|阵亡)")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*受伤")
    if v is not None:
        loss_ir["personnel_wounded"] = v
    # Iranian personnel casualties (English)
    v = _first_int(t, r"(?:iran|iranian)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|guard|killed|dead)")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:iranian|iran)[\s\w]*(?:troop|soldier|guard|killed|dead)")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(?:iran|iranian)[\s\w]*(\d+)[\s\w]*(?:wounded|injured)")
    if v is not None:
        loss_ir["personnel_wounded"] = v
    # Civilian casualties (EN + ZH), attributed to a side by context.
    v = _first_int(t, r"(\d+)\s*名?\s*平民\s*(?:伤亡|死亡)")
    if v is not None:
        # NOTE(review): bare `text` (not `text or ""`) below — only safe
        # because the regex above cannot match when text is empty/None.
        if "伊朗" in text or "iran" in t:
            loss_ir["civilian_killed"] = v
        else:
            loss_us["civilian_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:killed|dead)") if loss_us.get("civilian_killed") is None and loss_ir.get("civilian_killed") is None else None
    if v is not None:
        if "iran" in t:
            loss_ir["civilian_killed"] = v
        else:
            loss_us["civilian_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:wounded|injured)")
    if v is not None:
        if "iran" in t:
            loss_ir["civilian_wounded"] = v
        else:
            loss_us["civilian_wounded"] = v
    v = _first_int(text or t, r"伊朗[\s\w]*(?:空袭|打击)[\s\w]*造成[^\d]*(\d+)[\s\w]*(?:平民|人|伤亡)")
    if v is not None:
        loss_ir["civilian_killed"] = v
    # Base damage (only match base/基地; skip generic "military targets").
    skip_bases = "军事目标" in (text or "") and "基地" not in (text or "") and "base" not in t
    if not skip_bases:
        v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:destroyed|leveled|摧毁|夷平)")
        if v is not None:
            loss_us["bases_destroyed"] = v
        v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:damaged|hit|struck|受损|袭击)")
        if v is not None:
            loss_us["bases_damaged"] = v
        # Keyword-only fallback: count 1 when wording is present but no number.
        if ("base" in t or "基地" in t) and ("destroy" in t or "level" in t or "摧毁" in t or "夷平" in t) and not loss_us.get("bases_destroyed"):
            loss_us["bases_destroyed"] = 1
        if ("base" in t or "基地" in t) and ("damage" in t or "hit" in t or "struck" in t or "strike" in t or "袭击" in t or "受损" in t) and not loss_us.get("bases_damaged"):
            loss_us["bases_damaged"] = 1
    # Aircraft / warships (side decided by surrounding context; defaults US).
    v = _first_int(t, r"(\d+)[\s\w]*(?:aircraft|plane|jet|fighter|f-?16|f-?35|f-?18)[\s\w]*(?:down|destroyed|lost|shot)")
    if v is not None:
        if "us" in t or "american" in t or "u.s" in t:
            loss_us["aircraft"] = v
        elif "iran" in t:
            loss_ir["aircraft"] = v
        else:
            loss_us["aircraft"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:ship|destroyer|warship|vessel)[\s\w]*(?:hit|damaged|sunk)")
    if v is not None:
        if "iran" in t:
            loss_ir["warships"] = v
        else:
            loss_us["warships"] = v
    # Drones: drone / uav / 无人机 (three pattern orderings tried in turn)
    v = _first_int(t, r"(\d+)[\s\w]*(?:drone|uav|无人机)[\s\w]*(?:down|destroyed|shot|击落|摧毁)")
    if v is None:
        v = _first_int(text or t, r"(?:击落|摧毁)[^\d]*(\d+)[\s\w]*(?:drone|uav|无人机|架)")
    if v is None:
        v = _first_int(t, r"(?:drone|uav|无人机)[\s\w]*(\d+)[\s\w]*(?:down|destroyed|shot|击落|摧毁)")
    if v is not None:
        if "iran" in t or "iranian" in t or "shahed" in t or "沙希德" in t or "伊朗" in (text or ""):
            loss_ir["drones"] = v
        else:
            loss_us["drones"] = v
    # Missiles: missile / 导弹
    v = _first_int(t, r"(\d+)[\s\w]*(?:missile|导弹)[\s\w]*(?:fired|launched|intercepted|destroyed|发射|拦截|击落)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["missiles"] = v
        else:
            loss_us["missiles"] = v
    v = _first_int(t, r"(?:missile|导弹)[\s\w]*(\d+)[\s\w]*(?:fired|launched|intercepted|destroyed|发射|拦截)") if not loss_us.get("missiles") and not loss_ir.get("missiles") else None
    if v is not None:
        if "iran" in t:
            loss_ir["missiles"] = v
        else:
            loss_us["missiles"] = v
    # Helicopters: helicopter / 直升机
    v = _first_int(t, r"(\d+)[\s\w]*(?:helicopter|直升机)[\s\w]*(?:down|destroyed|crashed|crashes|击落|坠毁)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["helicopters"] = v
        else:
            loss_us["helicopters"] = v
    # Submarines: submarine / 潜艇
    v = _first_int(t, r"(\d+)[\s\w]*(?:submarine|潜艇)[\s\w]*(?:sunk|damaged|hit|destroyed|击沉|受损)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["submarines"] = v
        else:
            loss_us["submarines"] = v
    # Carriers: carrier / 航空母舰 / 航母
    v = _first_int(t, r"(\d+)[\s\w]*(?:carrier|aircraft\s*carrier|航母|航空母舰)[\s\w]*(?:destroyed|damaged|lost|hit|sunk|摧毁|损毁|击毁|沉没)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["carriers"] = v
        else:
            loss_us["carriers"] = v
    # Civilian ships: civilian ship / 商船 / 民船
    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian\s*ship|merchant|商船|民船)[\s\w]*(?:sunk|damaged|hit|击沉|受损)")
    if v is None:
        v = _first_int(text or t, r"(?:民船|商船|货船)[\s\w]*(\d+)[\s\w]*(?:艘)?[\s\w]*(?:击沉|受损|袭击)")
    if v is not None:
        if "iran" in t or "iranian" in t or "伊朗" in (text or ""):
            loss_ir["civilian_ships"] = v
        else:
            loss_us["civilian_ships"] = v
    # Airports/ports: airport / port / 机场 / 港口
    v = _first_int(t, r"(\d+)[\s\w]*(?:airport|port|机场|港口)[\s\w]*(?:destroyed|damaged|hit|struck|摧毁|受损|袭击)")
    if v is None:
        v = _first_int(text or t, r"(?:机场|港口)[\s\w]*(\d+)[\s\w]*(?:处|个)?[\s\w]*(?:受损|袭击|摧毁)")
    if v is not None:
        if "iran" in t or "iranian" in t or "伊朗" in (text or ""):
            loss_ir["airport_port"] = v
        else:
            loss_us["airport_port"] = v
    if loss_us:
        out.setdefault("combat_losses_delta", {})["us"] = loss_us
    if loss_ir:
        out.setdefault("combat_losses_delta", {})["iran"] = loss_ir
    # Retaliation / Wall Street gauges use fixed heuristic values.
    if "retaliat" in t or "revenge" in t or "报复" in t or "反击" in t:
        out["retaliation"] = {"value": 75, "time": ts}
    if "wall street" in t or " dow " in t or "s&p" in t or "market slump" in t or "stock fall" in t or "美股" in t:
        out["wall_street"] = {"time": ts, "value": 55}
    # key_location_updates: attacked bases (matched against key_location.name)
    # Broad trigger-word set so more English reports update base status.
    attack_words = ("attack" in t or "attacked" in t or "hit" in t or "strike" in t or "struck" in t or "strikes" in t
                    or "damage" in t or "damaged" in t or "target" in t or "targeted" in t or "bomb" in t or "bombed" in t
                    or "袭击" in (text or "") or "遭袭" in (text or "") or "打击" in (text or "") or "受损" in (text or "") or "摧毁" in (text or ""))
    base_attacked = ("base" in t or "基地" in t or "outpost" in t or "facility" in t) and attack_words
    if base_attacked:
        updates: list = []
        # Known base keywords -> name_keywords (used by db_merge's LIKE
        # matching; must be able to match key_location.name).
        bases_all = [
            ("阿萨德|阿因|asad|assad|ain", "us"),
            ("巴格达|baghdad", "us"),
            ("乌代德|udeid|卡塔尔|qatar", "us"),
            ("阿克罗蒂里|akrotiri|塞浦路斯|cyprus", "us"),
            ("巴格拉姆|bagram|阿富汗|afghanistan", "us"),
            ("埃尔比勒|erbil", "us"),
            ("因吉尔利克|incirlik|土耳其|turkey", "us"),
            ("苏尔坦|sultan|沙特|saudi", "us"),
            ("坦夫|tanf|叙利亚|syria", "us"),
            ("达夫拉|dhafra|阿联酋|uae", "us"),
            ("内瓦提姆|nevatim|拉蒙|ramon|以色列|israel", "us"),
            ("赛利耶|sayliyah", "us"),
            ("巴林|bahrain", "us"),
            ("科威特|kuwait", "us"),
            # Iranian sites
            ("阿巴斯港|abbas|bandar abbas", "iran"),
            ("德黑兰|tehran", "iran"),
            ("布什尔|bushehr", "iran"),
            ("伊斯法罕|isfahan|esfahan", "iran"),
            ("纳坦兹|natanz", "iran"),
            ("米纳布|minab", "iran"),
            ("卡拉季|karaj", "iran"),
            ("克尔曼沙赫|kermanshah", "iran"),
            ("大不里士|tabriz", "iran"),
            ("霍尔木兹|hormuz", "iran"),
        ]
        for kws, side in bases_all:
            if any(k in t for k in kws.split("|")):
                updates.append({"name_keywords": kws, "side": side, "status": "attacked", "damage_level": 2})
        if updates:
            out["key_location_updates"] = updates
    return out

41
crawler/main.py Normal file
View File

@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
"""爬虫入口:定时执行完整写库流水线(抓取 → 清洗 → 去重 → 映射 → 更新表 → 通知 API"""
import time
import sys
from pathlib import Path
# 确保能导入 config
sys.path.insert(0, str(Path(__file__).resolve().parent))
from config import DB_PATH, API_BASE, CRAWL_INTERVAL
from pipeline import run_full_pipeline
def run_once() -> int:
    """Run one full crawl round: fetch, clean, dedup, map, write, notify.

    Returns the number of new items this round -- panel rows if any were
    written, otherwise the count of new news rows.
    """
    _, new_news, new_panel = run_full_pipeline(
        db_path=DB_PATH,
        api_base=API_BASE,
        translate=True,
        notify=True,
    )
    return new_panel or new_news
def main() -> None:
    """Crawler entry point: run `run_once` forever, one round every
    CRAWL_INTERVAL seconds.

    Ctrl-C exits the loop cleanly; any other per-round error is printed and
    the loop continues with the next round.
    """
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    while True:
        try:
            n = run_once()
            if n > 0:
                print(f"[{time.strftime('%H:%M:%S')}] 抓取完成,去重后新增 {n} 条,已写库并通知 API")
            # Fix: sleep inside the try so Ctrl-C during the wait (which is
            # where the process spends most of its time) triggers the clean
            # `break` instead of an unhandled KeyboardInterrupt traceback.
            time.sleep(CRAWL_INTERVAL)
        except KeyboardInterrupt:
            break
        except Exception as e:
            print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
            time.sleep(CRAWL_INTERVAL)


if __name__ == "__main__":
    main()

141
crawler/news_storage.py Normal file
View File

@@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-
"""
资讯内容独立存储,支持历史去重
爬虫拉回数据 → 计算 content_hash → 若已存在则跳过(去重)→ 新数据落库 news_content
"""
import hashlib
import os
import re
import sqlite3
from datetime import datetime, timezone
from typing import List, Optional, Tuple
from config import DB_PATH
def _to_utc_iso(dt: datetime) -> str:
if dt.tzinfo:
dt = dt.astimezone(timezone.utc)
return dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")
def _normalize_for_hash(text: str) -> str:
"""归一化文本用于生成去重 hash"""
if not text:
return ""
t = re.sub(r"\s+", " ", str(text).strip().lower())[:600]
return re.sub(r"[\x00-\x1f]", "", t)
def content_hash(title: str, summary: str, url: str) -> str:
"""根据标题、摘要、URL 生成去重 hash相似内容视为重复"""
raw = _normalize_for_hash(title) + "|" + _normalize_for_hash(summary) + "|" + (url or "").strip()
return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32]
def _ensure_table(conn: sqlite3.Connection) -> None:
conn.execute("""
CREATE TABLE IF NOT EXISTS news_content (
id TEXT PRIMARY KEY,
content_hash TEXT NOT NULL UNIQUE,
title TEXT NOT NULL,
summary TEXT NOT NULL,
url TEXT NOT NULL DEFAULT '',
source TEXT NOT NULL DEFAULT '',
published_at TEXT NOT NULL,
category TEXT NOT NULL DEFAULT 'other',
severity TEXT NOT NULL DEFAULT 'medium',
created_at TEXT NOT NULL DEFAULT (datetime('now'))
)
""")
try:
conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_news_content_hash ON news_content(content_hash)")
except sqlite3.OperationalError:
pass
try:
conn.execute("CREATE INDEX IF NOT EXISTS idx_news_content_pub ON news_content(published_at DESC)")
except sqlite3.OperationalError:
pass
conn.commit()
def exists_by_hash(conn: sqlite3.Connection, h: str) -> bool:
    """Return True when a news_content row with this content_hash exists."""
    cur = conn.execute("SELECT 1 FROM news_content WHERE content_hash = ? LIMIT 1", (h,))
    return cur.fetchone() is not None
def insert_news(
    conn: sqlite3.Connection,
    *,
    title: str,
    summary: str,
    url: str = "",
    source: str = "",
    published: datetime,
    category: str = "other",
    severity: str = "medium",
) -> Optional[str]:
    """Insert one news item into news_content, skipping duplicates by hash.

    Args:
        conn: Open SQLite connection; the table is created on demand.
        title/summary/url/source: Item fields, truncated to column limits.
        published: Publication time (aware or naive) stored as published_at.
        category/severity: Panel classification labels, stored as given.

    Returns:
        The new row id ("nc_" + 14 hex chars), or None when an item with the
        same content hash already exists (dedup skip).
    """
    _ensure_table(conn)
    h = content_hash(title, summary, url)
    if exists_by_hash(conn, h):
        return None
    # Mix the current time into the id so ids stay unique even if an equal
    # hash is re-inserted after deletion. Fix: datetime.now(timezone.utc)
    # replaces the deprecated, naive datetime.utcnow() (removal slated in
    # future Python versions); `timezone` is already imported at module top.
    uid = "nc_" + hashlib.sha256(f"{h}{datetime.now(timezone.utc).isoformat()}".encode()).hexdigest()[:14]
    ts = _to_utc_iso(published)
    conn.execute(
        """INSERT INTO news_content (id, content_hash, title, summary, url, source, published_at, category, severity)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (uid, h, (title or "")[:500], (summary or "")[:2000], (url or "")[:500], (source or "")[:100], ts, category, severity),
    )
    conn.commit()
    return uid
def save_and_dedup(items: List[dict], db_path: Optional[str] = None) -> Tuple[List[dict], int]:
    """Deduplicate crawled items and persist new ones into news_content.

    items: [{"title","summary","url","published","category","severity","source"?}, ...]
    Returns (items that survived dedup -- each augmented with "news_id",
    count of rows actually inserted). If the DB file does not exist yet,
    nothing is written and ([], 0) is returned.
    """
    path = db_path or DB_PATH
    if not os.path.exists(path):
        return [], 0

    def _as_datetime(value):
        # Accept ISO strings (with trailing Z), datetime objects, or None.
        if isinstance(value, str):
            try:
                return datetime.fromisoformat(value.replace("Z", "+00:00"))
            except ValueError:
                return datetime.now(timezone.utc)
        if value is None:
            return datetime.now(timezone.utc)
        return value

    conn = sqlite3.connect(path, timeout=10)
    try:
        _ensure_table(conn)
        kept: List[dict] = []
        added = 0
        for item in items:
            uid = insert_news(
                conn,
                title=(item.get("title") or "")[:500],
                summary=(item.get("summary") or item.get("title") or "")[:2000],
                url=(item.get("url") or "")[:500],
                source=(item.get("source") or "")[:100],
                published=_as_datetime(item.get("published")),
                category=item.get("category", "other"),
                severity=item.get("severity", "medium"),
            )
            if uid:
                added += 1
                kept.append({**item, "news_id": uid})
        return kept, added
    finally:
        conn.close()

42
crawler/panel_schema.py Normal file
View File

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
"""
前端面板完整数据 schema与 DB / situationData / useReplaySituation 对齐
爬虫 + AI 清洗后的数据必须符合此 schema 才能正确更新前端
"""
from typing import Any, Dict, List, Optional, Tuple
# Event-timeline enums (must stay in sync with the frontend situationData /
# useReplaySituation contract).
SITUATION_UPDATE_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
SITUATION_UPDATE_SEVERITIES = ("low", "medium", "high", "critical")
SUMMARY_MAX_LEN = 120
# Combat-losses row: bases_destroyed, bases_damaged, personnel_killed, ...
CombatLossesRow = Dict[str, Any]
# Replay time-series point: (ISO time, value).
TimeSeriesPoint = Tuple[str, int]
# Fields the AI may extract from news text.
# NOTE(review): this list contains "tanks" while the extractors emit
# "armor" -- confirm which key the DB / frontend actually expects.
EXTRACTABLE_FIELDS = {
    "situation_update": ["summary", "category", "severity", "timestamp"],
    "combat_losses": ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "tanks", "carriers", "civilian_ships", "airport_port"],
    "retaliation": ["value"],  # 0-100
    "wall_street_trend": ["time", "value"],  # 0-100
    "conflict_stats": ["estimated_casualties", "estimated_strike_count"],
}


def validate_category(cat: str) -> str:
    """Clamp a category to the known set; unknown values become "other"."""
    return cat if cat in SITUATION_UPDATE_CATEGORIES else "other"


def validate_severity(sev: str) -> str:
    """Clamp a severity to the known set; unknown values become "medium"."""
    return sev if sev in SITUATION_UPDATE_SEVERITIES else "medium"


def validate_summary(s: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Sanitize a summary: collapse whitespace, cap the length, and strip
    ASCII control characters. Non-strings and empty input yield ""."""
    import re
    if not s or not isinstance(s, str):
        return ""
    cleaned = re.sub(r"\s+", " ", str(s).strip())[:max_len]
    return re.sub(r"[\x00-\x1f]", "", cleaned).rstrip()

66
crawler/parser.py Normal file
View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
"""新闻分类与严重度判定"""
import re
from typing import List
try:
from typing import Literal # type: ignore
except ImportError:
try:
from typing_extensions import Literal # type: ignore
except ImportError:
from typing import Any
class _LiteralFallback:
def __getitem__(self, item):
return Any
Literal = _LiteralFallback()
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
Severity = Literal["low", "medium", "high", "critical"]
# 分类关键词
CAT_DEPLOYMENT = ["deploy", "carrier", "航母", "military build", "troop", "forces"]
CAT_ALERT = ["strike", "attack", "fire", "blast", "hit", "爆炸", "袭击", "打击"]
CAT_INTEL = ["satellite", "intel", "image", "surveillance", "卫星", "情报"]
CAT_DIPLOMATIC = ["talk", "negotiation", "diplomat", "sanction", "谈判", "制裁"]
def _match(text: str, words: List[str]) -> bool:
t = (text or "").lower()
for w in words:
if w.lower() in t:
return True
return False
def classify(text: str) -> Category:
if _match(text, CAT_ALERT):
return "alert"
if _match(text, CAT_DEPLOYMENT):
return "deployment"
if _match(text, CAT_INTEL):
return "intel"
if _match(text, CAT_DIPLOMATIC):
return "diplomatic"
return "other"
def severity(text: str, category: Category) -> Severity:
    """Rule-based severity for a news item.

    Order: critical keyword > high keyword > category mapping
    (alert -> high, deployment -> medium) > low.
    """
    t = (text or "").lower()
    # Fix: the critical list contained an empty string "" (almost certainly
    # a lost "核" / "nuclear" CJK character, matching the EN/ZH pairing used
    # throughout these tables). Since "" is a substring of every string,
    # *all* items were being rated critical. Restored to "核".
    critical = [
        "nuclear", "核", "strike", "attack", "killed", "dead", "casualty",
        "war", "invasion", "袭击", "打击", "死亡",
    ]
    high = [
        "missile", "drone", "bomb", "explosion", "blasted", "fire",
        "导弹", "无人机", "爆炸", "轰炸",
    ]
    if _match(t, critical):
        return "critical"
    if _match(t, high) or category == "alert":
        return "high"
    if category == "deployment":
        return "medium"
    return "low"

150
crawler/parser_ai.py Normal file
View File

@@ -0,0 +1,150 @@
# -*- coding: utf-8 -*-
"""
AI 新闻分类与严重度判定
优先 DASHSCOPE_API_KEY通义无需 Ollama否则 Ollama最后规则
设置 PARSER_AI_DISABLED=1 可只用规则(更快)
"""
import os
from typing import Any, Optional, Tuple
# `typing.Literal` with graceful degradation: stdlib (3.8+), then
# typing_extensions, then a dummy subscriptable that turns Literal[...] into
# Any so the module still imports anywhere. `Any` comes from the module's
# top-level typing import.
try:
    from typing import Literal  # type: ignore
except ImportError:
    try:
        from typing_extensions import Literal  # type: ignore
    except ImportError:
        class _LiteralFallback:
            # Literal["a", "b"] -> Any (no static checking, runtime no-op).
            def __getitem__(self, item):
                return Any
        Literal = _LiteralFallback()
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
Severity = Literal["low", "medium", "high", "critical"]
# PARSER_AI_DISABLED=1 forces rule-only classification (faster, no AI calls).
PARSER_AI_DISABLED = os.environ.get("PARSER_AI_DISABLED", "0") == "1"
# Local Ollama model name used when no DashScope key is configured.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
# Aliyun DashScope key; when set, DashScope is preferred over Ollama.
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "").strip()
# Canonical label sets, in the order they are searched in AI replies.
_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
_SEVERITIES = ("low", "medium", "high", "critical")


def _parse_ai_response(text: str) -> Tuple[Category, Severity]:
    """Parse a "category:severity" style AI reply.

    The first known category / severity substring found in the (lowercased)
    reply wins; defaults are ("other", "low") when nothing matches.
    """
    reply = (text or "").strip().lower()
    category = next((c for c in _CATEGORIES if c in reply), "other")
    level = next((s for s in _SEVERITIES if s in reply), "low")
    return category, level  # type: ignore
def _call_dashscope(text: str, timeout: int = 6) -> Optional[Tuple[Category, Severity]]:
    """Classify via Aliyun DashScope (qwen-turbo); no Ollama required.

    Requires DASHSCOPE_API_KEY. Returns (category, severity) or None when
    disabled, unconfigured, or on any SDK/HTTP failure.
    NOTE(review): `timeout` is accepted but not forwarded to the SDK call.
    """
    if not DASHSCOPE_API_KEY or PARSER_AI_DISABLED:
        return None
    try:
        import dashscope
        from http import HTTPStatus
        dashscope.api_key = DASHSCOPE_API_KEY
        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
News: {text[:300]}
Reply format: category:severity (e.g. alert:high)"""
        r = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=32,
        )
        if r.status_code != HTTPStatus.OK:
            return None
        out = r.output.get("choices", [{}])[0].get("message", {}).get("content", "")
        return _parse_ai_response(out)
    except Exception:
        # Silent fallback: caller degrades to Ollama or rule-based parsing.
        return None
def _call_ollama(text: str, timeout: int = 5) -> Optional[Tuple[Category, Severity]]:
    """Classify via a local Ollama model (requires `ollama run llama3.1`
    or whatever OLLAMA_MODEL names).

    Returns (category, severity) or None when disabled or on any
    network/HTTP failure.
    """
    if PARSER_AI_DISABLED:
        return None
    try:
        import requests
        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
News: {text[:300]}
Reply format: category:severity (e.g. alert:high)"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                # Cap generation length; the reply is a single short line.
                "options": {"num_predict": 32},
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        out = r.json().get("message", {}).get("content", "")
        return _parse_ai_response(out)
    except Exception:
        # Silent fallback: caller degrades to rule-based parsing.
        return None
def _rule_classify(text: str) -> Category:
    """Fallback: keyword-rule classifier from parser.py."""
    from parser import classify as rule_classify
    return rule_classify(text)
def _rule_severity(text: str, category: Category) -> Severity:
    """Fallback: keyword-rule severity from parser.py."""
    from parser import severity as rule_severity
    return rule_severity(text, category)
def _call_ai(text: str) -> Optional[Tuple[Category, Severity]]:
    """Route to DashScope when an API key is configured, else local Ollama."""
    return _call_dashscope(text) if DASHSCOPE_API_KEY else _call_ollama(text)
def classify(text: str) -> Category:
    """AI-backed category; falls back to the rule classifier on AI failure."""
    result = _call_ai(text)
    return result[0] if result else _rule_classify(text)
def severity(text: str, category: Category) -> Severity:
    """AI-backed severity; falls back to the rule parser on AI failure."""
    result = _call_ai(text)
    return result[1] if result else _rule_severity(text, category)
def classify_and_severity(text: str) -> Tuple[Category, Severity]:
    """Return (category, severity) with a single AI call (instead of the two
    calls that classify() + severity() would make).

    When PARSER_AI_DISABLED is set, uses the rule parser directly; otherwise
    tries the AI once and falls back to the rules on failure.
    """
    if PARSER_AI_DISABLED:
        from parser import classify, severity
        c = classify(text)
        return c, severity(text, c)
    res = _call_ai(text)
    if res:
        return res
    # Fix: the rule fallback previously evaluated _rule_classify(text)
    # twice; compute the category once and reuse it.
    c = _rule_classify(text)
    return c, _rule_severity(text, c)

190
crawler/pipeline.py Normal file
View File

@@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
"""
统一写库流水线:抓取 → 清洗 → 去重 → 映射到前端库字段 → 更新表 → 通知
与 server/README.md 第五节「爬虫侧写库链路」一致,供 main.py 与 realtime_conflict_service 共用。
"""
import os
from datetime import datetime, timezone
from typing import Callable, Optional, Tuple
from config import DB_PATH, API_BASE
from db_writer import touch_situation_updated_at_path
def _notify_api(api_base: str) -> bool:
    """POST /api/crawler/notify on the Node API to trigger an immediate
    broadcast.

    Sends X-Crawler-Token when API_CRAWLER_TOKEN is set. Returns True on
    HTTP 200, False (with a warning printed) on any failure.
    """
    try:
        import urllib.request
        token = os.environ.get("API_CRAWLER_TOKEN", "").strip()
        headers = {"Content-Type": "application/json"}
        if token:
            headers["X-Crawler-Token"] = token
        req = urllib.request.Request(
            f"{api_base.rstrip('/')}/api/crawler/notify",
            method="POST",
            headers=headers,
        )
        with urllib.request.urlopen(req, timeout=5) as resp:
            return resp.status == 200
    except Exception as e:
        print(f" [warn] notify API failed: {e}")
        return False
def _extract_and_merge(items: list, db_path: str) -> bool:
    """AI-extract precise structured data from article text (full body when
    available, else title+summary) and merge it into combat_losses /
    key_location and related tables via db_merge.

    Extractor selection: DashScope when DASHSCOPE_API_KEY is set, else the
    pure rule extractor when CLEANER_AI_DISABLED=1, else the Ollama-based
    extractor. Rule extraction is cheap, so it gets a larger per-round limit.

    Returns True when at least one item produced data that was merged.
    """
    if not items or not os.path.exists(db_path):
        return False
    try:
        from db_merge import merge
        use_dashscope = bool(os.environ.get("DASHSCOPE_API_KEY", "").strip())
        if use_dashscope:
            from extractor_dashscope import extract_from_news
            limit = 10
        elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
            limit = 25
        else:
            from extractor_ai import extract_from_news
            limit = 10
        merged_any = False
        for it in items[:limit]:
            # Prefer the full article body (fetched by article_fetcher);
            # otherwise fall back to "title summary" so the AI still has
            # something to extract numbers from.
            text = it.get("full_text") or ((it.get("title", "") or "") + " " + (it.get("summary", "") or ""))
            if len(text.strip()) < 20:
                continue
            pub = it.get("published")
            ts = None
            if pub:
                try:
                    if isinstance(pub, str):
                        pub_dt = datetime.fromisoformat(pub.replace("Z", "+00:00"))
                    else:
                        pub_dt = pub
                    if pub_dt.tzinfo:
                        pub_dt = pub_dt.astimezone(timezone.utc)
                    ts = pub_dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")
                except Exception:
                    # Unparsable timestamps fall back to the extractor default.
                    pass
            extracted = extract_from_news(text, timestamp=ts)
            if extracted and merge(extracted, db_path=db_path):
                merged_any = True
        return merged_any
    except Exception as e:
        print(f" [warn] AI 面板数据提取/合并: {e}")
        return False
def run_full_pipeline(
    db_path: Optional[str] = None,
    api_base: Optional[str] = None,
    *,
    translate: bool = True,
    notify: bool = True,
    on_notify: Optional[Callable[[], None]] = None,
) -> Tuple[int, int, int]:
    """Run the complete write-to-DB pipeline:

    1. Fetch live items via the scrapers.
    2. AI-clean titles/summaries/categories into valid panel data.
    3. Deduplicate via news_content content_hash; only new items continue.
    4. Map valid data onto the frontend DB fields (situation_update,
       news_content, combat_losses, ...).
    5. Update the DB tables; notify the backend.

    translate: translate titles/summaries (EN -> ZH) before cleaning.
    notify: call POST /api/crawler/notify at the end of the pipeline.
    on_notify: if given, invoked right before notification (used by the
        gdelt service for GDELT backfill).

    Returns: (items fetched this round, new news rows after dedup,
    situation_update rows written).
    """
    path = db_path or DB_PATH
    base = api_base or API_BASE
    from scrapers.rss_scraper import fetch_all
    from db_writer import write_updates
    from news_storage import save_and_dedup
    from cleaner_ai import clean_news_for_panel, ensure_category, ensure_severity
    # 1. Fetch
    items = fetch_all()
    if not items:
        return 0, 0, 0
    # Optional: keep only items published after a configured start time
    # (e.g. CRAWL_START_DATE=2026-02-28T00:00:00).
    start_date_env = os.environ.get("CRAWL_START_DATE", "").strip()
    if start_date_env:
        try:
            raw = start_date_env.replace("Z", "+00:00").strip()
            start_dt = datetime.fromisoformat(raw)
            if start_dt.tzinfo is None:
                start_dt = start_dt.replace(tzinfo=timezone.utc)
            else:
                start_dt = start_dt.astimezone(timezone.utc)
            before = len(items)
            # Items with no published date compare as datetime.min (kept out).
            items = [it for it in items if (it.get("published") or datetime.min.replace(tzinfo=timezone.utc)) >= start_dt]
            if before > len(items):
                print(f" [pipeline] 按 CRAWL_START_DATE={start_date_env} 过滤后保留 {len(items)} 条(原 {before} 条)")
        except Exception as e:
            print(f" [warn] CRAWL_START_DATE 解析失败,忽略: {e}")
    if not items:
        return 0, 0, 0
    n_total = len(items)
    print(f" [pipeline] 抓取 {n_total} 条")
    for i, it in enumerate(items[:5]):
        title = (it.get("title") or it.get("summary") or "").strip()[:60]
        # NOTE(review): both branches below append "" — this looks like a
        # lost ellipsis character ("…") in one branch; confirm against VCS.
        print(f" [{i + 1}] {title}" + ("" if len((it.get("title") or it.get("summary") or "")[:60]) >= 60 else ""))
    if n_total > 5:
        print(f" ... 共 {n_total} 条")
    # 2. Clean titles/summaries to fit the panel schema (with optional
    # EN -> ZH translation first).
    if translate:
        from translate_utils import translate_to_chinese
        for it in items:
            raw_title = translate_to_chinese(it.get("title", "") or "")
            raw_summary = translate_to_chinese(it.get("summary", "") or it.get("title", ""))
            it["title"] = clean_news_for_panel(raw_title, max_len=80)
            it["summary"] = clean_news_for_panel(raw_summary or raw_title, max_len=120)
    else:
        for it in items:
            it["title"] = clean_news_for_panel(it.get("title", "") or "", max_len=80)
            it["summary"] = clean_news_for_panel(it.get("summary", "") or it.get("title", ""), max_len=120)
    for it in items:
        it["category"] = ensure_category(it.get("category", "other"))
        it["severity"] = ensure_severity(it.get("severity", "medium"))
        it["source"] = it.get("source") or "rss"
    # 3. Dedup: persist to news_content; only genuinely new items come back.
    new_items, n_news = save_and_dedup(items, db_path=path)
    if new_items:
        print(f" [pipeline] 去重后新增 {n_news} 条,写入事件脉络 {len(new_items)} 条")
        for i, it in enumerate(new_items[:3]):
            title = (it.get("title") or it.get("summary") or "").strip()[:55]
            # NOTE(review): same suspected lost-ellipsis artifact as above.
            print(f" 新增 [{i + 1}] {title}" + ("" if len((it.get("title") or it.get("summary") or "").strip()) > 55 else ""))
    # 3.5 Enrichment: fetch full article bodies for items that will go
    # through AI extraction, so precise numbers (casualties, bases, ...)
    # can be pulled from the full text.
    if new_items:
        try:
            from article_fetcher import enrich_item_with_body
            # Only fetch bodies for the first few items to cap request volume.
            enrich_limit = int(os.environ.get("ARTICLE_FETCH_LIMIT", "10"))
            for it in new_items[:enrich_limit]:
                enrich_item_with_body(it)
        except Exception as e:
            print(f" [warn] 正文抓取: {e}")
    # 4. Map onto frontend DB fields and update tables (same db path as the
    # dedup/AI steps so everything lands in one database).
    n_panel = write_updates(new_items, db_path=path) if new_items else 0
    if new_items:
        _extract_and_merge(new_items, path)
    # 4.5 Refresh situation.updated_at on every run so the frontend can show
    # a current "last crawl time" even when no new rows were added.
    touch_situation_updated_at_path(db_path=path)
    # 5. Notify on every run so the API reloads and broadcasts lastUpdated.
    if on_notify:
        on_notify()
    if notify:
        _notify_api(base)
    return len(items), n_news, n_panel

20
crawler/pyproject.toml Normal file
View File

@@ -0,0 +1,20 @@
[project]
name = "usa-crawler"
version = "1.0.0"
description = "GDELT + RSS 爬虫与实时冲突服务"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"requests>=2.32.0",
"feedparser>=6.0.10",
"beautifulsoup4>=4.12.0",
"pytest>=8.0.0",
"fastapi>=0.115.0",
"uvicorn[standard]>=0.32.0",
"deep-translator>=1.11.0",
"dashscope>=1.20.0",
]
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]

View File

@@ -0,0 +1,504 @@
# -*- coding: utf-8 -*-
"""
GDELT 实时冲突抓取 + API 服务
核心数据源GDELT Project约 15 分钟级更新,含经纬度、事件编码、参与方、事件强度
"""
import os
# Connect directly (bypass the system proxy) to avoid ProxyError/timeouts;
# set CRAWLER_USE_PROXY=1 when a proxy is actually required.
if os.environ.get("CRAWLER_USE_PROXY") != "1":
    os.environ.setdefault("NO_PROXY", "*")
import hashlib
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import List, Optional
import asyncio
import logging
import requests
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
logging.getLogger("uvicorn").setLevel(logging.INFO)
app = FastAPI(title="GDELT Conflict Service")
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"])
# Configuration (all values overridable via environment variables)
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))
API_BASE = os.environ.get("API_BASE", "http://localhost:3001")
QUERY = os.environ.get("GDELT_QUERY", "United States Iran military")
MAX_RECORDS = int(os.environ.get("GDELT_MAX_RECORDS", "30"))
FETCH_INTERVAL_SEC = int(os.environ.get("FETCH_INTERVAL_SEC", "60"))
RSS_INTERVAL_SEC = int(os.environ.get("RSS_INTERVAL_SEC", "60"))  # poll major world media every minute
# GDELT timespan: 1h=1 hour, 1d=1 day, 1week=1 week; the API default is
# ~3 months, which tends to return stale articles.
GDELT_TIMESPAN = os.environ.get("GDELT_TIMESPAN", "1d")
# Set to 1 to skip GDELT and build the event timeline from RSS news only
# (GDELT may be unreachable from some regions).
GDELT_DISABLED = os.environ.get("GDELT_DISABLED", "0") == "1"
# Default Iranian attack origin when an event has no coordinates.
IRAN_COORD = [51.3890, 35.6892]  # Tehran [lng, lat]
# Shared kwargs for direct requests, bypassing the system proxy
# (avoids ProxyError / proxy timeouts).
_REQ_KW = {"timeout": 15, "headers": {"User-Agent": "US-Iran-Dashboard/1.0"}}
if os.environ.get("CRAWLER_USE_PROXY") != "1":
    _REQ_KW["proxies"] = {"http": None, "https": None}
# In-memory cache of the most recently fetched events.
EVENT_CACHE: List[dict] = []
# ==========================
# 冲突强度评分 (110)
# ==========================
def calculate_impact_score(title: str) -> int:
    """Score the conflict intensity of a headline on a 1-10 scale.

    Each matched keyword group (English or Chinese) adds a fixed weight and
    the total is clamped to 10. Empty/None titles score the minimum of 1.

    Args:
        title: Headline text; may be None or empty.

    Returns:
        Integer score in [1, 10].
    """
    score = 1
    t = (title or "").lower()
    if "missile" in t or "导弹" in t:
        score += 3
    if "strike" in t or "袭击" in t or "打击" in t:
        score += 2
    if "killed" in t or "death" in t or "casualt" in t or "死亡" in t or "伤亡" in t:
        score += 4
    if "troops" in t or "soldier" in t or "士兵" in t or "军人" in t:
        score += 2
    if "attack" in t or "attacked" in t or "攻击" in t:
        score += 3
    # BUG FIX: the original condition was `"nuclear" in t or "" in t` — an empty
    # string is contained in every string, so +4 was added unconditionally to
    # every title. The Chinese keyword was evidently lost to an encoding
    # mishap; restore it as "核" (nuclear).
    if "nuclear" in t or "核" in t:
        score += 4
    if "explosion" in t or "blast" in t or "bomb" in t or "爆炸" in t:
        score += 2
    return min(score, 10)
# 根据 severity 映射到 impact_score
def _severity_to_score(sev: str) -> int:
m = {"critical": 9, "high": 7, "medium": 5, "low": 2}
return m.get((sev or "").lower(), 5)
# Keyword -> (lng, lat) lookup used to geolocate RSS text when GDELT is
# disabled and events must still be placed on the map.
_LOC_COORDS = [
    (["阿克罗蒂里", "akrotiri", "塞浦路斯", "cyprus"], (32.98, 34.58)),
    (["巴格拉姆", "bagram", "阿富汗", "afghanistan"], (69.26, 34.95)),
    (["巴格达", "baghdad", "伊拉克", "iraq"], (44.37, 33.31)),
    (["贝鲁特", "beirut", "黎巴嫩", "lebanon"], (35.49, 33.89)),
    (["耶路撒冷", "jerusalem", "特拉维夫", "tel aviv", "以色列", "israel"], (35.21, 31.77)),
    (["阿巴斯港", "bandar abbas", "霍尔木兹", "hormuz"], (56.27, 27.18)),
    (["米纳布", "minab"], (57.08, 27.13)),
    (["德黑兰", "tehran", "伊朗", "iran"], (51.389, 35.689)),
    (["大马士革", "damascus", "叙利亚", "syria"], (36.28, 33.50)),
    (["迪拜", "dubai", "阿联酋", "uae"], (55.27, 25.20)),
    (["沙特", "saudi"], (46.73, 24.71)),
    (["巴基斯坦", "pakistan"], (73.06, 33.72)),
    (["奥斯汀", "austin"], (-97.74, 30.27)),
]

def _infer_coords(text: str) -> tuple:
    """Return (lng, lat) inferred from location keywords found in `text`.

    The first table row with any matching keyword wins; when nothing matches,
    the Tehran coordinate (IRAN_COORD, the presumed attack origin) is used.
    """
    haystack = (text or "").lower()
    for keywords, coords in _LOC_COORDS:
        if any(kw in haystack for kw in keywords):
            return coords
    return (IRAN_COORD[0], IRAN_COORD[1])
# ==========================
# 获取 GDELT 实时事件
# ==========================
def _parse_article(article: dict) -> Optional[dict]:
    """Normalize one GDELT article dict into an internal event record.

    Returns None when no usable title can be found. The title is translated
    to Chinese and cleaned before storage; missing or non-numeric coordinates
    fall back to Tehran (IRAN_COORD), the presumed attack origin.
    """
    # NOTE(review): falls back to `seendate` as the title when `title` is
    # absent — looks intentional (keeps the record), but confirm upstream.
    title_raw = article.get("title") or article.get("seendate") or ""
    if not title_raw:
        return None
    # Imported lazily so importing this module does not pull in the
    # translation/cleaning dependencies.
    from translate_utils import translate_to_chinese
    from cleaner_ai import clean_news_for_panel
    title = translate_to_chinese(str(title_raw)[:500])
    title = clean_news_for_panel(title, max_len=150)
    url = article.get("url") or article.get("socialimage") or ""
    seendate = article.get("seendate") or datetime.utcnow().isoformat()
    lat = article.get("lat")
    lng = article.get("lng")
    # Use the Iran coordinate (attack origin) when no lat/lng is provided.
    if lat is None or lng is None:
        lat, lng = IRAN_COORD[1], IRAN_COORD[0]
    try:
        lat, lng = float(lat), float(lng)
    except (TypeError, ValueError):
        lat, lng = IRAN_COORD[1], IRAN_COORD[0]
    # Score intensity from the raw (untranslated) title so English keywords match.
    impact = calculate_impact_score(title_raw)
    # Stable 24-hex id derived from URL + first-seen date.
    event_id = hashlib.sha256(f"{url}{seendate}".encode()).hexdigest()[:24]
    return {
        "event_id": event_id,
        "event_time": seendate,
        "title": title[:500],
        "lat": lat,
        "lng": lng,
        "impact_score": impact,
        "url": url,
    }
def fetch_gdelt_events() -> None:
    """Fetch recent GDELT articles, refresh EVENT_CACHE, persist, and notify Node.

    No-op when GDELT_DISABLED is set. Failures are logged and leave the
    existing cache untouched; previously they were swallowed silently
    (`except Exception: pass`), which made network outages invisible.
    """
    if GDELT_DISABLED:
        return
    url = (
        "https://api.gdeltproject.org/api/v2/doc/doc"
        f"?query={QUERY}"
        "&mode=ArtList"
        "&format=json"
        f"&maxrecords={MAX_RECORDS}"
        f"&timespan={GDELT_TIMESPAN}"
        "&sort=datedesc"
    )
    try:
        resp = requests.get(url, **_REQ_KW)
        resp.raise_for_status()
        data = resp.json()
        # The API may return {"articles": [...]} or a bare list.
        articles = data.get("articles", data) if isinstance(data, dict) else (data if isinstance(data, list) else [])
        if not isinstance(articles, list):
            articles = []
        new_events = []
        for a in articles:
            ev = _parse_article(a) if isinstance(a, dict) else None
            if ev:
                new_events.append(ev)
        # Newest first by event_time.
        new_events.sort(key=lambda e: e.get("event_time", ""), reverse=True)
        global EVENT_CACHE
        EVENT_CACHE = new_events
        # Persist to SQLite and ask the Node API to rebroadcast.
        _write_to_db(new_events)
        _notify_node()
        print(f"[{datetime.now().strftime('%H:%M:%S')}] GDELT 更新 {len(new_events)} 条事件")
    except Exception as e:
        # Best-effort: keep the service loop alive, but surface the failure
        # instead of hiding it.
        print(f" [warn] GDELT fetch failed: {e}")
def _ensure_table(conn: sqlite3.Connection) -> None:
conn.execute("""
CREATE TABLE IF NOT EXISTS gdelt_events (
event_id TEXT PRIMARY KEY,
event_time TEXT NOT NULL,
title TEXT NOT NULL,
lat REAL NOT NULL,
lng REAL NOT NULL,
impact_score INTEGER NOT NULL,
url TEXT,
created_at TEXT DEFAULT (datetime('now'))
)
""")
conn.execute("""
CREATE TABLE IF NOT EXISTS conflict_stats (
id INTEGER PRIMARY KEY CHECK (id = 1),
total_events INTEGER NOT NULL,
high_impact_events INTEGER NOT NULL,
estimated_casualties INTEGER NOT NULL,
estimated_strike_count INTEGER NOT NULL,
updated_at TEXT NOT NULL
)
""")
conn.commit()
def _write_to_db(events: List[dict]) -> None:
    """Upsert GDELT events plus derived conflict stats into SQLite.

    Returns silently when the DB file does not exist yet (the Node side seeds
    it). Also touches the `situation` row so the frontend sees a fresh
    `updated_at` timestamp. Any failure is logged and rolled back.
    """
    if not os.path.exists(DB_PATH):
        return
    conn = sqlite3.connect(DB_PATH, timeout=10)
    try:
        _ensure_table(conn)
        for e in events:
            conn.execute(
                "INSERT OR REPLACE INTO gdelt_events (event_id, event_time, title, lat, lng, impact_score, url) VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    e["event_id"],
                    e.get("event_time", ""),
                    e.get("title", ""),
                    e.get("lat", 0),
                    e.get("lng", 0),
                    e.get("impact_score", 1),
                    e.get("url", ""),
                ),
            )
        # Display-only battle-damage model derived from the current batch.
        high = sum(1 for x in events if x.get("impact_score", 0) >= 7)
        strikes = sum(1 for x in events if "strike" in (x.get("title") or "").lower() or "attack" in (x.get("title") or "").lower())
        casualties = min(5000, high * 80 + len(events) * 10)  # rough estimate, capped at 5000
        conn.execute(
            "INSERT OR REPLACE INTO conflict_stats (id, total_events, high_impact_events, estimated_casualties, estimated_strike_count, updated_at) VALUES (1, ?, ?, ?, ?, ?)",
            (len(events), high, casualties, strikes, datetime.utcnow().isoformat()),
        )
        # Touch situation.updated_at (table owned by the Node side) so the
        # frontend "last updated" display refreshes.
        conn.execute(
            "INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)",
            (datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z"),),
        )
        conn.commit()
    except Exception as e:
        print(f"写入 DB 失败: {e}")
        conn.rollback()
    finally:
        conn.close()
def _notify_node() -> None:
    """POST to the Node API so it reloads data and rebroadcasts to clients.

    Sends the optional X-Crawler-Token header when API_CRAWLER_TOKEN is set.
    Bypasses any system proxy. Failures are logged, never raised.
    """
    token = os.environ.get("API_CRAWLER_TOKEN", "").strip()
    headers = {"X-Crawler-Token": token} if token else {}
    try:
        response = requests.post(
            f"{API_BASE}/api/crawler/notify",
            timeout=5,
            headers=headers,
            proxies={"http": None, "https": None},
        )
        if response.status_code != 200:
            print(" [warn] notify API 失败")
    except Exception as e:
        print(f" [warn] notify API: {e}")
def _rss_to_gdelt_fallback() -> None:
    """When GDELT is disabled, mirror situation_update rows into gdelt_events.

    Gives the map conflict points even without GDELT access: coordinates are
    inferred from keywords in each summary and impact is derived from the row
    severity. Best-effort — any failure is logged and ignored.
    """
    if not GDELT_DISABLED or not os.path.exists(DB_PATH):
        return
    try:
        conn = sqlite3.connect(DB_PATH, timeout=10)
        rows = conn.execute(
            "SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 50"
        ).fetchall()
        conn.close()
        events = []
        for r in rows:
            uid, ts, cat, summary, sev = r
            # Only the first 300 chars are scanned for location keywords.
            lng, lat = _infer_coords((summary or "")[:300])
            impact = _severity_to_score(sev)
            events.append({
                "event_id": f"rss_{uid}",  # prefix avoids clashing with GDELT ids
                "event_time": ts,
                "title": (summary or "")[:500],
                "lat": lat,
                "lng": lng,
                "impact_score": impact,
                "url": "",
            })
        if events:
            global EVENT_CACHE
            EVENT_CACHE = events
            _write_to_db(events)
            _notify_node()
    except Exception as e:
        print(f" [warn] RSS→gdelt fallback: {e}")
# ==========================
# RSS 新闻抓取:使用统一流水线(抓取 → 清洗 → 去重 → 映射 → 写表 → 通知)
# ==========================
LAST_FETCH = {"items": 0, "inserted": 0, "error": None}
def _refresh_panel_data() -> int:
    """Re-extract panel data (combat losses, bases, …) from recent events.

    Runs independently of whether the current cycle produced new RSS items:
    re-reads the latest 50 situation_update rows, runs the configured
    extractor on each, and merges results into the panel tables.

    Returns:
        Number of rows successfully merged; 0 on any top-level failure.
    """
    if not os.path.exists(DB_PATH):
        return 0
    try:
        from db_merge import merge
        # Extractor priority: DashScope (API key set) > rules (AI disabled) > Ollama AI.
        use_dashscope = bool(os.environ.get("DASHSCOPE_API_KEY", "").strip())
        if use_dashscope:
            from extractor_dashscope import extract_from_news
        elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
        else:
            from extractor_ai import extract_from_news
        conn = sqlite3.connect(DB_PATH, timeout=10)
        rows = conn.execute(
            "SELECT id, timestamp, category, summary FROM situation_update ORDER BY timestamp DESC LIMIT 50"
        ).fetchall()
        conn.close()
        merged = 0
        for r in rows:
            uid, ts, cat, summary = r
            text = ((cat or "") + " " + (summary or "")).strip()
            if len(text) < 20:  # too short to extract anything meaningful
                continue
            try:
                extracted = extract_from_news(text, timestamp=ts)
                if extracted and merge(extracted, db_path=DB_PATH):
                    merged += 1
            except Exception:
                # Per-row best-effort: one bad row must not abort the batch.
                pass
        return merged
    except Exception:
        return 0
def fetch_news() -> None:
    """Run the full RSS write pipeline and record the outcome in LAST_FETCH.

    Produces panel data (combat losses, bases, conflict events) plus the
    event timeline. When GDELT is disabled, backfills gdelt_events from RSS
    so the map still has points. Always notifies the Node API afterwards.
    """
    try:
        from pipeline import run_full_pipeline
        LAST_FETCH["error"] = None
        n_fetched, n_news, n_panel = run_full_pipeline(
            db_path=DB_PATH,
            api_base=API_BASE,
            translate=True,
            notify=False,  # notification happens below, after the optional fallback
        )
        LAST_FETCH["items"] = n_fetched
        LAST_FETCH["inserted"] = n_news
        if GDELT_DISABLED:
            _rss_to_gdelt_fallback()
        _notify_node()
        ts = datetime.now().strftime("%H:%M:%S")
        print(f"[{ts}] 抓取 {n_fetched} 条,去重新增 {n_news} 条,写脉络 {n_panel} 条 → 面板实时数据(战损/据点)已由本批提取更新")
        if n_fetched == 0:
            print(f"[{ts}] 0 条检查网络、RSS 源或 KEYWORDS 过滤)")
    except Exception as e:
        LAST_FETCH["error"] = str(e)
        print(f"[{datetime.now().strftime('%H:%M:%S')}] 新闻抓取失败: {e}")
# 每 N 轮做一次「从近期事件回填面板实时数据」,保证战损/据点等与最新内容一致
BACKFILL_CYCLES = int(os.environ.get("BACKFILL_CYCLES", "2"))
_cycle_count = 0
# ==========================
# 定时任务asyncio 后台任务,避免 APScheduler executor 关闭竞态)
# ==========================
_bg_task: Optional[asyncio.Task] = None
async def _periodic_fetch() -> None:
    """Background loop: fetch RSS + GDELT, periodically backfill panel data.

    Blocking fetchers run in the default thread-pool executor so the event
    loop stays responsive. Exits cleanly when the task is cancelled; all
    other exceptions are logged and the loop continues.
    """
    global _cycle_count
    # get_running_loop() is the supported call inside a coroutine;
    # get_event_loop() is deprecated there since Python 3.10 and this
    # project requires >= 3.11 (see pyproject.toml).
    loop = asyncio.get_running_loop()
    while True:
        try:
            await loop.run_in_executor(None, fetch_news)
            await loop.run_in_executor(None, fetch_gdelt_events)
            _cycle_count += 1
            # Every BACKFILL_CYCLES rounds, re-derive panel data from recent
            # events so losses/bases stay consistent with the latest content.
            if _cycle_count >= BACKFILL_CYCLES:
                _cycle_count = 0
                merged = _refresh_panel_data()
                if merged > 0:
                    _notify_node()
                    ts = datetime.now().strftime("%H:%M:%S")
                    print(f"[{ts}] 面板实时数据回填:从近期事件合并 {merged} 条(战损/据点)")
        except asyncio.CancelledError:
            break
        except Exception as e:
            print(f" [warn] 定时抓取: {e}")
        await asyncio.sleep(min(RSS_INTERVAL_SEC, FETCH_INTERVAL_SEC))
# ==========================
# API 接口
# ==========================
@app.post("/crawler/backfill")
def crawler_backfill():
    """Re-extract and merge panel data from recent situation_update rows.

    Repairs historical rows stored before extraction existed. Processes the
    latest 50 rows with the configured extractor, merges results, notifies
    the Node API, and reports {ok, processed, merged} (or {ok: False, error}).
    """
    if not os.path.exists(DB_PATH):
        return {"ok": False, "error": "db not found"}
    try:
        from db_merge import merge
        # Extractor priority: DashScope (API key set) > rules (AI disabled) > Ollama AI.
        use_dashscope = bool(os.environ.get("DASHSCOPE_API_KEY", "").strip())
        if use_dashscope:
            from extractor_dashscope import extract_from_news
        elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
        else:
            from extractor_ai import extract_from_news
        conn = sqlite3.connect(DB_PATH, timeout=10)
        rows = conn.execute(
            "SELECT id, timestamp, category, summary FROM situation_update ORDER BY timestamp DESC LIMIT 50"
        ).fetchall()
        conn.close()
        merged = 0
        for r in rows:
            uid, ts, cat, summary = r
            text = ((cat or "") + " " + (summary or "")).strip()
            if len(text) < 20:  # too short to extract anything meaningful
                continue
            try:
                extracted = extract_from_news(text, timestamp=ts)
                if extracted and merge(extracted, db_path=DB_PATH):
                    merged += 1
            except Exception:
                # Per-row best-effort: one bad row must not abort the batch.
                pass
        _notify_node()
        return {"ok": True, "processed": len(rows), "merged": merged}
    except Exception as e:
        return {"ok": False, "error": str(e)}
@app.get("/crawler/status")
def crawler_status():
    """Crawler health endpoint for debugging the data-update chain.

    Reports the DB path/existence, the situation_update row count, and the
    outcome of the most recent fetch cycle (LAST_FETCH).

    Note: the redundant function-local `import os` was removed — `os` is
    already imported at module level.
    """
    db_ok = os.path.exists(DB_PATH)
    total = 0
    if db_ok:
        try:
            conn = sqlite3.connect(DB_PATH, timeout=3)
            total = conn.execute("SELECT COUNT(*) FROM situation_update").fetchone()[0]
            conn.close()
        except Exception:
            # DB may be mid-write or missing the table; report 0 rather than fail.
            pass
    return {
        "db_path": DB_PATH,
        "db_exists": db_ok,
        "situation_update_count": total,
        "last_fetch_items": LAST_FETCH.get("items", 0),
        "last_fetch_inserted": LAST_FETCH.get("inserted", 0),
        "last_fetch_error": LAST_FETCH.get("error"),
    }
@app.get("/events")
def get_events():
    """Return the in-memory GDELT event cache plus aggregate conflict stats."""
    stats = _get_conflict_stats()
    payload = {
        "updated_at": datetime.utcnow().isoformat(),
        "count": len(EVENT_CACHE),
        "events": EVENT_CACHE,
        "conflict_stats": stats,
    }
    return payload
def _get_conflict_stats() -> dict:
    """Read the singleton conflict_stats row; all-zero dict when unavailable."""
    empty = {
        "total_events": 0,
        "high_impact_events": 0,
        "estimated_casualties": 0,
        "estimated_strike_count": 0,
    }
    if not os.path.exists(DB_PATH):
        return empty
    try:
        conn = sqlite3.connect(DB_PATH, timeout=5)
        row = conn.execute(
            "SELECT total_events, high_impact_events, estimated_casualties, estimated_strike_count FROM conflict_stats WHERE id = 1"
        ).fetchone()
        conn.close()
    except Exception:
        # Missing table / locked DB — fall back to zeros.
        return empty
    if not row:
        return empty
    keys = ("total_events", "high_impact_events", "estimated_casualties", "estimated_strike_count")
    return dict(zip(keys, row))
@app.on_event("startup")
async def startup():
    """Launch the periodic fetch loop as a background task.

    Deliberately does NOT run a first fetch inline, so the app (and its
    /crawler/status probe) becomes ready immediately instead of blocking on
    the network during startup.
    """
    # NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
    # lifespan handlers — consider migrating when upgrading.
    global _bg_task
    _bg_task = asyncio.create_task(_periodic_fetch())
@app.on_event("shutdown")
async def shutdown():
    """Cancel the background fetch task and wait for it to wind down."""
    global _bg_task
    if _bg_task and not _bg_task.done():
        _bg_task.cancel()
        try:
            # Await so the CancelledError is consumed and the task finishes
            # cleanly before the process exits.
            await _bg_task
        except asyncio.CancelledError:
            pass
if __name__ == "__main__":
    # Local/dev entrypoint; in production uvicorn is launched externally
    # (see crawler/run_uvicorn.sh).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

10
crawler/requirements.txt Normal file
View File

@@ -0,0 +1,10 @@
# Python 3.11+ 爬虫依赖(使用当前最新兼容版本)
# 安装: pip install -r crawler/requirements.txt
requests>=2.32.0
feedparser>=6.0.10
beautifulsoup4>=4.12.0
pytest>=8.0.0
fastapi>=0.115.0
uvicorn[standard]>=0.32.0
deep-translator>=1.11.0
dashscope>=1.20.0

51
crawler/run_once.py Normal file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
单独运行爬虫一轮:抓取 → 清洗 → 去重 → 写库 → 通知 Node可选
终端直接输出抓取条数及内容摘要,便于排查。
用法(项目根或 crawler 目录):
python run_once.py
python -c "import run_once; run_once.main()"
或: npm run crawler:once
"""
import os
import sys
from datetime import datetime
# 保证可导入同目录模块
if __name__ == "__main__":
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def main():
    """Run one crawl cycle (fetch → clean → dedup → write → notify) and print a summary.

    Returns 0 so it can be used directly as a process exit code.
    """
    from config import DB_PATH, API_BASE
    from pipeline import run_full_pipeline
    # Optional lower bound on item dates, read from the environment.
    crawl_start = os.environ.get("CRAWL_START_DATE", "").strip()
    print("========================================")
    print("爬虫单次运行RSS → 清洗 → 去重 → 写库)")
    print("DB:", DB_PATH)
    print("API_BASE:", API_BASE)
    if crawl_start:
        print("时间范围: 仅保留 CRAWL_START_DATE 之后:", crawl_start)
    print("========================================\n")
    n_fetched, n_news, n_panel = run_full_pipeline(
        db_path=DB_PATH,
        api_base=API_BASE,
        translate=True,
        notify=True,  # single-shot run notifies the Node API itself
    )
    print("")
    print("----------------------------------------")
    print("本轮结果:")
    print(f" 抓取: {n_fetched}")
    print(f" 去重后新增资讯: {n_news}")
    print(f" 写入事件脉络: {n_panel}")
    if n_fetched == 0:
        print(" 0 条检查网络、RSS 源或 config.KEYWORDS 过滤)")
    print("----------------------------------------")
    return 0
if __name__ == "__main__":
sys.exit(main())

9
crawler/run_uvicorn.sh Normal file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# For PM2: start uvicorn (the GDELT/RSS realtime service on :8000) from the
# crawler directory.
set -e
cd "$(dirname "$0")"
# Ensure a UTF-8 locale so Python can print non-ASCII log lines.
# NOTE(review): the `[ -n "$VAR" ] ||` guard is redundant with the
# ${VAR:-default} expansion, but left as-is (doc-only change).
[ -n "$LANG" ] || export LANG="${LANG:-en_US.UTF-8}"
[ -n "$LC_ALL" ] || export LC_ALL="${LC_ALL:-en_US.UTF-8}"
# Load the project-root .env if present (PM2 normally supplies env via its
# ecosystem config, so this is a fallback).
if [ -f ../.env ]; then set -a; . ../.env; set +a; fi
exec python3 -m uvicorn realtime_conflict_service:app --host 0.0.0.0 --port 8000

View File

@@ -0,0 +1 @@
# -*- coding: utf-8 -*-

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
"""RSS 抓取:按源独立超时与错误隔离,单源失败不影响其他源"""
import re
import socket
from datetime import datetime, timezone
from typing import List, Set, Tuple
import feedparser
from config import KEYWORDS, FEED_TIMEOUT, get_feed_sources
from parser_ai import classify_and_severity
def _parse_date(entry) -> datetime:
for attr in ("published_parsed", "updated_parsed"):
val = getattr(entry, attr, None)
if val:
try:
return datetime(*val[:6], tzinfo=timezone.utc)
except (TypeError, ValueError):
pass
return datetime.now(timezone.utc)
def _strip_html(s: str) -> str:
return re.sub(r"<[^>]+>", "", s) if s else ""
def _matches_keywords(text: str) -> bool:
    """Return True when `text` contains at least one configured keyword.

    Matching is case-insensitive substring containment against config.KEYWORDS.
    """
    haystack = (text or "").lower()
    return any(keyword.lower() in haystack for keyword in KEYWORDS)
def _fetch_one_feed(name: str, url: str, timeout: int) -> List[dict]:
    """Fetch and keyword-filter a single RSS feed; errors/timeouts yield [].

    Deduplication is the caller's responsibility. The socket default timeout
    is set for the duration of the parse (feedparser exposes no timeout
    parameter) and restored afterwards.
    """
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        feed = feedparser.parse(
            url,
            request_headers={"User-Agent": "US-Iran-Dashboard/1.0"},
            agent="US-Iran-Dashboard/1.0",
        )
    except Exception as e:
        print(f" [rss] {name} error: {e}")
        return []
    finally:
        # Always restore the process-wide default timeout.
        socket.setdefaulttimeout(old_timeout)
    out = []
    for entry in feed.entries:
        title = getattr(entry, "title", "") or ""
        raw_summary = getattr(entry, "summary", "") or getattr(entry, "description", "") or ""
        summary = _strip_html(raw_summary)
        link = getattr(entry, "link", "") or ""
        text = f"{title} {summary}"
        # Keep only entries matching the configured keyword list.
        if not _matches_keywords(text):
            continue
        published = _parse_date(entry)
        cat, sev = classify_and_severity(text)
        out.append({
            "title": title,
            "summary": summary[:400] if summary else title,  # fall back to title
            "url": link,
            "published": published,
            "category": cat,
            "severity": sev,
            "source": name,
        })
    return out
def fetch_all() -> List[dict]:
    """Fetch every configured RSS source with per-source isolation.

    Each source gets its own timeout and a failing source contributes
    nothing. Results are deduplicated globally by (truncated title, url).
    """
    sources = get_feed_sources()
    items: List[dict] = []
    seen: Set[Tuple[str, str]] = set()
    for name, url in sources or []:
        for item in _fetch_one_feed(name, url, FEED_TIMEOUT):
            key = (item["title"][:80], item["url"])
            if key not in seen:
                seen.add(key)
                items.append(item)
    return items

View File

@@ -0,0 +1 @@
# crawler tests

Binary file not shown.

View File

@@ -0,0 +1,198 @@
# -*- coding: utf-8 -*-
"""
爬虫数据清洗与字段映射测试
验证 extractor_rules、extractor_dashscope、db_merge 的正确性
"""
import os
import sqlite3
import tempfile
from pathlib import Path
import pytest
# 确保 crawler 在 path 中
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in __import__("sys").path:
__import__("sys").path.insert(0, str(ROOT))
from extractor_rules import extract_from_news as extract_rules
class TestExtractorRules:
    """Unit tests for the rule-based extractor (extractor_rules)."""

    def test_trump_1000_targets_no_bases(self):
        """Rhetorical 'Trump: 1000 Iranian military targets hit' text must NOT yield base losses."""
        text = "特朗普说伊朗有1000个军事目标遭到袭击美国已做好进一步打击准备"
        out = extract_rules(text)
        delta = out.get("combat_losses_delta", {})
        for side in ("us", "iran"):
            if side in delta:
                assert delta[side].get("bases_destroyed") is None, f"{side} bases_destroyed 不应被提取"
                assert delta[side].get("bases_damaged") is None, f"{side} bases_damaged 不应被提取"

    def test_base_damaged_when_explicit(self):
        """'Al-Asad air base attacked' should produce key_location_updates for the US side."""
        text = "阿萨德空军基地遭袭,损失严重"
        out = extract_rules(text)
        # The rules trigger key_location_updates (base attacked + name match).
        assert "key_location_updates" in out
        kl = out["key_location_updates"]
        assert len(kl) >= 1
        assert any(u.get("side") == "us" and "阿萨德" in (u.get("name_keywords") or "") for u in kl)

    def test_us_personnel_killed(self):
        """'3 US troops killed, 5 wounded' -> personnel_killed=3, personnel_wounded=5."""
        text = "据报道3名美军阵亡另有5人受伤"
        out = extract_rules(text)
        assert "combat_losses_delta" in out
        us = out["combat_losses_delta"].get("us", {})
        assert us.get("personnel_killed") == 3
        assert us.get("personnel_wounded") == 5

    def test_iran_personnel_killed(self):
        """'10 Iranian soldiers killed' -> iran.personnel_killed=10."""
        text = "伊朗方面称10名伊朗士兵死亡"
        out = extract_rules(text)
        iran = out.get("combat_losses_delta", {}).get("iran", {})
        assert iran.get("personnel_killed") == 10

    def test_civilian_us_context(self):
        """US strike causing 50 civilian casualties attributes losses to the US side."""
        text = "美军空袭造成50名平民伤亡"
        out = extract_rules(text)
        us = out.get("combat_losses_delta", {}).get("us", {})
        assert us.get("civilian_killed") == 50

    def test_civilian_iran_context(self):
        """Iranian strike on Iraqi civilians attributes losses to the Iran side."""
        text = "伊朗空袭造成伊拉克平民50人伤亡"
        out = extract_rules(text)
        iran = out.get("combat_losses_delta", {}).get("iran", {})
        assert iran.get("civilian_killed") == 50

    def test_drone_attribution_iran(self):
        """'US shot down 10 Iranian drones' -> iran.drones=10."""
        text = "美军击落伊朗10架无人机"
        out = extract_rules(text)
        iran = out.get("combat_losses_delta", {}).get("iran", {})
        assert iran.get("drones") == 10

    def test_empty_or_short_text(self):
        """Empty or too-short text yields no combat_losses_delta."""
        # NOTE(review): these expressions rely on `==`/`in` binding tighter
        # than `or` — correct in Python, but hard to read at a glance.
        assert extract_rules("") == {} or "combat_losses_delta" not in extract_rules("")
        assert "combat_losses_delta" not in extract_rules("abc") or not extract_rules("abc").get("combat_losses_delta")
class TestDbMerge:
    """Field-mapping and incremental-accumulation tests for db_merge."""

    @pytest.fixture
    def temp_db(self):
        # Fresh temp SQLite file per test, removed afterwards (best-effort).
        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
            path = f.name
        yield path
        try:
            os.unlink(path)
        except OSError:
            pass

    def test_merge_combat_losses_delta(self, temp_db):
        """Two merges of combat_losses_delta accumulate (3 + 2 killed = 5)."""
        from db_merge import merge
        merge({"combat_losses_delta": {"us": {"personnel_killed": 3, "personnel_wounded": 2}}}, db_path=temp_db)
        merge({"combat_losses_delta": {"us": {"personnel_killed": 2}}}, db_path=temp_db)
        conn = sqlite3.connect(temp_db)
        row = conn.execute("SELECT personnel_killed, personnel_wounded FROM combat_losses WHERE side='us'").fetchone()
        conn.close()
        assert row[0] == 5
        assert row[1] == 2

    def test_merge_all_combat_fields(self, temp_db):
        """merge maps every combat_losses field to its column, in order."""
        from db_merge import merge
        delta = {
            "personnel_killed": 1,
            "personnel_wounded": 2,
            "civilian_killed": 3,
            "civilian_wounded": 4,
            "bases_destroyed": 1,
            "bases_damaged": 2,
            "aircraft": 3,
            "warships": 4,
            "armor": 5,
            "vehicles": 6,
            "drones": 7,
            "missiles": 8,
            "helicopters": 9,
            "submarines": 10,
        }
        merge({"combat_losses_delta": {"iran": delta}}, db_path=temp_db)
        conn = sqlite3.connect(temp_db)
        row = conn.execute(
            """SELECT personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
            bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles,
            drones, missiles, helicopters, submarines FROM combat_losses WHERE side='iran'"""
        ).fetchone()
        conn.close()
        assert row == (1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

    def test_merge_key_location_requires_table(self, temp_db):
        """key_location_updates only updates rows that already exist in key_location."""
        from db_merge import merge
        # Seed a key_location row for the update to match against.
        conn = sqlite3.connect(temp_db)
        conn.execute(
            """CREATE TABLE IF NOT EXISTS key_location (id INTEGER PRIMARY KEY, side TEXT, name TEXT, lat REAL, lng REAL, type TEXT, region TEXT, status TEXT, damage_level INTEGER)"""
        )
        conn.execute(
            "INSERT INTO key_location (side, name, lat, lng, type, region, status, damage_level) VALUES ('us', '阿萨德空军基地', 33.0, 43.0, 'Base', 'IRQ', 'operational', 0)"
        )
        conn.commit()
        conn.close()
        merge(
            {"key_location_updates": [{"name_keywords": "阿萨德|asad", "side": "us", "status": "attacked", "damage_level": 2}]},
            db_path=temp_db,
        )
        conn = sqlite3.connect(temp_db)
        row = conn.execute("SELECT status, damage_level FROM key_location WHERE name LIKE '%阿萨德%'").fetchone()
        conn.close()
        assert row[0] == "attacked"
        assert row[1] == 2
class TestEndToEndTrumpExample:
    """End-to-end: the 'Trump 1000 military targets' rhetorical-claim case."""

    def test_full_pipeline_trump_no_bases(self, tmp_path):
        """Rules extraction + merge must not add base losses for this text."""
        from db_merge import merge
        db_path = str(tmp_path / "test.db")
        (tmp_path / "test.db").touch()  # file must exist or merge is a no-op
        # Seed zeroed base counters for both sides.
        merge({"combat_losses_delta": {"us": {"bases_destroyed": 0, "bases_damaged": 0}, "iran": {"bases_destroyed": 0, "bases_damaged": 0}}}, db_path=db_path)
        text = "特朗普说伊朗有1000个军事目标遭到袭击"
        out = extract_rules(text)
        # Extractor must not emit base counts for this text.
        # NOTE(review): `A or B or C and D` parses as A or B or (C and D) —
        # presumably intentional, but worth confirming against the intent.
        assert "combat_losses_delta" not in out or (
            "iran" not in out.get("combat_losses_delta", {})
            or out["combat_losses_delta"].get("iran", {}).get("bases_destroyed") is None
            and out["combat_losses_delta"].get("iran", {}).get("bases_damaged") is None
        )
        if "combat_losses_delta" in out:
            merge(out, db_path=db_path)
        conn = sqlite3.connect(db_path)
        iran = conn.execute("SELECT bases_destroyed, bases_damaged FROM combat_losses WHERE side='iran'").fetchone()
        conn.close()
        # If the extractor emitted no base fields, merge leaves them untouched;
        # any erroneous output must still leave them at 0.
        if iran:
            assert iran[0] == 0
            assert iran[1] == 0

View File

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
"""英译中,入库前统一翻译"""
import os
import re
from typing import Optional
def _is_mostly_chinese(text: str) -> bool:
if not text or len(text.strip()) < 2:
return False
chinese = len(re.findall(r"[\u4e00-\u9fff]", text))
return chinese / max(len(text), 1) > 0.3
def translate_to_chinese(text: str) -> str:
    """Translate text to Chinese; return the input on failure or if already Chinese.

    Notes:
        - External translation (deep_translator) is DISABLED by default:
          with TRANSLATE_DISABLED unset (treated as "1") the input is
          returned untouched, so network/proxy issues cannot stall the
          pipeline.
        - Set TRANSLATE_DISABLED=0 explicitly to enable translation.
    """
    if not text or not text.strip():
        return text
    # Default-off switch: an unset variable behaves like "1" (disabled).
    if os.environ.get("TRANSLATE_DISABLED", "1") == "1":
        return text
    s = str(text).strip()
    if len(s) > 2000:
        # Cap request size for the translation backends.
        s = s[:2000]
    if _is_mostly_chinese(s):
        return text
    # Try Google first, then MyMemory; first non-trivial result wins.
    for translator in ["google", "mymemory"]:
        try:
            if translator == "google":
                from deep_translator import GoogleTranslator
                out = GoogleTranslator(source="auto", target="zh-CN").translate(s)
            else:
                from deep_translator import MyMemoryTranslator
                out = MyMemoryTranslator(source="auto", target="zh-CN").translate(s)
            if out and out.strip() and out != s:
                return out
        except Exception:
            # Backend failed — fall through to the next one.
            continue
    return text

13
docker-compose.dev.yml Normal file
View File

@@ -0,0 +1,13 @@
# 开发模式:挂载源码 + 热重载,代码更新后无需重新 build
# 使用: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
# 或: docker compose --profile dev up -d (需在 dev 服务加 profiles)
services:
api:
volumes:
- ./server:/app/server:ro
command: ["node", "--watch", "server/index.js"]
crawler:
volumes:
- ./crawler:/app
command: ["uvicorn", "realtime_conflict_service:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]

37
docker-compose.yml Normal file
View File

@@ -0,0 +1,37 @@
services:
api:
image: usa-dashboard-api:latest
build:
context: .
args:
- VITE_MAPBOX_ACCESS_TOKEN=${VITE_MAPBOX_ACCESS_TOKEN:-}
ports:
- "3001:3001"
environment:
- DB_PATH=/data/data.db
- API_PORT=3001
volumes:
- app-data:/data
restart: unless-stopped
crawler:
image: usa-dashboard-crawler:latest
build:
context: .
dockerfile: Dockerfile.crawler
ports:
- "8000:8000"
environment:
- DB_PATH=/data/data.db
- API_BASE=http://api:3001
- GDELT_DISABLED=1
- RSS_INTERVAL_SEC=60
- DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY:-}
volumes:
- app-data:/data
depends_on:
- api
restart: unless-stopped
volumes:
app-data:

8
docker-entrypoint.sh Normal file
View File

@@ -0,0 +1,8 @@
#!/bin/sh
# Container entrypoint: seed the SQLite database on first run, then start
# the Node API server in the foreground (exec keeps PID 1 semantics).
set -e
export DB_PATH="${DB_PATH:-/data/data.db}"
# Seed only when the DB file does not exist yet (the /data volume persists).
if [ ! -f "$DB_PATH" ]; then
    echo "==> Seeding database..."
    node server/seed.js
fi
exec node server/index.js

91
docs/BACKEND_MODULES.md Normal file
View File

@@ -0,0 +1,91 @@
# 后端模块说明
## 一、现有模块结构
```
server/
├── index.js # HTTP + WebSocket 入口
├── routes.js # REST API 路由
├── db.js # SQLite schema 与连接
├── situationData.js # 态势数据聚合 (从 DB 读取)
├── seed.js # 初始数据填充
├── data.db # SQLite 数据库
└── package.json
crawler/
├── realtime_conflict_service.py # GDELT 实时冲突服务 (核心)
├── requirements.txt
├── config.py, db_writer.py # 旧 RSS 爬虫(可保留)
├── main.py
└── README.md
```
### 1. server/index.js
- Express + CORS
- WebSocket (`/ws`),每 5 秒广播 `situation`
- `POST /api/crawler/notify`:爬虫写入后触发立即广播
### 2. server/routes.js
- `GET /api/situation`:完整态势
- `GET /api/events`GDELT 事件 + 冲突统计
- `GET /api/health`:健康检查
### 3. server/db.js
- 表:`situation``force_summary``power_index``force_asset`
`key_location``combat_losses``wall_street_trend`
`retaliation_current``retaliation_history``situation_update`
**`gdelt_events`**、**`conflict_stats`**
---
## 二、GDELT 核心数据源
**GDELT Project**:全球冲突数据库,约 15 分钟级更新,含经纬度、事件编码、参与方、事件强度。
### realtime_conflict_service.py
- 定时(默认 60 秒)从 GDELT API 抓取
- 冲突强度评分missile +3, strike +2, killed +4 等
- 无经纬度时默认攻击源:`IRAN_COORD = [51.3890, 35.6892]`
- 写入 `gdelt_events``conflict_stats`
- 调用 `POST /api/crawler/notify` 触发 Node 广播
### 冲突强度 → 地图效果
| impact_score | 效果 |
|--------------|------------|
| 13 | 绿色点 |
| 46 | 橙色闪烁 |
| 710 | 红色脉冲扩散 |
### 战损统计模型(展示用)
- `total_events`
- `high_impact_events` (impact ≥ 7)
- `estimated_casualties`
- `estimated_strike_count`
---
## 三、数据流
```
GDELT API → Python 服务(60s) → gdelt_events, conflict_stats
POST /api/crawler/notify → situation.updated_at
WebSocket 广播 getSituation() → 前端
```
---
## 四、运行方式
```bash
# 1. 启动 Node API
npm run api
# 2. 启动 GDELT 服务
npm run gdelt
# 或: cd crawler && uvicorn realtime_conflict_service:app --port 8000
```

137
docs/CRAWLER_LOGIC.md Normal file
View File

@@ -0,0 +1,137 @@
# 爬虫逻辑梳理与数据校验
## 一、两条入口,数据流不同
### 1. 入口 A`npm run crawler`main.py
- **流程**RSS 抓取 → 关键词过滤 → 分类/严重度 → **直接写 situation_update** → 通知 API
- **不经过**翻译、news_content、AI 提取(战损/基地等)
- **写入表**`situation_update``situation.updated_at`
- **用途**:轻量、只给「事件脉络」喂新条目,不更新战损/基地/报复指数
```
RSS_FEEDS → fetch_all() → KEYWORDS 过滤 → parser_ai.classify_and_severity
→ write_updates(items) → situation_update INSERT + situation 表 touch
→ notify_api()
```
### 2. 入口 B`npm run gdelt`realtime_conflict_service.py
- **流程**RSS 抓取 → 翻译 → 清洗 → **news_content 去重** → situation_update → **AI 提取 → db_merge** → GDELT 事件(可选)→ 通知 API
- **写入表**`news_content``situation_update``situation`;提取后还有 `combat_losses``key_location``retaliation_*``wall_street_trend`
- **用途**:完整管线,前端「战损 / 军事基地 / 报复 / 美股」等数据都依赖这条
```
RSS → fetch_all() → translate_to_chinese → cleaner_ai → save_and_dedup → news_content
→ write_updates(new_items) → situation_update
→ _extract_and_merge_panel_data(new_items) → extract_from_news() → db_merge.merge()
→ (可选) fetch_gdelt_events() → gdelt_events, conflict_stats
→ _notify_node()
```
**结论**:要检查「抓回的数据是否有效」且包含战损/基地等,应跑 **入口 B**gdelt 服务);若只关心事件脉络条数,可看入口 A。
---
## 二、入口 B 逐步拆解(用于逐段校验)
### 2.1 RSS 抓取与过滤
| 步骤 | 位置 | 说明 |
|------|------|------|
| 源列表 | `config.RSS_FEEDS` | 多国媒体 RSS见 config.py |
| 抓取 | `scrapers.rss_scraper.fetch_all()` | feedparser单源超时 10s |
| 过滤 | `_matches_keywords(text)` | 标题+摘要 至少命中 `config.KEYWORDS` 中一个才保留 |
| 去重 | `(title[:80], link)` | 同一条不重复加入当次列表 |
| 分类 | `parser_ai.classify_and_severity(text)` | 得到 category、severityOllama 或规则) |
**校验**`npm run crawler:test` 看本次抓到的条数;若为 0查网络或放宽/检查 KEYWORDS。
### 2.2 翻译与清洗(仅入口 B
| 步骤 | 位置 | 说明 |
|------|------|------|
| 翻译 | `translate_utils.translate_to_chinese()` | 标题/摘要译成中文(依赖配置) |
| 清洗 | `cleaner_ai.clean_news_for_panel()` | 截断、清理;`ensure_category` / `ensure_severity` 合法化 |
### 2.3 落库news_content去重与 situation_update
| 步骤 | 位置 | 说明 |
|------|------|------|
| 去重 | `news_storage.save_and_dedup(items)` | 按 `content_hash(title, summary, url)` 判重,只插入新记录 |
| 表 | `news_content` | id, content_hash, title, summary, url, source, published_at, category, severity |
| 表 | `situation_update` | 仅对 **去重后的 new_items** 调用 `write_updates()`,供前端「事件脉络」 |
**校验**
- `news_content``SELECT COUNT(*), MAX(published_at) FROM news_content`
- `situation_update``SELECT COUNT(*), MAX(timestamp) FROM situation_update`
- 服务状态:`GET http://localhost:8000/crawler/status``last_fetch_items` / `last_fetch_inserted` / `last_fetch_error`
### 2.4 AI 提取与 db_merge战损 / 基地 / 报复等)
| 步骤 | 位置 | 说明 |
|------|------|------|
| 输入 | `_extract_and_merge_panel_data(new_items)` | 仅处理本次 **新增** 的 new_items前 limit 条DashScope 10 条,规则 25 条Ollama 10 条) |
| 文本 | 每条 `title + " " + summary`,长度 < 20 跳过 |
| 提取器选择 | 环境变量 | `DASHSCOPE_API_KEY` → extractor_dashscope`CLEANER_AI_DISABLED=1` → extractor_rules否则 extractor_aiOllama |
| 输出结构 | 见 panel_schema / 各 extractor | `situation_update?`, `combat_losses_delta?`, `retaliation?`, `wall_street?`, `key_location_updates?` |
| 合并 | `db_merge.merge(extracted)` | 见下表 |
**merge 映射概要**
| 提取字段 | 写入表/逻辑 |
|----------|-------------|
| situation_update | situation_update 表 INSERTid 为 hash |
| combat_losses_delta | combat_losses 表,按 side 增量叠加 |
| retaliation | retaliation_current 替换 + retaliation_history 追加 |
| wall_street | wall_street_trend 表 INSERT |
| key_location_updates | key_location 表 UPDATE status/damage_levelname LIKE 关键词) |
**校验**
- 战损:`SELECT * FROM combat_losses`
- 基地:`SELECT id, name, side, status, damage_level FROM key_location WHERE status != 'operational' OR damage_level > 0`
- 报复:`SELECT * FROM retaliation_current``retaliation_history` 最近几条
- 事件脉络:`SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 20`
### 2.5 GDELT可选
- `GDELT_DISABLED=1` 时跳过 GDELT仅用 RSS可用 `_rss_to_gdelt_fallback()` 用 RSS 标题生成 gdelt_events。
- 未禁用时:`fetch_gdelt_events()` 拉 GDELT → 写 `gdelt_events``conflict_stats`
**校验**`SELECT COUNT(*), MAX(event_time) FROM gdelt_events``SELECT * FROM conflict_stats WHERE id=1`
---
## 三、如何检查「抓回的数据是否有效」
1. **确认跑的入口**
- 只跑 `npm run crawler`:只有 situation_update 会有新数据,战损/基地不会变。
-`npm run gdelt` 且服务常驻:才会既有 situation_update又有 combat_losses、key_location 等。
2. **看 DB 与 API**
- 同上:查 `news_content``situation_update``combat_losses``key_location``retaliation_*``gdelt_events``conflict_stats`
- 前端数据来源:`GET /api/situation`(见 server/situationData.js对照上述表即可。
3. **看提取是否触发**
-`combat_losses` / `key_location` 一直不更新:确认是入口 B、有 new_items、提取器未报错可对单条新闻跑 `extract_from_news(text)` 看是否产出 combat_losses_delta / key_location_updates。
4. **重跑历史提取(补数据)**
- `POST http://localhost:8000/crawler/backfill`:用当前 situation_update 最近 50 条重新做一次提取并 merge可用来修历史未提取的数据。
---
## 四、配置与环境变量(与数据有效性相关)
| 变量 | 作用 |
|------|------|
| DB_PATH | 与 server 共用的 SQLite 路径,必须一致 |
| API_BASE | 通知 Node 的地址merge 后通知前端 |
| DASHSCOPE_API_KEY | 有则用 DashScope 提取;无则用 Ollama 或规则 |
| CLEANER_AI_DISABLED=1 | 用规则提取extractor_rules不用 Ollama |
| GDELT_DISABLED=1 | 不用 GDELT仅 RSSRSS 可转 gdelt_events 占位 |
| CRAWL_INTERVAL | main.py 抓取间隔(秒) |
| RSS_INTERVAL_SEC / FETCH_INTERVAL_SEC | realtime 服务里 RSS / GDELT 间隔 |
按上述顺序对照「入口 → RSS → 去重 → situation_update → 提取 → merge → 表」即可逐段检查爬虫抓回的数据是否有效。

65
docs/CRAWLER_PIPELINE.md Normal file
View File

@@ -0,0 +1,65 @@
# 爬虫数据流水线
## 数据流
```
RSS 抓取
↓ 翻译、清洗
↓ news_storage.save_and_dedup() → 历史去重
news_content资讯独立表供后续消费
↓ 去重后的新数据
situation_update面板展示用
↓ AI 提取(阿里云 DashScope
combat_losses / retaliation / key_location / wall_street_trend
↓ notify Node
前端 WebSocket + 轮询
```
## 阿里云 DashScope API Key
设置环境变量 `DASHSCOPE_API_KEY` 后,爬虫使用阿里云通义千问进行 AI 提取。不设置时回退到规则提取(`extractor_rules`)或 Ollama若可用
```bash
# 本地
export DASHSCOPE_API_KEY=sk-xxx
# Docker
DASHSCOPE_API_KEY=sk-xxx docker compose up -d
# 或在 .env 中写入 DASHSCOPE_API_KEY=sk-xxx
```
## 表说明
| 表 | 用途 |
|----|------|
| `news_content` | 资讯原文独立存储支持去重content_hash供后续消费 |
| `situation_update` | 面板「近期更新」展示 |
| `combat_losses` | 战损数据AI/规则提取) |
| `key_location` | 基地状态 |
| `gdelt_events` | 地图冲突点 |
## 去重逻辑
根据 `content_hash = sha256(normalize(title) + normalize(summary) + url)` 判断,相同或高度相似内容视为重复,不入库。
## 消费资讯
- HTTP: `GET /api/news?limit=50`
- 调试: `/db` 面板查看 `news_content`
## 链路验证
运行脚本一键检查全链路:
```bash
./scripts/verify-pipeline.sh
```
支持环境变量覆盖:`API_URL``CRAWLER_URL`

62
docs/DATA_FLOW.md Normal file
View File

@@ -0,0 +1,62 @@
# 前端数据更新链路与字段映射
## 1. 前端数据点
| 组件 | 数据 | API 字段 | DB 表/列 |
|------|------|----------|----------|
| HeaderPanel | lastUpdated | situation.lastUpdated | situation.updated_at |
| HeaderPanel | powerIndex | usForces/iranForces.powerIndex | power_index |
| HeaderPanel | feedbackCount, shareCount | POST /api/feedback, /api/share | feedback, share_count |
| TimelinePanel | recentUpdates | situation.recentUpdates | situation_update |
| WarMap | keyLocations | usForces/iranForces.keyLocations | key_location |
| BaseStatusPanel | 基地统计 | keyLocations (status, damage_level) | key_location |
| CombatLossesPanel | 人员/平民伤亡 | combatLosses, civilianCasualtiesTotal | combat_losses |
| CombatLossesOtherPanel | 装备毁伤 | combatLosses (bases, aircraft, drones, …) | combat_losses |
| PowerChart | 雷达图 | powerIndex | power_index |
| WallStreetTrend | 美股趋势 | wallStreetInvestmentTrend | wall_street_trend |
| RetaliationGauge | 报复指数 | retaliationSentiment | retaliation_current/history |
**轮询**: `fetchSituation()` 加载WebSocket `/ws` 每 3 秒广播。`GET /api/situation``getSituation()`
## 2. 爬虫 → DB 字段映射
| 提取器输出 | DB 表 | 逻辑 |
|------------|-------|------|
| situation_update | situation_update | INSERT |
| combat_losses_delta | combat_losses | 增量叠加 (ADD) |
| retaliation | retaliation_current, retaliation_history | REPLACE / APPEND |
| wall_street | wall_street_trend | INSERT |
| key_location_updates | key_location | UPDATE status, damage_level WHERE name LIKE |
### combat_losses 字段对应
| 提取器 (us/iran) | DB 列 |
|------------------|-------|
| personnel_killed | personnel_killed |
| personnel_wounded | personnel_wounded |
| civilian_killed | civilian_killed |
| civilian_wounded | civilian_wounded |
| bases_destroyed | bases_destroyed |
| bases_damaged | bases_damaged |
| aircraft, warships, armor, vehicles | 同名 |
| drones, missiles, helicopters, submarines | 同名 |
## 3. 测试用例
运行: `npm run crawler:test:extraction`
| 用例 | 输入 | 预期 |
|------|------|------|
| 特朗普 1000 军事目标 | "特朗普说伊朗有1000个军事目标遭到袭击" | 不提取 bases_destroyed/bases_damaged |
| 阿萨德基地遭袭 | "阿萨德空军基地遭袭,损失严重" | 输出 key_location_updates |
| 美军伤亡 | "3名美军阵亡另有5人受伤" | personnel_killed=3, personnel_wounded=5 |
| 伊朗平民 | "伊朗空袭造成伊拉克平民50人伤亡" | iran.civilian_killed=50 |
| 伊朗无人机 | "美军击落伊朗10架无人机" | iran.drones=10 |
| db_merge 增量 | 两次 merge 3+2 | personnel_killed=5 |
## 4. 注意事项
- **bases_***: 仅指已确认损毁/受损的基地;"军事目标"/targets 不填 bases_*。
- **正则 [\s\w]***: 会匹配数字,导致 (\d+) 只捕获末位;数字前用 `[^\d]*`
- **伊朗平民**: 规则已支持 "伊朗空袭造成…平民" 归入 loss_ir。
- **key_location**: 需 name LIKE '%keyword%' 匹配,关键词见 extractor_rules.bases_all。

269
docs/DEBUG_PANELS.md Normal file
View File

@@ -0,0 +1,269 @@
# 看板板块逐项调试指南
本文档按前端每个板块列出:**数据来源表**、**谁写入**、**如何验证**、**常见问题**,便于逐项排查。
---
## 数据流总览
```
前端 Dashboard
→ useReplaySituation() → situation (来自 WebSocket / GET /api/situation)
→ getSituation() 读 server/situationData.js
→ 从 SQLite (server/data.db) 多表 SELECT 后拼成 JSON
```
- **写入方**`server/seed.js`(初始化)、爬虫流水线(`crawler/pipeline.py` + `db_merge.py`、GDELT 服务(`gdelt_events` / `conflict_stats`)。
- **读入方**:仅 `server/situationData.js``getSituation()`,被 `/api/situation` 与 WebSocket 广播使用。
---
## 1. 顶栏 (HeaderPanel)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 最后更新时间 | `situation.lastUpdated` | `situation.updated_at`(表 `situation` id=1 | 爬虫 notify 时更新 |
| 在看/看过 | `stats.viewers` / `stats.cumulative` | `visits` / `visitor_count`,见 `POST /api/visit` | 与爬虫无关 |
| 美/伊战力条 | `usForces.powerIndex.overall` / `iranForces.powerIndex.overall` | `power_index` 表 | **仅 seed** |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.lastUpdated, .usForces.powerIndex.overall, .iranForces.powerIndex.overall'`
- 看板顶栏是否显示时间、双战力数值。
**常见问题**
- `lastUpdated` 不变:爬虫未调 `POST /api/crawler/notify` 或 Node 未执行 `reloadFromFile()`
- 战力条为 0未跑 seed 或 `power_index` 无数据。
---
## 2. 事件脉络 / 时间线 (TimelinePanel → EventTimelinePanel + RecentUpdatesPanel)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 近期更新列表 | `situation.recentUpdates` | `situation_update`ORDER BY timestamp DESC LIMIT 50 | 爬虫 `write_updates(new_items)` + seed 若干条 |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.recentUpdates | length'`
- `curl -s http://localhost:3001/api/situation | jq '.recentUpdates[0]'`
- 或用调试接口:`curl -s -H "x-api-key: $API_ADMIN_KEY" http://localhost:3001/api/db/dashboard | jq '.situation_update | length'`
**常见问题**
- 条数为 0未 seed 且爬虫未写入;或爬虫只跑 main.py入口 A未跑 gdelt入口 B仍会写 `situation_update`,但若 RSS 抓取 0 条则无新数据。
- 不更新:爬虫未启动;或未调 notify或 Node 与爬虫用的不是同一个 `data.db`(路径/环境变量不一致)。
---
## 3. 地图 (WarMap)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 美军据点 | `usForces.keyLocations` | `key_location` WHERE side='us' | seed 全量;爬虫通过 `key_location_updates` 只更新 status/damage_level |
| 伊朗据点 | `iranForces.keyLocations` | `key_location` WHERE side='iran' | 同上 |
| 冲突点(绿/橙/红) | `situation.conflictEvents` | `gdelt_events`ORDER BY event_time DESC LIMIT 30 | GDELT API 写入;或 GDELT 关闭时 RSS 回填 |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.usForces.keyLocations | length, .conflictEvents | length'`
- 地图上是否有基地/舰船点位、是否有冲突点图层。
**常见问题**
- 无冲突点:`gdelt_events` 为空;未跑 gdelt 或 GDELT 被墙且未用 RSS 回填(`_rss_to_gdelt_fallback`)。
- 基地状态不更新:爬虫提取的 `key_location_updates``name_keywords``key_location.name` 无法 LIKE 匹配(名称不一致)。
---
## 4. 美国基地状态 (BaseStatusPanel)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 基地列表 | `usForces.keyLocations``type === 'Base'` | `key_location` side='us' | 同 WarMap |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '[.usForces.keyLocations[] | select(.type == "Base")] | length'`
- 看板左侧「美国基地」是否展示且状态/损伤与预期一致。
**常见问题**
- 与「地图」一致;若 seed 的 key_location 有 type/region而爬虫只更新 status/damage_level名称必须能与 extractor 的 name_keywords 匹配。
---
## 5. 战损 (CombatLossesPanel + CombatLossesOtherPanel)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 美军/伊朗阵亡/受伤/装备等 | `usForces.combatLosses` / `iranForces.combatLosses` | `combat_losses`side=us/iran | seed 初始值;爬虫 AI 提取 `combat_losses_delta` 后 db_merge **增量**叠加 |
| 冲突统计(估计伤亡等) | `situation.conflictStats` | `conflict_stats` 表 id=1 | GDELT 或 RSS 回填时写入 |
| 平民伤亡合计 | `situation.civilianCasualtiesTotal` | 由 combat_losses 双方平民字段 + conflict_stats.estimated_casualties 计算 | 见 situationData.js |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.usForces.combatLosses, .iranForces.combatLosses, .conflictStats'`
- 看板战损数字是否与 API 一致。
**常见问题**
- 战损一直不变:新闻中无明确伤亡/装备数字;或未跑入口 Bgdelt或 AI 提取器未启用/报错Ollama/通义/规则);或 merge 时单次增量被上限截断。
- 数字异常大:提取器误把「累计总数」当成本条增量;已用 `MAX_DELTA_PER_MERGE` 做上限。
---
## 6. 伊朗基地状态 (IranBaseStatusPanel)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 基地/港/核/导弹等 | `iranForces.keyLocations` 中 type 为 Base/Port/Nuclear/Missile | `key_location` side='iran' | 同 WarMap |
**验证与常见问题**
- 同「美国基地」;确保 seed 中伊朗 key_location 的 name 与爬虫 extractor 的 name_keywords 能匹配(如德黑兰、伊斯法罕、布什尔等)。
---
## 7. 战力对比图 (PowerChart)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 美/伊战力指数 | `usForces.powerIndex` / `iranForces.powerIndex` | `power_index` 表 | **仅 seed**,爬虫不写 |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.usForces.powerIndex, .iranForces.powerIndex'`
**常见问题**
- 为 0 或缺失:未执行 seed`power_index` 表空。
---
## 8. 华尔街/投资趋势 (InvestmentTrendChart)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 时间序列 | `usForces.wallStreetInvestmentTrend` | `wall_street_trend`time, value | seed 写入初始曲线;爬虫仅在提取出 `wall_street`**INSERT 新点** |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.usForces.wallStreetInvestmentTrend | length'`
- 看板右侧美国下方趋势图是否有数据。
**常见问题**
- 无曲线:未 seed 或表空。
- 不随新闻更新:提取器未输出 `wall_street` 或新闻中无相关表述。
---
## 9. 美国力量摘要 (ForcePanel side=us)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 摘要数字 | `usForces.summary` | `force_summary` side='us' | **仅 seed** |
| 战力指数 | `usForces.powerIndex` | `power_index` | **仅 seed** |
| 资产列表 | `usForces.assets` | `force_asset` side='us' | **仅 seed** |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.usForces.summary, .usForces.assets | length'`
**常见问题**
- 全为 0 或空:未 seed爬虫不更新这些表。
---
## 10. 报复情绪 (RetaliationGauge)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 当前值 | `iranForces.retaliationSentiment` | `retaliation_current` id=1 | seed 初始;爬虫提取 `retaliation`**替换** 当前值并 **追加** history |
| 历史曲线 | `iranForces.retaliationSentimentHistory` | `retaliation_history` 表 | 同上 |
**验证**
- `curl -s http://localhost:3001/api/situation | jq '.iranForces.retaliationSentiment, .iranForces.retaliationSentimentHistory | length'`
**常见问题**
- 不更新:新闻中无报复相关表述;或提取器未输出 `retaliation`
---
## 11. 伊朗力量摘要 (ForcePanel side=iran)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 同美国侧 | `iranForces.summary` / `powerIndex` / `assets` | `force_summary` / `power_index` / `force_asset` side='iran' | **仅 seed** |
**验证与常见问题**
- 同「美国力量摘要」。
---
## 12. 资讯列表 (GET /api/news若有单独页面消费)
| 项目 | 数据来源 | 写入方 | 说明 |
|------|----------|--------|------|
| 资讯行 | `news_content` 表 | 爬虫 `save_and_dedup` 后写入 | 仅入口 B 流水线;事件脉络来自 situation_update资讯表独立 |
**验证**
- `curl -s -H "x-api-key: $API_ADMIN_KEY" http://localhost:3001/api/news?limit=5 | jq '.items | length'`
- 若未配 ADMIN_KEY部分环境可能不鉴权也可访问视 routes 配置而定。
**常见问题**
- `items` 为 0未跑入口 B或去重后无新增或 RSS 抓取 0 条。
---
## 快速检查命令汇总
```bash
# 1. API 与态势整体
curl -s http://localhost:3001/api/health
curl -s http://localhost:3001/api/situation | jq '{
lastUpdated,
recentUpdates: (.recentUpdates | length),
conflictEvents: (.conflictEvents | length),
usPower: .usForces.powerIndex.overall,
iranPower: .iranForces.powerIndex.overall,
usLosses: .usForces.combatLosses.personnelCasualties,
iranLosses: .iranForces.combatLosses.personnelCasualties,
usBases: (.usForces.keyLocations | length),
iranBases: (.iranForces.keyLocations | length),
wallStreetLen: (.usForces.wallStreetInvestmentTrend | length),
retaliationCur: .iranForces.retaliationSentiment
}'
# 2. 各表行数(需 sqlite3
DB="${DB_PATH:-server/data.db}"
for t in force_summary power_index force_asset key_location combat_losses wall_street_trend retaliation_current retaliation_history situation_update gdelt_events conflict_stats news_content; do
echo -n "$t: "; sqlite3 "$DB" "SELECT COUNT(*) FROM $t" 2>/dev/null || echo "?"
done
# 3. 爬虫状态与通知
curl -s http://localhost:8000/crawler/status | jq .
curl -s -X POST http://localhost:3001/api/crawler/notify
```
---
## 建议调试顺序
1. **先确认 API 与 DB 一致**`npm run api` 已起、`GET /api/situation` 返回 200`lastUpdated``recentUpdates` 等存在。
2. **确认 seed**:若从未 seed先跑 `node server/seed.js`(或项目提供的 seed 命令),再刷新看板,检查战力/摘要/基地/战损等是否有初始值。
3. **事件脉络**:确认爬虫已起(`npm run gdelt`、RSS 能抓到条数、`situation_update` 条数增加、notify 后前端/API 的 `recentUpdates` 增加。
4. **战损/基地/报复/美股**:确认跑的是入口 B、提取器可用Ollama 或 DASHSCOPE_API_KEY 或规则)、新闻内容包含可解析的伤亡/基地/报复表述;必要时用 crawler 的提取单测或 backfill 接口验证。
5. **地图冲突点**:确认 `gdelt_events` 有数据GDELT 或 RSS 回填);冲突统计看 `conflict_stats`
按上述顺序逐板块对照「数据来源 → 写入方 → 验证命令 → 常见问题」,即可定位每个板块不更新或显示异常的原因。
**若只关心战损、基地、地图战区**:见 **docs/DEBUG_战损_基地_地图.md**,并运行 `./scripts/debug-panels-focus.sh` 做专项检查。

View File

@@ -0,0 +1,135 @@
# 战损、基地、地图战区 — 专项调试
只关心这三块时,按下面数据源 + 排查顺序即可。
---
## 一、战损 (combat_losses)
### 数据流
```
RSS 新闻(标题+摘要/正文) → 爬虫流水线 run_full_pipeline
→ extract_from_news(text) → combat_losses_delta { us: { personnel_killed, ... }, iran: { ... } }
→ db_merge.merge() → 按「增量」叠加到 combat_losses 表
→ POST /api/crawler/notify → Node 重载 DB
→ getSituation() 读 combat_losses → 前端 CombatLossesPanel / CombatLossesOtherPanel
```
- **表**`combat_losses`side=us / iran字段含 personnel_killed、personnel_wounded、bases_destroyed、bases_damaged、aircraft、drones、missiles 等。
- **初始值**`node server/seed.js` 会写入美/伊两行。
- **更新条件**:只有新闻里**明确出现可解析的伤亡/装备数字**如「2 名美军死亡」「14 人受伤」「1 架战机受损」)时,提取器才会输出 `combat_losses_delta`merge 才会叠加。
### 提取器选择(三选一)
| 环境变量 | 使用模块 | 说明 |
|----------|----------|------|
| `DASHSCOPE_API_KEY` 已设 | `extractor_dashscope` | 通义抽取,精度较好 |
| 未设通义 且 `CLEANER_AI_DISABLED≠1` | `extractor_ai` | 需本机 Ollama如 llama3.1 |
| 未设通义 且 `CLEANER_AI_DISABLED=1` | `extractor_rules` | 规则正则,无需模型 |
### 验证命令
```bash
# API 返回的战损
curl -s http://localhost:3001/api/situation | jq '{
us: .usForces.combatLosses.personnelCasualties,
iran: .iranForces.combatLosses.personnelCasualties,
conflictStats: .conflictStats
}'
# 表内原始值
sqlite3 server/data.db "SELECT side, personnel_killed, personnel_wounded, bases_destroyed, bases_damaged, aircraft FROM combat_losses"
```
### 常见问题
| 现象 | 可能原因 | 处理 |
|------|----------|------|
| 战损数字从不变化 | 1) 只跑了 main.py 未跑 gdelt<br>2) 新闻里没有明确伤亡/装备数字<br>3) 提取器未启用或报错Ollama 未起、通义未配) | 跑 `npm run gdelt`;用带数字的新闻测;看爬虫日志是否有提取/merge 报错 |
| 数字暴增一次 | 提取器把「累计总数」当成本条增量 | 已用 MAX_DELTA_PER_MERGE 做单次上限;可查 db_merge.py |
| 想用已有事件脉络重算战损 | 历史新闻当时未做提取 | `curl -X POST http://localhost:8000/crawler/backfill` 用 situation_update 最近 50 条重新提取并 merge |
---
## 二、基地 (key_location)
### 数据流
```
RSS 新闻 → extract_from_news → key_location_updates: [ { name_keywords, side, status, damage_level } ]
→ db_merge.merge() → UPDATE key_location SET status=?, damage_level=? WHERE side=? AND (name LIKE ? OR ...)
→ getSituation() 读 key_location → 前端 BaseStatusPanel(美) / IranBaseStatusPanel(伊) / WarMap 据点层
```
- **表**`key_location`side=us / iran字段含 name、lat、lng、type、region、**status**、**damage_level**。
- **初始数据**seed 写入大量美/伊据点和基地(含 name**爬虫只更新已有行的 status、damage_level**,不新增行。
- **匹配规则**:提取器的 `name_keywords`(如 `阿萨德|asad`)会按 **LIKE '%关键词%'**`key_location.name` 匹配。例如 name 为「阿萨德空军基地」时,关键词「阿萨德」能匹配。
### 规则提取器支持的基地关键词(与 seed name 对应关系)
- **美军**:阿萨德|阿因|asad → 匹配 seed「阿萨德空军基地」「阿因·阿萨德」巴格达 → 巴格达外交支援中心;乌代德|卡塔尔 → 乌代德空军基地;埃尔比勒 → 埃尔比勒空军基地;因吉尔利克|土耳其 → 因吉尔利克空军基地;苏尔坦|沙特 → 苏尔坦亲王空军基地;坦夫|叙利亚 → 坦夫驻军;达夫拉|阿联酋 → 达夫拉空军基地;内瓦提姆|拉蒙|以色列 → 内瓦提姆/拉蒙等;赛利耶、巴林、科威特 等。
- **伊朗**:阿巴斯港、德黑兰、布什尔、伊斯法罕、纳坦兹、米纳布、霍尔木兹 等seed 中需有对应 name 的伊朗据点)。
若 seed 里没有某据点,或 name 与关键词完全对不上(例如英文报道只写 "Al-Asad" 而 seed 只有「阿萨德空军基地」),规则里已含 asad/阿萨德,一般能匹配;若仍不匹配,可查 `key_location.name` 与 extractor_rules.py / extractor_dashscope 的 name_keywords 是否有一致子串。
### 验证命令
```bash
# 被标为遭袭的据点
curl -s http://localhost:3001/api/situation | jq '[.usForces.keyLocations[], .iranForces.keyLocations[]] | map(select(.status == "attacked")) | length'
# 表内 status / damage_level
sqlite3 server/data.db "SELECT side, name, status, damage_level FROM key_location WHERE status != 'operational' OR damage_level IS NOT NULL LIMIT 20"
```
### 常见问题
| 现象 | 可能原因 | 处理 |
|------|----------|------|
| 基地状态从不更新 | 1) 新闻未提及「某基地遭袭」类表述<br>2) 提取的 name_keywords 与 key_location.name 无法 LIKE 匹配 | 确认 seed 的 name 含中文/英文与提取器关键词一致;或扩展 extractor 的 name_keywords |
| 地图/基地面板无据点 | key_location 表空 | 先执行 `node server/seed.js` |
---
## 三、地图战区 / 冲突点 (gdelt_events + conflict_stats)
### 数据流
- **正常模式**`fetch_gdelt_events()` 请求 GDELT API → 解析为事件列表 → `_write_to_db(events)` 写入 `gdelt_events``conflict_stats`(总事件数、高影响事件数、估计伤亡、打击次数等)。
- **GDELT 不可用**:设 `GDELT_DISABLED=1` 时,`fetch_news()` 里在流水线结束后调 `_rss_to_gdelt_fallback()`,用 **situation_update 最近 50 条** 按 summary 推断经纬度(`_infer_coords`)和 impact_score由 severity 映射),写入 `gdelt_events`,这样地图仍有冲突点。
前端 WarMap 根据 `conflictEvents`= gdelt_events的 impact_score 分绿/橙/红三层显示;战损区「冲突统计」来自 `conflict_stats`
### 验证命令
```bash
# 冲突点条数 + 冲突统计
curl -s http://localhost:3001/api/situation | jq '{ conflictEvents: (.conflictEvents | length), conflictStats: .conflictStats }'
# 表内
sqlite3 server/data.db "SELECT COUNT(*) FROM gdelt_events"
sqlite3 server/data.db "SELECT * FROM conflict_stats WHERE id = 1"
```
### 常见问题
| 现象 | 可能原因 | 处理 |
|------|----------|------|
| 地图没有冲突点 | 1) gdelt_events 表空<br>2) 未跑 gdelt 或 GDELT 被墙且未开 RSS 回填 | 跑 `npm run gdelt`;国内可设 `GDELT_DISABLED=1`,靠 situation_update 回填 |
| 冲突点不更新 | 爬虫未调 notify或 Node/爬虫用的不是同一个 data.db | 确认 API_BASE、DB_PATH 一致;看 Node 终端是否有 `[crawler/notify] DB 已重载` |
| conflict_stats 全 0 | 从未成功写入过 gdelt_eventsGDELT 与 RSS 回填都未执行) | 先让 gdelt_events 有数据(见上) |
---
## 四、一键检查(仅战损 / 基地 / 地图)
在项目根执行:
```bash
./scripts/debug-panels-focus.sh
```
会检查API 是否通、`combat_losses` / `key_location` / `gdelt_events` / `conflict_stats` 行数及关键字段、并给出简短结论。需已启动 API`npm run api`);可选 `jq``sqlite3` 以输出更全。
详细逐板块说明见 `docs/DEBUG_PANELS.md`

30
docs/DOCKER_MIRROR.md Normal file
View File

@@ -0,0 +1,30 @@
# Docker 拉取超时 / 配置镜像加速
国内环境从 Docker Hub 拉取镜像常超时,需在 Docker 中配置镜像加速。
## Docker DesktopmacOS / Windows
1. 打开 **Docker Desktop**
2. **Settings****Docker Engine**
3. 在 JSON 中增加 `registry-mirrors`(若已有其他配置,只需合并进该字段):
```json
{
"registry-mirrors": [
"https://docker.m.daocloud.io",
"https://docker.1ms.run"
]
}
```
4. 点击 **Apply & Restart**
5. 重新执行:`docker compose up -d --build`
## 备选镜像源
可替换或补充到 `registry-mirrors` 中:
- `https://docker.m.daocloud.io`DaoCloud
- `https://docker.1ms.run`
- `https://docker.rainbond.cc`(好雨科技)
- 阿里云 / 腾讯云等:在对应云控制台的「容器镜像服务」中获取个人专属加速地址

68
docs/PRODUCTION.md Normal file
View File

@@ -0,0 +1,68 @@
# 生产部署与数据对齐
## 1. 当前项目是否能在 Docker 中单独运行
- **能**。爬虫镜像 `Dockerfile.crawler` 自包含 Python 3.11 + `crawler/requirements.txt`(含 dashscope无宿主机 Python 版本依赖。
- **两种常见用法**
- **docker-compose 一起跑**API + 爬虫都在容器内,共用一个命名卷 `app-data`,天然对齐。
- **爬虫单独 Docker、API 在宿主机**:爬虫容器通过挂载宿主机上的 **同一个** `server/data.db`,并设置 `API_BASE` 指向宿主机 API即可单独运行且数据一致。
## 2. 数据对齐(必须满足)
| 角色 | 使用的 DB 路径(示例) | 说明 |
|--------|-------------------------------|------|
| Node API | `process.env.DB_PATH``server/data.db` | 见 `server/db.js``docker-entrypoint.sh` |
| 爬虫Docker 内) | `DB_PATH=/data/data.db`,且 `/data/data.db` 由宿主机同一文件挂载 | 见 `Dockerfile.crawler``crawler/config.py` |
**原则**API 和爬虫必须读写 **同一个 SQLite 文件**。否则会出现「爬虫写了库、API 读不到」或反之。
- **docker-compose 全容器**:两边都用卷 `app-data`,路径均为 `/data/data.db`,自动对齐。
- **API 宿主机 + 爬虫 Docker**:宿主机 API 的 `DB_PATH` 指向例如 `$PROJECT/server/data.db`;爬虫启动时用 `-v $PROJECT/server/data.db:/data/data.db``-e DB_PATH=/data/data.db`,即对齐。
## 3. 生产脚本与用法
### 3.1 爬虫单独 DockerAPI 在宿主机,如 PM2
```bash
# 首次:构建镜像并启动爬虫容器(会读 .env 中的 DASHSCOPE_API_KEY
./scripts/production-start.sh
# 或分步:
docker build -t usa-dashboard-crawler:latest -f Dockerfile.crawler .
./scripts/run-crawler-docker-standalone.sh
```
可调环境变量(在运行脚本前 export 或写在 .env
- `PROJECT_ROOT`:项目根目录,默认当前目录;用于解析 `server/data.db`
- `DB_FILE`:宿主机 DB 绝对路径,默认 `$PROJECT_ROOT/server/data.db`
- `API_BASE`:爬虫通知 API 的地址,默认 `http://host.docker.internal:3001`Linux 下脚本会自动加 `--add-host=host.docker.internal:host-gateway`)。
- `DASHSCOPE_API_KEY`:阿里云 DashScope启用 AI 清洗(可选)。
### 3.2 docker-compose 全栈API + 爬虫都在容器)
```bash
# 启动
docker compose up -d
# 或传入 DASHSCOPE_API_KEY
DASHSCOPE_API_KEY=sk-xxx docker compose up -d
# 停止
docker compose down
```
此时 API 与爬虫共用卷 `app-data`DB 路径均为 `/data/data.db`,无需额外对齐。
### 3.3 宿主机 APIPM2使用的 DB 路径
确保 PM2 启动 API 时使用的 DB 与爬虫挂载的是同一文件,例如:
- 在 ecosystem 或启动命令里设置:`DB_PATH=/www/wwwroot/www.airtep.com2/usa/server/data.db`
- 或项目根目录即部署目录时,不设则默认为 `server/data.db`(相对路径以进程 cwd 为准)。
## 4. 检查清单
- [ ] API 与爬虫使用**同一 DB 文件**(见上表)。
- [ ] 爬虫能访问到 API`API_BASE` 在「爬虫单独 Docker」场景下指向宿主机`http://host.docker.internal:3001`),在 compose 场景下为 `http://api:3001`
- [ ] 若需 AI 清洗:在爬虫侧设置 `DASHSCOPE_API_KEY`compose 或 standalone 脚本的 .env/环境变量)。
- [ ] 首次部署或无 DB 时:先创建并初始化 DB例如 `DB_PATH=server/data.db node server/seed.js`),再启动爬虫容器。

43
ecosystem.config.cjs Normal file
View File

@@ -0,0 +1,43 @@
/**
* PM2 进程配置API + 爬虫GDELT/RSS uvicorn 服务)
* 用法:
* pm2 start ecosystem.config.cjs # 启动全部
* pm2 restart ecosystem.config.cjs # 重启全部
* pm2 stop ecosystem.config.cjs # 停止全部
* pm2 logs nsa_api / pm2 logs nsa_crawler
* 需 .env 时可在启动前 source .env或在应用内用 dotenv 加载。
*/
module.exports = {
apps: [
{
name: 'nsa_api',
script: 'server/index.js',
cwd: __dirname,
interpreter: 'node',
instances: 1,
autorestart: true,
watch: false,
max_memory_restart: '300M',
env: {
NODE_ENV: 'production',
API_PORT: 3001,
},
},
{
name: 'nsa_crawler',
script: 'crawler/run_uvicorn.sh',
cwd: __dirname,
interpreter: 'bash',
instances: 1,
autorestart: true,
watch: false,
max_memory_restart: '300M',
env: {
CLEANER_AI_DISABLED: '1',
PARSER_AI_DISABLED: '0',
GDELT_DISABLED: '1',
RSS_INTERVAL_SEC: '60',
},
},
],
};

22
g.sh Executable file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Quick git add + commit + push helper.
# Usage: g "fix: short commit message"
set -e

commit_msg="${1:-}"

# A commit message argument is mandatory.
if [ -z "$commit_msg" ]; then
  echo "用法: g \"commit message\""
  echo "示例: g \"fix: 修复登录问题\""
  exit 1
fi

# Nothing modified or untracked: skip the commit entirely.
if [ -z "$(git status --porcelain)" ]; then
  echo "无文件改动,跳过提交"
  exit 0
fi

git add .
git commit -m "$commit_msg"
git push
echo "✓ 已推送"

View File

@@ -3,7 +3,7 @@
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/usa_logo.png" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover" />
<title>美伊军事态势显示</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>

8
map.md
View File

@@ -281,4 +281,10 @@ const IRAN_SOURCE = [51.3890, 35.6892] // Tehran
所有动画走 WebGL 图层
禁止 DOM 动画
禁止 DOM 动画
git代码更新:git fetch origin && git reset --hard origin/master
前端发版npm run build
后端发版pm2 restart 3

477
package-lock.json generated
View File

@@ -8,16 +8,18 @@
"name": "us-iran-military-dashboard",
"version": "1.0.0",
"dependencies": {
"better-sqlite3": "^11.6.0",
"cors": "^2.8.5",
"echarts": "^5.5.0",
"echarts-for-react": "^3.0.2",
"express": "^4.21.1",
"lucide-react": "^0.460.0",
"lucide-react": "^0.576.0",
"mapbox-gl": "^3.6.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-map-gl": "^7.1.7",
"react-router-dom": "^7.13.1",
"sql.js": "^1.11.0",
"swagger-ui-express": "^5.0.1",
"ws": "^8.19.0",
"zustand": "^5.0.0"
},
@@ -1342,6 +1344,12 @@
"win32"
]
},
"node_modules/@scarf/scarf": {
"version": "1.4.0",
"resolved": "https://registry.npmmirror.com/@scarf/scarf/-/scarf-1.4.0.tgz",
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
"hasInstallScript": true
},
"node_modules/@types/babel__core": {
"version": "7.20.5",
"resolved": "https://registry.npmmirror.com/@types/babel__core/-/babel__core-7.20.5.tgz",
@@ -1921,25 +1929,6 @@
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"dev": true
},
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmmirror.com/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/baseline-browser-mapping": {
"version": "2.10.0",
"resolved": "https://registry.npmmirror.com/baseline-browser-mapping/-/baseline-browser-mapping-2.10.0.tgz",
@@ -1952,16 +1941,6 @@
"node": ">=6.0.0"
}
},
"node_modules/better-sqlite3": {
"version": "11.10.0",
"resolved": "https://registry.npmmirror.com/better-sqlite3/-/better-sqlite3-11.10.0.tgz",
"integrity": "sha512-EwhOpyXiOEL/lKzHz9AW1msWFNzGc/z+LzeB3/jnFJpxu+th2yqvzsSWas1v9jgs9+xiXJcD5A8CJxAG2TaghQ==",
"hasInstallScript": true,
"dependencies": {
"bindings": "^1.5.0",
"prebuild-install": "^7.1.1"
}
},
"node_modules/binary-extensions": {
"version": "2.3.0",
"resolved": "https://registry.npmmirror.com/binary-extensions/-/binary-extensions-2.3.0.tgz",
@@ -1974,24 +1953,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/bindings": {
"version": "1.5.0",
"resolved": "https://registry.npmmirror.com/bindings/-/bindings-1.5.0.tgz",
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
"dependencies": {
"file-uri-to-path": "1.0.0"
}
},
"node_modules/bl": {
"version": "4.1.0",
"resolved": "https://registry.npmmirror.com/bl/-/bl-4.1.0.tgz",
"integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
"dependencies": {
"buffer": "^5.5.0",
"inherits": "^2.0.4",
"readable-stream": "^3.4.0"
}
},
"node_modules/body-parser": {
"version": "1.20.4",
"resolved": "https://registry.npmmirror.com/body-parser/-/body-parser-1.20.4.tgz",
@@ -2083,29 +2044,6 @@
"node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
}
},
"node_modules/buffer": {
"version": "5.7.1",
"resolved": "https://registry.npmmirror.com/buffer/-/buffer-5.7.1.tgz",
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"dependencies": {
"base64-js": "^1.3.1",
"ieee754": "^1.1.13"
}
},
"node_modules/bytes": {
"version": "3.1.2",
"resolved": "https://registry.npmmirror.com/bytes/-/bytes-3.1.2.tgz",
@@ -2253,11 +2191,6 @@
"node": ">= 6"
}
},
"node_modules/chownr": {
"version": "1.1.4",
"resolved": "https://registry.npmmirror.com/chownr/-/chownr-1.1.4.tgz",
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
},
"node_modules/color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmmirror.com/color-convert/-/color-convert-2.0.1.tgz",
@@ -2399,28 +2332,6 @@
}
}
},
"node_modules/decompress-response": {
"version": "6.0.0",
"resolved": "https://registry.npmmirror.com/decompress-response/-/decompress-response-6.0.0.tgz",
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
"dependencies": {
"mimic-response": "^3.1.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/deep-extend": {
"version": "0.6.0",
"resolved": "https://registry.npmmirror.com/deep-extend/-/deep-extend-0.6.0.tgz",
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/deep-is": {
"version": "0.1.4",
"resolved": "https://registry.npmmirror.com/deep-is/-/deep-is-0.1.4.tgz",
@@ -2444,14 +2355,6 @@
"npm": "1.2.8000 || >= 1.4.16"
}
},
"node_modules/detect-libc": {
"version": "2.1.2",
"resolved": "https://registry.npmmirror.com/detect-libc/-/detect-libc-2.1.2.tgz",
"integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
"engines": {
"node": ">=8"
}
},
"node_modules/didyoumean": {
"version": "1.2.2",
"resolved": "https://registry.npmmirror.com/didyoumean/-/didyoumean-1.2.2.tgz",
@@ -2523,14 +2426,6 @@
"node": ">= 0.8"
}
},
"node_modules/end-of-stream": {
"version": "1.4.5",
"resolved": "https://registry.npmmirror.com/end-of-stream/-/end-of-stream-1.4.5.tgz",
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
"dependencies": {
"once": "^1.4.0"
}
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmmirror.com/es-define-property/-/es-define-property-1.0.1.tgz",
@@ -2797,14 +2692,6 @@
"node": ">= 0.6"
}
},
"node_modules/expand-template": {
"version": "2.0.3",
"resolved": "https://registry.npmmirror.com/expand-template/-/expand-template-2.0.3.tgz",
"integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
"engines": {
"node": ">=6"
}
},
"node_modules/express": {
"version": "4.22.1",
"resolved": "https://registry.npmmirror.com/express/-/express-4.22.1.tgz",
@@ -2940,11 +2827,6 @@
"node": ">=16.0.0"
}
},
"node_modules/file-uri-to-path": {
"version": "1.0.0",
"resolved": "https://registry.npmmirror.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="
},
"node_modules/fill-range": {
"version": "7.1.1",
"resolved": "https://registry.npmmirror.com/fill-range/-/fill-range-7.1.1.tgz",
@@ -3051,11 +2933,6 @@
"node": ">= 0.6"
}
},
"node_modules/fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmmirror.com/fs-constants/-/fs-constants-1.0.0.tgz",
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
},
"node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.3.tgz",
@@ -3135,11 +3012,6 @@
"node": ">=0.10.0"
}
},
"node_modules/github-from-package": {
"version": "0.0.0",
"resolved": "https://registry.npmmirror.com/github-from-package/-/github-from-package-0.0.0.tgz",
"integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="
},
"node_modules/gl-matrix": {
"version": "3.4.4",
"resolved": "https://registry.npmmirror.com/gl-matrix/-/gl-matrix-3.4.4.tgz",
@@ -3246,25 +3118,6 @@
"node": ">=0.10.0"
}
},
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmmirror.com/ieee754/-/ieee754-1.2.1.tgz",
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/ignore": {
"version": "5.3.2",
"resolved": "https://registry.npmmirror.com/ignore/-/ignore-5.3.2.tgz",
@@ -3304,11 +3157,6 @@
"resolved": "https://registry.npmmirror.com/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
},
"node_modules/ini": {
"version": "1.3.8",
"resolved": "https://registry.npmmirror.com/ini/-/ini-1.3.8.tgz",
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="
},
"node_modules/ipaddr.js": {
"version": "1.9.1",
"resolved": "https://registry.npmmirror.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
@@ -3567,11 +3415,11 @@
}
},
"node_modules/lucide-react": {
"version": "0.460.0",
"resolved": "https://registry.npmmirror.com/lucide-react/-/lucide-react-0.460.0.tgz",
"integrity": "sha512-BVtq/DykVeIvRTJvRAgCsOwaGL8Un3Bxh8MbDxMhEWlZay3T4IpEKDEpwt5KZ0KJMHzgm6jrltxlT5eXOWXDHg==",
"version": "0.576.0",
"resolved": "https://registry.npmmirror.com/lucide-react/-/lucide-react-0.576.0.tgz",
"integrity": "sha512-koNxU14BXrxUfZQ9cUaP0ES1uyPZKYDjk31FQZB6dQ/x+tXk979sVAn9ppZ/pVeJJyOxVM8j1E+8QEuSc02Vug==",
"peerDependencies": {
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0-rc"
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
"node_modules/mapbox-gl": {
@@ -3706,17 +3554,6 @@
"node": ">= 0.6"
}
},
"node_modules/mimic-response": {
"version": "3.1.0",
"resolved": "https://registry.npmmirror.com/mimic-response/-/mimic-response-3.1.0.tgz",
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/minimatch": {
"version": "3.1.5",
"resolved": "https://registry.npmmirror.com/minimatch/-/minimatch-3.1.5.tgz",
@@ -3737,11 +3574,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/mkdirp-classic": {
"version": "0.5.3",
"resolved": "https://registry.npmmirror.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
},
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.3.tgz",
@@ -3781,11 +3613,6 @@
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
}
},
"node_modules/napi-build-utils": {
"version": "2.0.0",
"resolved": "https://registry.npmmirror.com/napi-build-utils/-/napi-build-utils-2.0.0.tgz",
"integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA=="
},
"node_modules/natural-compare": {
"version": "1.4.0",
"resolved": "https://registry.npmmirror.com/natural-compare/-/natural-compare-1.4.0.tgz",
@@ -3800,28 +3627,6 @@
"node": ">= 0.6"
}
},
"node_modules/node-abi": {
"version": "3.87.0",
"resolved": "https://registry.npmmirror.com/node-abi/-/node-abi-3.87.0.tgz",
"integrity": "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ==",
"dependencies": {
"semver": "^7.3.5"
},
"engines": {
"node": ">=10"
}
},
"node_modules/node-abi/node_modules/semver": {
"version": "7.7.4",
"resolved": "https://registry.npmmirror.com/semver/-/semver-7.7.4.tgz",
"integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
"bin": {
"semver": "bin/semver.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/node-releases": {
"version": "2.0.27",
"resolved": "https://registry.npmmirror.com/node-releases/-/node-releases-2.0.27.tgz",
@@ -3876,14 +3681,6 @@
"node": ">= 0.8"
}
},
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmmirror.com/once/-/once-1.4.0.tgz",
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
"dependencies": {
"wrappy": "1"
}
},
"node_modules/optionator": {
"version": "0.9.4",
"resolved": "https://registry.npmmirror.com/optionator/-/optionator-0.9.4.tgz",
@@ -4188,32 +3985,6 @@
"resolved": "https://registry.npmmirror.com/potpack/-/potpack-2.1.0.tgz",
"integrity": "sha512-pcaShQc1Shq0y+E7GqJqvZj8DTthWV1KeHGdi0Z6IAin2Oi3JnLCOfwnCo84qc+HAp52wT9nK9H7FAJp5a44GQ=="
},
"node_modules/prebuild-install": {
"version": "7.1.3",
"resolved": "https://registry.npmmirror.com/prebuild-install/-/prebuild-install-7.1.3.tgz",
"integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==",
"deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.",
"dependencies": {
"detect-libc": "^2.0.0",
"expand-template": "^2.0.3",
"github-from-package": "0.0.0",
"minimist": "^1.2.3",
"mkdirp-classic": "^0.5.3",
"napi-build-utils": "^2.0.0",
"node-abi": "^3.3.0",
"pump": "^3.0.0",
"rc": "^1.2.7",
"simple-get": "^4.0.0",
"tar-fs": "^2.0.0",
"tunnel-agent": "^0.6.0"
},
"bin": {
"prebuild-install": "bin.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/prelude-ls": {
"version": "1.2.1",
"resolved": "https://registry.npmmirror.com/prelude-ls/-/prelude-ls-1.2.1.tgz",
@@ -4240,15 +4011,6 @@
"node": ">= 0.10"
}
},
"node_modules/pump": {
"version": "3.0.4",
"resolved": "https://registry.npmmirror.com/pump/-/pump-3.0.4.tgz",
"integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==",
"dependencies": {
"end-of-stream": "^1.1.0",
"once": "^1.3.1"
}
},
"node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmmirror.com/punycode/-/punycode-2.3.1.tgz",
@@ -4319,28 +4081,6 @@
"node": ">= 0.8"
}
},
"node_modules/rc": {
"version": "1.2.8",
"resolved": "https://registry.npmmirror.com/rc/-/rc-1.2.8.tgz",
"integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
"dependencies": {
"deep-extend": "^0.6.0",
"ini": "~1.3.0",
"minimist": "^1.2.0",
"strip-json-comments": "~2.0.1"
},
"bin": {
"rc": "cli.js"
}
},
"node_modules/rc/node_modules/strip-json-comments": {
"version": "2.0.1",
"resolved": "https://registry.npmmirror.com/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
"integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/react": {
"version": "18.3.1",
"resolved": "https://registry.npmmirror.com/react/-/react-18.3.1.tgz",
@@ -4396,6 +4136,54 @@
"node": ">=0.10.0"
}
},
"node_modules/react-router": {
"version": "7.13.1",
"resolved": "https://registry.npmmirror.com/react-router/-/react-router-7.13.1.tgz",
"integrity": "sha512-td+xP4X2/6BJvZoX6xw++A2DdEi++YypA69bJUV5oVvqf6/9/9nNlD70YO1e9d3MyamJEBQFEzk6mbfDYbqrSA==",
"dependencies": {
"cookie": "^1.0.1",
"set-cookie-parser": "^2.6.0"
},
"engines": {
"node": ">=20.0.0"
},
"peerDependencies": {
"react": ">=18",
"react-dom": ">=18"
},
"peerDependenciesMeta": {
"react-dom": {
"optional": true
}
}
},
"node_modules/react-router-dom": {
"version": "7.13.1",
"resolved": "https://registry.npmmirror.com/react-router-dom/-/react-router-dom-7.13.1.tgz",
"integrity": "sha512-UJnV3Rxc5TgUPJt2KJpo1Jpy0OKQr0AjgbZzBFjaPJcFOb2Y8jA5H3LT8HUJAiRLlWrEXWHbF1Z4SCZaQjWDHw==",
"dependencies": {
"react-router": "7.13.1"
},
"engines": {
"node": ">=20.0.0"
},
"peerDependencies": {
"react": ">=18",
"react-dom": ">=18"
}
},
"node_modules/react-router/node_modules/cookie": {
"version": "1.1.1",
"resolved": "https://registry.npmmirror.com/cookie/-/cookie-1.1.1.tgz",
"integrity": "sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==",
"engines": {
"node": ">=18"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/express"
}
},
"node_modules/read-cache": {
"version": "1.0.0",
"resolved": "https://registry.npmmirror.com/read-cache/-/read-cache-1.0.0.tgz",
@@ -4405,19 +4193,6 @@
"pify": "^2.3.0"
}
},
"node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmmirror.com/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/readdirp": {
"version": "3.6.0",
"resolved": "https://registry.npmmirror.com/readdirp/-/readdirp-3.6.0.tgz",
@@ -4645,6 +4420,11 @@
"node": ">= 0.8.0"
}
},
"node_modules/set-cookie-parser": {
"version": "2.7.2",
"resolved": "https://registry.npmmirror.com/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz",
"integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw=="
},
"node_modules/set-value": {
"version": "2.0.1",
"resolved": "https://registry.npmmirror.com/set-value/-/set-value-2.0.1.tgz",
@@ -4753,49 +4533,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/simple-concat": {
"version": "1.0.1",
"resolved": "https://registry.npmmirror.com/simple-concat/-/simple-concat-1.0.1.tgz",
"integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/simple-get": {
"version": "4.0.1",
"resolved": "https://registry.npmmirror.com/simple-get/-/simple-get-4.0.1.tgz",
"integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"dependencies": {
"decompress-response": "^6.0.0",
"once": "^1.3.1",
"simple-concat": "^1.0.0"
}
},
"node_modules/size-sensor": {
"version": "1.0.3",
"resolved": "https://registry.npmmirror.com/size-sensor/-/size-sensor-1.0.3.tgz",
@@ -4881,6 +4618,11 @@
"node": ">=0.10.0"
}
},
"node_modules/sql.js": {
"version": "1.14.0",
"resolved": "https://registry.npmmirror.com/sql.js/-/sql.js-1.14.0.tgz",
"integrity": "sha512-NXYh+kFqLiYRCNAaHD0PcbjFgXyjuolEKLMk5vRt2DgPENtF1kkNzzMlg42dUk5wIsH8MhUzsRhaUxIisoSlZQ=="
},
"node_modules/statuses": {
"version": "2.0.2",
"resolved": "https://registry.npmmirror.com/statuses/-/statuses-2.0.2.tgz",
@@ -4889,14 +4631,6 @@
"node": ">= 0.8"
}
},
"node_modules/string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmmirror.com/string_decoder/-/string_decoder-1.3.0.tgz",
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
"dependencies": {
"safe-buffer": "~5.2.0"
}
},
"node_modules/strip-json-comments": {
"version": "3.1.1",
"resolved": "https://registry.npmmirror.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz",
@@ -4963,6 +4697,28 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/swagger-ui-dist": {
"version": "5.32.0",
"resolved": "https://registry.npmmirror.com/swagger-ui-dist/-/swagger-ui-dist-5.32.0.tgz",
"integrity": "sha512-nKZB0OuDvacB0s/lC2gbge+RigYvGRGpLLMWMFxaTUwfM+CfndVk9Th2IaTinqXiz6Mn26GK2zriCpv6/+5m3Q==",
"dependencies": {
"@scarf/scarf": "=1.4.0"
}
},
"node_modules/swagger-ui-express": {
"version": "5.0.1",
"resolved": "https://registry.npmmirror.com/swagger-ui-express/-/swagger-ui-express-5.0.1.tgz",
"integrity": "sha512-SrNU3RiBGTLLmFU8GIJdOdanJTl4TOmT27tt3bWWHppqYmAZ6IDuEuBvMU6nZq0zLEe6b/1rACXCgLZqO6ZfrA==",
"dependencies": {
"swagger-ui-dist": ">=5.0.0"
},
"engines": {
"node": ">= v0.10.32"
},
"peerDependencies": {
"express": ">=4.0.0 || >=5.0.0-beta"
}
},
"node_modules/tailwindcss": {
"version": "3.4.19",
"resolved": "https://registry.npmmirror.com/tailwindcss/-/tailwindcss-3.4.19.tgz",
@@ -5000,32 +4756,6 @@
"node": ">=14.0.0"
}
},
"node_modules/tar-fs": {
"version": "2.1.4",
"resolved": "https://registry.npmmirror.com/tar-fs/-/tar-fs-2.1.4.tgz",
"integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==",
"dependencies": {
"chownr": "^1.1.1",
"mkdirp-classic": "^0.5.2",
"pump": "^3.0.0",
"tar-stream": "^2.1.4"
}
},
"node_modules/tar-stream": {
"version": "2.2.0",
"resolved": "https://registry.npmmirror.com/tar-stream/-/tar-stream-2.2.0.tgz",
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
"dependencies": {
"bl": "^4.0.3",
"end-of-stream": "^1.4.1",
"fs-constants": "^1.0.0",
"inherits": "^2.0.3",
"readable-stream": "^3.1.1"
},
"engines": {
"node": ">=6"
}
},
"node_modules/thenify": {
"version": "3.3.1",
"resolved": "https://registry.npmmirror.com/thenify/-/thenify-3.3.1.tgz",
@@ -5140,17 +4870,6 @@
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.0.tgz",
"integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg=="
},
"node_modules/tunnel-agent": {
"version": "0.6.0",
"resolved": "https://registry.npmmirror.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
"dependencies": {
"safe-buffer": "^5.0.1"
},
"engines": {
"node": "*"
}
},
"node_modules/type-check": {
"version": "0.4.0",
"resolved": "https://registry.npmmirror.com/type-check/-/type-check-0.4.0.tgz",
@@ -5288,7 +5007,8 @@
"node_modules/util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmmirror.com/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
"dev": true
},
"node_modules/utils-merge": {
"version": "1.0.1",
@@ -5389,11 +5109,6 @@
"node": ">=0.10.0"
}
},
"node_modules/wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
},
"node_modules/ws": {
"version": "8.19.0",
"resolved": "https://registry.npmmirror.com/ws/-/ws-8.19.0.tgz",

View File

@@ -4,25 +4,38 @@
"version": "1.0.0",
"type": "module",
"scripts": {
"start": "./start.sh",
"dev": "vite",
"api": "node server/index.js",
"api:seed": "node server/seed.js",
"crawler": "cd crawler && python main.py",
"gdelt": "cd crawler && uvicorn realtime_conflict_service:app --host 0.0.0.0 --port 8000",
"crawler:once": "cd crawler && python run_once.py",
"crawler:once:range": "./scripts/run-crawler-range.sh",
"crawler:test": "cd crawler && python3 -c \"import sys; sys.path.insert(0,'.'); from scrapers.rss_scraper import fetch_all; n=len(fetch_all()); print('RSS 抓取:', n, '条' if n else '(0 条,检查网络或关键词过滤)')\"",
"crawler:test:extraction": "cd crawler && python3 -m pytest tests/test_extraction.py -v",
"build": "vite build",
"typecheck": "tsc --noEmit",
"lint": "eslint .",
"preview": "vite preview"
"preview": "vite preview",
"verify": "./scripts/verify-pipeline.sh",
"verify:full": "./scripts/verify-pipeline.sh --start-crawler",
"verify-panels": "node scripts/verify-panels.cjs",
"check-crawler-data": "node scripts/check-crawler-data.cjs"
},
"dependencies": {
"better-sqlite3": "^11.6.0",
"cors": "^2.8.5",
"echarts": "^5.5.0",
"echarts-for-react": "^3.0.2",
"express": "^4.21.1",
"lucide-react": "^0.460.0",
"lucide-react": "^0.576.0",
"mapbox-gl": "^3.6.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-map-gl": "^7.1.7",
"react-router-dom": "^7.13.1",
"sql.js": "^1.11.0",
"swagger-ui-express": "^5.0.1",
"ws": "^8.19.0",
"zustand": "^5.0.0"
},

3
run.sh Normal file
View File

@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Deploy helper: sync the working tree to origin/master, rebuild, restart pm2.
# NOTE(review): assumes pm2 process id 4 is this app — confirm before reuse.
set -euo pipefail
# Guard the cd: without it a failed cd would leave the shell in an arbitrary
# directory and `git reset --hard` would clobber the wrong repository.
cd /www/wwwroot/www.airtep.com2/usa || exit 1
git fetch origin && git reset --hard origin/master && npm run build && pm2 restart 4

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env bash
# Generated from the local server/data.db table schema by
# scripts/gen-align-schema-from-local.sh; run on production to align its schema.
# Usage (from the production directory): DB_PATH=server/data.db ./scripts/align-production-schema.sh
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"
# Errors are swallowed so re-runs are idempotent (column may already exist).
run() { sqlite3 "$DB_PATH" "$1" 2>/dev/null || true; }
echo "=== 对齐生产库表结构(与本地 data.db 一致):$DB_PATH ==="
# BUG FIX: SQLite refuses to ADD COLUMN with NOT NULL unless a non-NULL
# DEFAULT is given ("Cannot add a NOT NULL column with default value NULL").
# The previous version omitted the DEFAULTs, so on any populated table every
# such ALTER failed — silently, because run() discards errors — and the
# columns were never created. DEFAULT 0 / DEFAULT '' added throughout.
run "ALTER TABLE combat_losses ADD COLUMN bases_destroyed INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN bases_damaged INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN personnel_killed INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN personnel_wounded INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN aircraft INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN warships INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN armor INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN vehicles INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN civilian_killed INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN civilian_wounded INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN drones INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN missiles INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN helicopters INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN submarines INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN tanks INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN carriers INTEGER NOT NULL DEFAULT 0;"
# NOTE(review): seeds `carriers` from `tanks` — looks like a schema-migration
# backfill; confirm against server/seed.js before changing.
run "UPDATE combat_losses SET carriers = COALESCE(tanks, 0) WHERE carriers = 0;"
run "ALTER TABLE combat_losses ADD COLUMN civilian_ships INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE combat_losses ADD COLUMN airport_port INTEGER NOT NULL DEFAULT 0;"
echo " combat_losses done"
run "ALTER TABLE conflict_stats ADD COLUMN total_events INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE conflict_stats ADD COLUMN high_impact_events INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE conflict_stats ADD COLUMN estimated_casualties INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE conflict_stats ADD COLUMN estimated_strike_count INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE conflict_stats ADD COLUMN updated_at TEXT NOT NULL DEFAULT '';"
echo " conflict_stats done"
run "ALTER TABLE display_stats ADD COLUMN viewers INTEGER;"
run "ALTER TABLE display_stats ADD COLUMN share_count INTEGER;"
run "ALTER TABLE display_stats ADD COLUMN like_count INTEGER;"
run "ALTER TABLE display_stats ADD COLUMN override_enabled INTEGER NOT NULL DEFAULT 0;"
echo " display_stats done"
run "ALTER TABLE feedback ADD COLUMN content TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE feedback ADD COLUMN ip TEXT;"
run "ALTER TABLE feedback ADD COLUMN created_at TEXT NOT NULL DEFAULT '';"
echo " feedback done"
run "ALTER TABLE force_asset ADD COLUMN side TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE force_asset ADD COLUMN name TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE force_asset ADD COLUMN type TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE force_asset ADD COLUMN count INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_asset ADD COLUMN status TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE force_asset ADD COLUMN lat REAL;"
run "ALTER TABLE force_asset ADD COLUMN lng REAL;"
echo " force_asset done"
run "ALTER TABLE force_summary ADD COLUMN total_assets INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_summary ADD COLUMN personnel INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_summary ADD COLUMN naval_ships INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_summary ADD COLUMN aircraft INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_summary ADD COLUMN ground_units INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_summary ADD COLUMN uav INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_summary ADD COLUMN missile_consumed INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE force_summary ADD COLUMN missile_stock INTEGER NOT NULL DEFAULT 0;"
echo " force_summary done"
run "ALTER TABLE gdelt_events ADD COLUMN event_time TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE gdelt_events ADD COLUMN title TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE gdelt_events ADD COLUMN lat REAL NOT NULL DEFAULT 0;"
run "ALTER TABLE gdelt_events ADD COLUMN lng REAL NOT NULL DEFAULT 0;"
run "ALTER TABLE gdelt_events ADD COLUMN impact_score INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE gdelt_events ADD COLUMN url TEXT;"
run "ALTER TABLE gdelt_events ADD COLUMN created_at TEXT;"
echo " gdelt_events done"
run "ALTER TABLE key_location ADD COLUMN side TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE key_location ADD COLUMN name TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE key_location ADD COLUMN lat REAL NOT NULL DEFAULT 0;"
run "ALTER TABLE key_location ADD COLUMN lng REAL NOT NULL DEFAULT 0;"
run "ALTER TABLE key_location ADD COLUMN type TEXT;"
run "ALTER TABLE key_location ADD COLUMN region TEXT;"
run 'ALTER TABLE key_location ADD COLUMN status TEXT DEFAULT '\''operational'\'';'
run "ALTER TABLE key_location ADD COLUMN damage_level INTEGER;"
run "ALTER TABLE key_location ADD COLUMN attacked_at TEXT;"
echo " key_location done"
run "ALTER TABLE like_count ADD COLUMN total INTEGER NOT NULL DEFAULT 0;"
echo " like_count done"
run "ALTER TABLE map_strike_line ADD COLUMN target_lng REAL NOT NULL DEFAULT 0;"
run "ALTER TABLE map_strike_line ADD COLUMN target_lat REAL NOT NULL DEFAULT 0;"
run "ALTER TABLE map_strike_line ADD COLUMN target_name TEXT;"
run "ALTER TABLE map_strike_line ADD COLUMN struck_at TEXT;"
echo " map_strike_line done"
run "ALTER TABLE map_strike_source ADD COLUMN name TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE map_strike_source ADD COLUMN lng REAL NOT NULL DEFAULT 0;"
run "ALTER TABLE map_strike_source ADD COLUMN lat REAL NOT NULL DEFAULT 0;"
echo " map_strike_source done"
run "ALTER TABLE news_content ADD COLUMN content_hash TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE news_content ADD COLUMN title TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE news_content ADD COLUMN summary TEXT NOT NULL DEFAULT '';"
run 'ALTER TABLE news_content ADD COLUMN url TEXT NOT NULL DEFAULT '\'''\'';'
run 'ALTER TABLE news_content ADD COLUMN source TEXT NOT NULL DEFAULT '\'''\'';'
run "ALTER TABLE news_content ADD COLUMN published_at TEXT NOT NULL DEFAULT '';"
run 'ALTER TABLE news_content ADD COLUMN category TEXT NOT NULL DEFAULT '\''other'\'';'
run 'ALTER TABLE news_content ADD COLUMN severity TEXT NOT NULL DEFAULT '\''medium'\'';'
run "ALTER TABLE news_content ADD COLUMN created_at TEXT NOT NULL DEFAULT '';"
echo " news_content done"
run "ALTER TABLE power_index ADD COLUMN overall INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE power_index ADD COLUMN military_strength INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE power_index ADD COLUMN economic_power INTEGER NOT NULL DEFAULT 0;"
run "ALTER TABLE power_index ADD COLUMN geopolitical_influence INTEGER NOT NULL DEFAULT 0;"
echo " power_index done"
run "ALTER TABLE retaliation_current ADD COLUMN value INTEGER NOT NULL DEFAULT 0;"
echo " retaliation_current done"
run "ALTER TABLE retaliation_history ADD COLUMN time TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE retaliation_history ADD COLUMN value INTEGER NOT NULL DEFAULT 0;"
echo " retaliation_history done"
run "ALTER TABLE share_count ADD COLUMN total INTEGER NOT NULL DEFAULT 0;"
echo " share_count done"
run "ALTER TABLE situation ADD COLUMN data TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE situation ADD COLUMN updated_at TEXT NOT NULL DEFAULT '';"
echo " situation done"
run "ALTER TABLE situation_update ADD COLUMN timestamp TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE situation_update ADD COLUMN category TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE situation_update ADD COLUMN summary TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE situation_update ADD COLUMN severity TEXT NOT NULL DEFAULT '';"
echo " situation_update done"
run "ALTER TABLE visitor_count ADD COLUMN total INTEGER NOT NULL DEFAULT 0;"
echo " visitor_count done"
run "ALTER TABLE visits ADD COLUMN last_seen TEXT NOT NULL DEFAULT '';"
echo " visits done"
run "ALTER TABLE wall_street_trend ADD COLUMN time TEXT NOT NULL DEFAULT '';"
run "ALTER TABLE wall_street_trend ADD COLUMN value INTEGER NOT NULL DEFAULT 0;"
echo " wall_street_trend done"
echo "=== 完成、核对示例: ==="
echo " sqlite3 $DB_PATH \"PRAGMA table_info(key_location);\""
echo " sqlite3 $DB_PATH \"PRAGMA table_info(combat_losses);\""

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env bash
# Verify that the attack-location / strike-line rows in the DB are complete
# (expected counts mirror server/seed.js).
# Usage:     DB_PATH=server/data.db ./scripts/check-attack-locations.sh
# Bare host: cd /root/usa && ./scripts/check-attack-locations.sh
set -e
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"

echo "=========================================="
echo "攻击地点 / 打击线 检查"
echo "DB: $DB_PATH"
echo "=========================================="

if [[ ! -f "$DB_PATH" ]]; then
  echo "错误: 数据库文件不存在"
  exit 1
fi
if ! command -v sqlite3 &>/dev/null; then
  echo "需要 sqlite3。安装: yum install sqlite 或 apt install sqlite3"
  exit 1
fi

# Expected row counts — keep in sync with server/seed.js.
EXPECT_US=62        # getUsLocations: naval + attacked + newBases
EXPECT_IRAN=18      # number of iranLocs entries
EXPECT_ISRAEL=4
EXPECT_LINCOLN=5
EXPECT_FORD=7

# COUNT(*) over the given table/WHERE fragment; prints 0 if the query fails.
count_rows() {
  sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $1;" 2>/dev/null || echo "0"
}

us_rows=$(count_rows "key_location WHERE side='us'")
iran_rows=$(count_rows "key_location WHERE side='iran'")
israel_rows=$(count_rows "map_strike_line WHERE source_id='israel'")
lincoln_rows=$(count_rows "map_strike_line WHERE source_id='lincoln'")
ford_rows=$(count_rows "map_strike_line WHERE source_id='ford'")

echo ""
echo "key_location:"
echo " us (美军基地等): 当前 $us_rows 条,期望 $EXPECT_US"
echo " iran (伊朗被袭点): 当前 $iran_rows 条,期望 $EXPECT_IRAN"
echo ""
echo "map_strike_line (盟军打击伊朗):"
echo " israel: 当前 $israel_rows 条,期望 $EXPECT_ISRAEL"
echo " lincoln: 当前 $lincoln_rows 条,期望 $EXPECT_LINCOLN"
echo " ford: 当前 $ford_rows 条,期望 $EXPECT_FORD"
echo "=========================================="

all_ok=1
[[ "$us_rows" -ge "$EXPECT_US" ]] || all_ok=0
[[ "$iran_rows" -ge "$EXPECT_IRAN" ]] || all_ok=0
[[ "$israel_rows" -ge "$EXPECT_ISRAEL" ]] || all_ok=0
[[ "$lincoln_rows" -ge "$EXPECT_LINCOLN" ]] || all_ok=0
[[ "$ford_rows" -ge "$EXPECT_FORD" ]] || all_ok=0

if [[ $all_ok -eq 1 ]]; then
  echo "结论: 攻击地点/打击线数量正常"
  exit 0
fi

echo "结论: 数量不足,请在生产执行 seed 以与当前代码一致:"
echo " cd $PROJECT_ROOT"
echo " cp server/data.db server/data.db.bak-\$(date +%Y%m%d-%H%M%S)"
echo " DB_PATH=server/data.db node server/seed.js"
echo " 重启 API 后刷新页面"
exit 1

View File

@@ -0,0 +1,140 @@
#!/usr/bin/env node
/**
 * Inspect the data the crawler has written: row counts plus the most recent
 * rows of situation_update, news_content and gdelt_events.
 *
 * Usage (from the project root): node scripts/check-crawler-data.cjs
 * Optionally start the crawler first (npm run gdelt) to also see its live
 * status; the script reads the DB directly either way.
 */
const path = require('path')
const http = require('http')

// chdir to the project root so ../server/db resolves its DB file correctly.
const projectRoot = path.resolve(__dirname, '..')
process.chdir(projectRoot)
const db = require('../server/db')

const CRAWLER_URL = process.env.CRAWLER_URL || 'http://localhost:8000'
const SHOW_ROWS = 10

/**
 * GET /crawler/status from the crawler service.
 * Resolves with the parsed JSON payload, or null when the service is
 * unreachable, times out, or returns a non-JSON body. Never rejects.
 * @returns {Promise<object|null>}
 */
function fetchCrawlerStatus() {
  return new Promise((resolve) => {
    const url = new URL(`${CRAWLER_URL}/crawler/status`)
    const req = http.request(
      { hostname: url.hostname, port: url.port || 80, path: url.pathname, method: 'GET', timeout: 3000 },
      (res) => {
        let body = ''
        res.on('data', (c) => (body += c))
        res.on('end', () => {
          try {
            resolve(JSON.parse(body))
          } catch {
            resolve(null)
          }
        })
      }
    )
    // BUG FIX: the `timeout` option only *emits* 'timeout'; without this
    // handler the request keeps waiting and the promise could hang forever.
    // destroy() aborts the socket, which fires 'error' and resolves null.
    req.on('timeout', () => req.destroy())
    req.on('error', () => resolve(null))
    req.end()
  })
}

/** Query the three crawler-fed tables and print a human-readable report. */
async function run() {
  console.log('========================================')
  console.log('爬虫数据检查(条数 + 最近内容)')
  console.log('========================================\n')
  // ---------- Crawler service status (optional) ----------
  const status = await fetchCrawlerStatus()
  if (status) {
    console.log('--- 爬虫服务状态 GET /crawler/status ---')
    console.log(' db_path:', status.db_path)
    console.log(' db_exists:', status.db_exists)
    console.log(' situation_update_count:', status.situation_update_count)
    console.log(' last_fetch_items:', status.last_fetch_items, '(本轮抓取条数)')
    console.log(' last_fetch_inserted:', status.last_fetch_inserted, '(去重后新增)')
    if (status.last_fetch_error) console.log(' last_fetch_error:', status.last_fetch_error)
    console.log('')
  } else {
    console.log('--- 爬虫服务 ---')
    console.log(' 未启动或不可达:', CRAWLER_URL)
    console.log('')
  }
  // ---------- situation_update: feeds the "recent updates" timeline panel ----------
  let situationUpdateRows = []
  let situationUpdateCount = 0
  try {
    situationUpdateCount = db.prepare('SELECT COUNT(*) as c FROM situation_update').get().c
    situationUpdateRows = db
      .prepare(
        'SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT ?'
      )
      .all(SHOW_ROWS)
  } catch (e) {
    console.log('situation_update 表读取失败:', e.message)
  }
  console.log('--- situation_update事件脉络---')
  console.log(' 总条数:', situationUpdateCount)
  if (situationUpdateRows.length > 0) {
    console.log(' 最近', situationUpdateRows.length, '条:')
    situationUpdateRows.forEach((r, i) => {
      const summary = (r.summary || '').slice(0, 50)
      console.log(` ${i + 1}. [${r.timestamp}] ${r.category}/${r.severity} ${summary}${summary.length >= 50 ? '…' : ''}`)
    })
  }
  console.log('')
  // ---------- news_content: deduplicated news rows written by the crawler ----------
  let newsCount = 0
  let newsRows = []
  try {
    newsCount = db.prepare('SELECT COUNT(*) as c FROM news_content').get().c
    newsRows = db
      .prepare(
        'SELECT title, summary, source, published_at, category, severity FROM news_content ORDER BY published_at DESC LIMIT ?'
      )
      .all(SHOW_ROWS)
  } catch (e) {
    console.log('news_content 表读取失败:', e.message)
  }
  console.log('--- news_content资讯表---')
  console.log(' 总条数:', newsCount)
  if (newsRows.length > 0) {
    console.log(' 最近', newsRows.length, '条:')
    newsRows.forEach((r, i) => {
      const title = (r.title || '').slice(0, 45)
      console.log(` ${i + 1}. [${r.published_at || ''}] ${r.source || ''} ${title}${title.length >= 45 ? '…' : ''}`)
      if (r.summary) console.log(` summary: ${(r.summary || '').slice(0, 60)}`)
    })
  }
  console.log('')
  // ---------- gdelt_events: conflict points shown on the map ----------
  let gdeltCount = 0
  let gdeltRows = []
  try {
    gdeltCount = db.prepare('SELECT COUNT(*) as c FROM gdelt_events').get().c
    gdeltRows = db
      .prepare('SELECT event_id, event_time, title, impact_score FROM gdelt_events ORDER BY event_time DESC LIMIT 5')
      .all()
  } catch (e) {
    console.log('gdelt_events 表读取失败:', e.message)
  }
  console.log('--- gdelt_events地图冲突点---')
  console.log(' 总条数:', gdeltCount)
  if (gdeltRows.length > 0) {
    console.log(' 最近 5 条:')
    gdeltRows.forEach((r, i) => {
      const title = (r.title || '').slice(0, 50)
      console.log(` ${i + 1}. [${r.event_time}] impact=${r.impact_score} ${title}${title.length >= 50 ? '…' : ''}`)
    })
  }
  console.log('========================================')
}

db.initDb().then(() => run()).catch((err) => {
  console.error('失败:', err.message)
  process.exit(1)
})

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Check that crawler data and the dashboard panels are wired together.
# Usage: ./scripts/check-crawler-panel-connectivity.sh
# Requires the API running (npm run api); optionally the crawler (npm run gdelt).
set -e
API_URL="${API_URL:-http://localhost:3001}"
CRAWLER_URL="${CRAWLER_URL:-http://localhost:8000}"
echo "=========================================="
echo "爬虫 ↔ 面板 联通检查"
echo "API: $API_URL | Crawler: $CRAWLER_URL"
echo "=========================================="
# 1. Crawler side: situation_update row count.
# BUG FIX: fetch the status payload once. The old code hit the endpoint
# twice; if the second request failed after the first succeeded, the
# pipeline's non-zero status aborted the whole script under `set -e`.
CRAWLER_STATUS=$(curl -sf "$CRAWLER_URL/crawler/status" 2>/dev/null || true)
if [[ -n "$CRAWLER_STATUS" ]]; then
  if command -v jq &>/dev/null; then
    CRAWLER_SU_COUNT=$(printf '%s' "$CRAWLER_STATUS" | jq -r '.situation_update_count // "?"')
  else
    CRAWLER_SU_COUNT="(需 jq 查看)"
  fi
  echo "[爬虫] situation_update 条数: $CRAWLER_SU_COUNT"
else
  echo "[爬虫] 未启动或不可达 (curl $CRAWLER_URL/crawler/status 失败)"
fi
# 2. Panel side: recentUpdates length and lastUpdated from the API.
if ! curl -sf "$API_URL/api/health" >/dev/null 2>&1; then
  echo "[API] 未启动,请先运行: npm run api"
  exit 1
fi
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
if command -v jq &>/dev/null; then
  RU_LEN=$(echo "$SIT" | jq '.recentUpdates | length')
  LAST=$(echo "$SIT" | jq -r '.lastUpdated // "?"')
  echo "[面板] recentUpdates 条数: $RU_LEN | lastUpdated: $LAST"
else
  echo "[面板] 态势数据已获取 (安装 jq 可显示条数)"
fi
# 3. Consistency: the crawler writes server/data.db; Node reloads it on notify.
echo ""
echo "--- 联动说明 ---"
echo " • 事件脉络 (recentUpdates) ← situation_update 表,由爬虫 write_updates() 写入"
echo " • 爬虫每次抓取后会 POST $API_URL/api/crawler/notifyNode 会 reloadFromFile() 后广播"
echo " • 若爬虫有数据但面板 recentUpdates 很少/为空:检查 Node 终端是否出现 [crawler/notify] DB 已重载"
echo " • 若从未出现:检查 API_BASE 是否指向当前 API默认 http://localhost:3001"
echo " • 战损/基地/力量指数:仅当 AI/规则从新闻中提取到数字时才会更新,多数新闻不会触发"
echo "=========================================="
# 4. Optional: fire one notify to see whether Node reloads (useful without the crawler).
# Skipped when non-interactive; interactively: echo y | ./scripts/check-crawler-panel-connectivity.sh
if [[ -t 0 ]]; then
  echo ""
  read -r -p "是否发送一次 POST /api/crawler/notify 测试 Node 重载? [y/N] " ans
  # Portable y/Y match: ${ans,,} is bash 4+ and breaks on macOS's bash 3.2.
  if [[ "$ans" == [yY] ]]; then
    curl -sf -X POST "$API_URL/api/crawler/notify" && echo " 已发送 notify请看 Node 终端是否打印 [crawler/notify] DB 已重载"
  fi
fi

39
scripts/check-db-and-crawler.sh Executable file
View File

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Show the DB's lastUpdated timestamp and row counts, plus a reminder of how
# to refresh the data with the crawler.
# Usage: ./scripts/check-db-and-crawler.sh
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"

echo "=========================================="
echo "数据库与爬虫状态"
echo "DB: $DB_PATH"
echo "=========================================="

if [[ ! -f "$DB_PATH" ]]; then
  echo "数据库文件不存在。请先执行: node server/seed.js"
  exit 1
fi

if ! command -v sqlite3 &>/dev/null; then
  echo "未安装 sqlite3无法直接查库。可安装: brew install sqlite3"
else
  # Pull the three headline numbers; "?" on any query failure.
  last_updated=$(sqlite3 "$DB_PATH" "SELECT updated_at FROM situation WHERE id = 1;" 2>/dev/null || echo "?")
  update_rows=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM situation_update;" 2>/dev/null || echo "?")
  news_rows=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM news_content;" 2>/dev/null || echo "?")
  echo "situation.updated_at (前端 lastUpdated): $last_updated"
  echo "situation_update 条数: $update_rows"
  echo "news_content 条数: $news_rows"
fi

echo ""
echo "--- 为何数据停在旧日期? ---"
echo " • lastUpdated 来自 situation.updated_at。"
echo " • 已改为:每次爬虫运行都会更新该时间(不再仅在有新资讯时更新)。"
echo " • 若从未跑爬虫或很久没跑,请执行一次爬虫:"
echo ""
echo " cd $PROJECT_ROOT && python crawler/run_once.py"
echo " 或: npm run crawler:once"
echo ""
echo " 若需定时更新,可启动常驻爬虫: python crawler/main.py"
echo "=========================================="

78
scripts/debug-panels-focus.sh Executable file
View File

@@ -0,0 +1,78 @@
#!/usr/bin/env bash
# Check only three dashboard panels: combat losses, bases, and map war zones.
# Usage: ./scripts/debug-panels-focus.sh
set -e
API_URL="${API_URL:-http://localhost:3001}"
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"
echo "=========================================="
echo "战损 / 基地 / 地图战区 — 数据检查"
echo "API: $API_URL | DB: $DB_PATH"
echo "=========================================="
echo ""
# ---------- API connectivity ----------
if ! curl -sf "$API_URL/api/health" >/dev/null 2>&1; then
echo "✗ API 无响应,请先运行: npm run api"
exit 1
fi
echo "✓ API 正常"
echo ""
# Full situation payload; falls back to "{}" so jq below gets valid JSON.
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
# ---------- 1. Combat losses ----------
echo "[1] 战损 (combat_losses)"
# jq is optional — without it only a hint is printed.
if command -v jq &>/dev/null; then
us_k=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.killed // "?"')
us_w=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.wounded // "?"')
ir_k=$(echo "$SIT" | jq -r '.iranForces.combatLosses.personnelCasualties.killed // "?"')
ir_w=$(echo "$SIT" | jq -r '.iranForces.combatLosses.personnelCasualties.wounded // "?"')
echo " 美军 阵亡=$us_k 受伤=$us_w | 伊朗 阵亡=$ir_k 受伤=$ir_w"
echo " conflictStats: $(echo "$SIT" | jq -c '.conflictStats')"
else
echo " (安装 jq 可显示详细数字)"
fi
# Cross-check directly against the DB when the file and sqlite3 both exist.
if [[ -f "$DB_PATH" ]] && command -v sqlite3 &>/dev/null; then
echo " 表 combat_losses:"
sqlite3 "$DB_PATH" "SELECT side, personnel_killed, personnel_wounded, bases_destroyed, bases_damaged FROM combat_losses" 2>/dev/null | while read -r line; do echo "   $line"; done
fi
echo " 数据来源: seed 初始;爬虫从新闻提取 combat_losses_delta 后 db_merge 增量叠加。不更新→检查是否跑 gdelt、提取器是否输出、新闻是否含伤亡数字。"
echo ""
# ---------- 2. Bases ----------
echo "[2] 基地 (key_location)"
if command -v jq &>/dev/null; then
us_loc=$(echo "$SIT" | jq -r '.usForces.keyLocations | length')
ir_loc=$(echo "$SIT" | jq -r '.iranForces.keyLocations | length')
us_attacked=$(echo "$SIT" | jq -r '[.usForces.keyLocations[] | select(.status == "attacked")] | length')
ir_attacked=$(echo "$SIT" | jq -r '[.iranForces.keyLocations[] | select(.status == "attacked")] | length')
echo " 美军 据点=$us_loc 遭袭=$us_attacked | 伊朗 据点=$ir_loc 遭袭=$ir_attacked"
fi
if [[ -f "$DB_PATH" ]] && command -v sqlite3 &>/dev/null; then
echo " 表 key_location 遭袭/有损伤的:"
sqlite3 "$DB_PATH" "SELECT side, name, status, damage_level FROM key_location WHERE status != 'operational' OR damage_level IS NOT NULL LIMIT 10" 2>/dev/null | while read -r line; do echo "   $line"; done
fi
echo " 数据来源: seed 写入全部据点;爬虫只更新 status/damage_level需 name_keywords 与 name LIKE 匹配。不更新→检查新闻是否提基地遭袭、关键词与 seed name 是否一致。"
echo ""
# ---------- 3. Map war zones ----------
echo "[3] 地图战区 (gdelt_events + conflict_stats)"
if command -v jq &>/dev/null; then
ev_cnt=$(echo "$SIT" | jq -r '.conflictEvents | length')
echo " conflictEvents 条数: $ev_cnt"
echo " conflictStats: $(echo "$SIT" | jq -c '.conflictStats')"
fi
if [[ -f "$DB_PATH" ]] && command -v sqlite3 &>/dev/null; then
n_ev=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM gdelt_events" 2>/dev/null || echo "0")
echo " 表 gdelt_events 行数: $n_ev"
sqlite3 "$DB_PATH" "SELECT total_events, high_impact_events, estimated_casualties, estimated_strike_count FROM conflict_stats WHERE id = 1" 2>/dev/null | while read -r line; do echo "   conflict_stats: $line"; done
fi
echo " 数据来源: GDELT API 写入;或 GDELT_DISABLED=1 时由 situation_update 回填。无点→跑 gdelt 或开启 RSS 回填。"
echo ""
echo "=========================================="
echo "详细说明与排查顺序见: docs/DEBUG_战损_基地_地图.md"
echo "=========================================="

83
scripts/debug-panels.sh Executable file
View File

@@ -0,0 +1,83 @@
#!/usr/bin/env bash
# Quick data check for every dashboard panel: maps tables/API fields to the
# panel they feed, so each one can be debugged in isolation.
# Usage: ./scripts/debug-panels.sh
# Dependencies: curl (required); jq and sqlite3 optional for richer output.
set -e
API_URL="${API_URL:-http://localhost:3001}"
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"
echo "=========================================="
echo "看板板块数据检查 (DEBUG_PANELS)"
echo "API: $API_URL | DB: $DB_PATH"
echo "=========================================="
echo ""
# ---------- 1. API health and situation summary ----------
echo "[1] API 与态势摘要"
if ! curl -sf "$API_URL/api/health" >/dev/null 2>&1; then
echo " ✗ API 无响应,请先运行: npm run api"
echo " 后续表检查将跳过(依赖 API 或直接读 DB"
else
echo " ✓ API 正常"
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
if command -v jq &>/dev/null; then
echo " lastUpdated: $(echo "$SIT" | jq -r '.lastUpdated // "?"')"
echo " recentUpdates: $(echo "$SIT" | jq -r '.recentUpdates | length') 条 → 事件脉络"
echo " conflictEvents: $(echo "$SIT" | jq -r '.conflictEvents | length') 条 → 地图冲突点"
echo " us powerIndex: $(echo "$SIT" | jq -r '.usForces.powerIndex.overall') → 顶栏/战力图"
echo " iran powerIndex: $(echo "$SIT" | jq -r '.iranForces.powerIndex.overall')"
echo " us keyLocations: $(echo "$SIT" | jq -r '.usForces.keyLocations | length') 条 → 美国基地/地图"
echo " iran keyLocations: $(echo "$SIT" | jq -r '.iranForces.keyLocations | length') 条 → 伊朗基地/地图"
echo " us combatLosses: killed=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.killed') wounded=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.wounded')"
echo " wallStreet points: $(echo "$SIT" | jq -r '.usForces.wallStreetInvestmentTrend | length') → 华尔街图"
echo " retaliation: $(echo "$SIT" | jq -r '.iranForces.retaliationSentiment') (history: $(echo "$SIT" | jq -r '.iranForces.retaliationSentimentHistory | length') 条)"
else
echo " (安装 jq 可显示详细字段) 态势已拉取,长度: ${#SIT}"
fi
fi
echo ""
# ---------- 2. Per-table row counts (read the DB directly) ----------
echo "[2] 数据库表行数(与板块对应)"
if ! [[ -f "$DB_PATH" ]]; then
echo " ✗ 数据库文件不存在: $DB_PATH"
echo " 请先 seed: node server/seed.js 或 启动 API 后由 initDb 创建"
elif ! command -v sqlite3 &>/dev/null; then
echo " (未安装 sqlite3跳过表统计。可安装后重试)"
else
# Each table maps to one dashboard panel; a failed COUNT degrades to "?".
TABLES="force_summary power_index force_asset key_location combat_losses wall_street_trend retaliation_current retaliation_history situation_update situation gdelt_events conflict_stats news_content"
for t in $TABLES; do
n=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $t" 2>/dev/null || echo "?")
case "$t" in
force_summary) desc="力量摘要(美/伊)" ;;
power_index) desc="战力指数 → 顶栏/战力图" ;;
force_asset) desc="资产列表 → 左右侧摘要" ;;
key_location) desc="据点 → 地图/美伊基地面板" ;;
combat_losses) desc="战损 → 战损面板" ;;
wall_street_trend) desc="华尔街趋势图" ;;
retaliation_current) desc="报复当前值" ;;
retaliation_history) desc="报复历史 → 仪表盘" ;;
situation_update) desc="事件脉络 → 时间线" ;;
situation) desc="updated_at → 顶栏时间" ;;
gdelt_events) desc="冲突点 → 地图图层" ;;
conflict_stats) desc="冲突统计 → 战损区" ;;
news_content) desc="资讯表 → /api/news" ;;
*) desc="" ;;
esac
printf " %-22s %6s %s\n" "$t" "$n" "$desc"
done
fi
echo ""
# ---------- 3. Brief per-panel health verdicts ----------
echo "[3] 板块数据来源与可能问题"
echo " • 仅 seed、爬虫不写: force_summary, power_index, force_asset"
echo " • 爬虫可更新: situation_update(事件脉络), key_location(基地状态), combat_losses(战损), retaliation_*, wall_street_trend, gdelt_events"
echo " • 事件脉络不更新 → 检查爬虫是否启动、是否调用 POST /api/crawler/notify"
echo " • 战损/基地不更新 → 检查是否跑 npm run gdelt、提取器是否输出、新闻是否含相关表述"
echo " • 地图无冲突点 → 检查 gdelt_events 是否有数据、GDELT 或 RSS 回填是否执行"
echo ""
echo "详细逐板块说明见: docs/DEBUG_PANELS.md"
echo "=========================================="

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env bash
# Run locally: read PRAGMA table_info for every table in server/data.db and
# generate scripts/align-production-schema.sh to be executed in production.
# Usage (from the project root): ./scripts/gen-align-schema-from-local.sh
#
# Fix: the per-table header comment is now written BEFORE the table's first
# ALTER statement (it was previously emitted after it, so the generated file
# labelled each table one line too late).
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"
OUT_PATH="$PROJECT_ROOT/scripts/align-production-schema.sh"
if [[ ! -f "$DB_PATH" ]]; then
echo "本地库不存在: $DB_PATH"
exit 1
fi
tables=$(sqlite3 "$DB_PATH" "SELECT name FROM sqlite_master WHERE type='table' AND name NOT IN ('sqlite_sequence') ORDER BY name;")
cat > "$OUT_PATH" << 'HEAD'
#!/usr/bin/env bash
# 由 scripts/gen-align-schema-from-local.sh 根据本地 server/data.db 表结构生成,供生产执行。
# 用法:在生产目录执行 DB_PATH=server/data.db ./scripts/align-production-schema.sh
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"
run() { sqlite3 "$DB_PATH" "$1" 2>/dev/null || true; }
echo "=== 对齐生产库表结构(与本地 data.db 一致):$DB_PATH ==="
HEAD
while IFS= read -r table; do
[[ -z "$table" ]] && continue
# first=1 until at least one column of this table has been emitted.
first=1
while IFS='|' read -r cid name type notnull dflt pk; do
# Skip cid=0: the first column normally exists from the original CREATE TABLE.
[[ -z "$cid" || "$cid" -eq 0 ]] && continue
# Emit the per-table header ahead of the table's first ALTER line.
if [[ "$first" -eq 1 ]]; then
echo "# $table(本地列)" >> "$OUT_PATH"
first=0
fi
# Non-constant defaults (e.g. datetime('now')) are dropped: SQLite rejects
# ADD COLUMN with a non-constant default.
def="$type"
[[ "$notnull" == "1" ]] && def="$def NOT NULL"
if [[ -n "$dflt" && "$dflt" != *"("* ]]; then
# Known string defaults are hard-coded to dodge shell-escaping problems.
case "$dflt" in
'"operational"') def="${def} DEFAULT 'operational'" ;;
'"other"') def="${def} DEFAULT 'other'" ;;
'"medium"') def="${def} DEFAULT 'medium'" ;;
"''") def="${def} DEFAULT ''" ;;
*) dflt_sql="${dflt//\"/\'}"; def="$def DEFAULT $dflt_sql" ;;
esac
fi
if [[ "$def" == *\'* ]]; then
# def contains a single quote: write it in run '...'\''...'\'' form.
safe_def=$(echo "$def" | sed "s/'/'\\\\''/g")
printf "run 'ALTER TABLE %s ADD COLUMN %s %s;'\n" "$table" "$name" "$safe_def" >> "$OUT_PATH"
else
printf 'run "ALTER TABLE %s ADD COLUMN %s %s;"\n' "$table" "$name" "$def" >> "$OUT_PATH"
fi
# Backfill carriers from tanks immediately after the column is introduced.
if [[ "$table" == "combat_losses" && "$name" == "carriers" ]]; then
echo 'run "UPDATE combat_losses SET carriers = COALESCE(tanks, 0) WHERE carriers = 0;"' >> "$OUT_PATH"
fi
done < <(sqlite3 -separator '|' "$DB_PATH" "PRAGMA table_info($table);")
if [[ "$first" -eq 0 ]]; then
echo "echo \" $table done\"" >> "$OUT_PATH"
fi
done <<< "$tables"
echo "" >> "$OUT_PATH"
echo "echo \"=== 完成。核对示例: ===\"" >> "$OUT_PATH"
echo "echo \" sqlite3 \$DB_PATH \\\"PRAGMA table_info(key_location);\\\"\"" >> "$OUT_PATH"
echo "echo \" sqlite3 \$DB_PATH \\\"PRAGMA table_info(combat_losses);\\\"\"" >> "$OUT_PATH"
chmod +x "$OUT_PATH"
echo "已生成: $OUT_PATH"
echo "请将该文件推到生产后执行DB_PATH=server/data.db ./scripts/align-production-schema.sh"

View File

@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Production one-shot: build the crawler image, start it in "crawler-only
# Docker, API on host" mode, then print the data-alignment notes.
# Prerequisites: the API is already running on the host on port 3001 (e.g.
# via PM2) and server/data.db exists (or run npm run api:seed first).
set -e
cd "$(dirname "$0")/.."
PROJECT_ROOT="${PROJECT_ROOT:-$(pwd)}"
REGISTRY="${REGISTRY:-}"
echo "==> Building crawler image..."
# REGISTRY is only forwarded as a build-arg when set (":+").
docker build -t usa-dashboard-crawler:latest \
${REGISTRY:+--build-arg REGISTRY="$REGISTRY"} \
-f Dockerfile.crawler .
echo ""
./scripts/run-crawler-docker-standalone.sh
echo ""
echo "==> Data alignment (生产数据对齐)"
echo " API (host) DB_PATH = $PROJECT_ROOT/server/data.db (或 env DB_PATH)"
echo " Crawler /data/data.db = 挂载自上述同一文件"
echo " 二者必须指向同一 SQLite 文件,前端/API 与爬虫才能数据一致。"

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Production: run ONLY the crawler in Docker while the API stays on the host
# (e.g. under PM2). Guarantees the crawler and the API share the same SQLite
# file (data alignment).
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}"
DB_FILE="${DB_FILE:-$PROJECT_ROOT/server/data.db}"
API_BASE="${API_BASE:-http://host.docker.internal:3001}"
CRAWLER_IMAGE="${CRAWLER_IMAGE:-usa-dashboard-crawler:latest}"
CONTAINER_NAME="${CONTAINER_NAME:-usa-crawler}"
# Optional: load DASHSCOPE_API_KEY etc. from the project .env.
if [ -f "$PROJECT_ROOT/.env" ]; then
set -a
# shellcheck source=../.env
. "$PROJECT_ROOT/.env"
set +a
fi
# The host DB must already exist (API initialized it, or seed it first).
if [ ! -f "$DB_FILE" ]; then
echo "ERROR: DB file not found: $DB_FILE"
echo " Create it first: DB_PATH=$DB_FILE node server/seed.js"
exit 1
fi
# Extra docker-run args. On Linux, host.docker.internal must be mapped
# explicitly to the host gateway.
DOCKER_EXTRA=()
if [ "$(uname -s)" = "Linux" ]; then
DOCKER_EXTRA+=(--add-host=host.docker.internal:host-gateway)
fi
# Pass the API key through the quoted args array. Fix: the previous unquoted
# ${DASHSCOPE_API_KEY:+ -e ...} expansion was subject to word splitting and
# globbing on the key's value.
if [ -n "${DASHSCOPE_API_KEY:-}" ]; then
DOCKER_EXTRA+=(-e "DASHSCOPE_API_KEY=$DASHSCOPE_API_KEY")
fi
# Remove a stale container with the same name, if any.
docker rm -f "$CONTAINER_NAME" 2>/dev/null || true
echo "==> Starting crawler container (standalone)"
echo " DB: $DB_FILE -> /data/data.db"
echo " API_BASE: $API_BASE"
echo " Image: $CRAWLER_IMAGE"
docker run -d \
--name "$CONTAINER_NAME" \
--restart unless-stopped \
-p 8000:8000 \
-v "$DB_FILE:/data/data.db" \
-e DB_PATH=/data/data.db \
-e API_BASE="$API_BASE" \
-e GDELT_DISABLED=1 \
-e RSS_INTERVAL_SEC=60 \
"${DOCKER_EXTRA[@]}" \
"$CRAWLER_IMAGE"
echo " Container: $CONTAINER_NAME"
echo " Logs: docker logs -f $CONTAINER_NAME"
echo " Status: curl -s http://localhost:8000/crawler/status | jq ."

17
scripts/run-crawler-range.sh Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Run one crawler pass limited to a time range (RSS keeps only entries
# published after the given start time).
# Usage:
#   ./scripts/run-crawler-range.sh                      # default: from 2026-02-28 00:00 to now
#   ./scripts/run-crawler-range.sh 2026-02-25T00:00:00
#
# The GDELT time range must be set when starting the gdelt service, e.g.:
#   GDELT_TIMESPAN=3d npm run gdelt
set -e
START="${1:-2026-02-28T00:00:00}"
cd "$(dirname "$0")/.."
echo "RSS 抓取时间范围: 仅保留 ${START} 之后"
# Fix: the printed command now matches the interpreter actually used below
# (python3, not python).
echo "运行: cd crawler && CRAWL_START_DATE=${START} python3 run_once.py"
echo ""
export CRAWL_START_DATE="$START"
(cd crawler && python3 run_once.py)

126
scripts/test.sh Normal file
View File

@@ -0,0 +1,126 @@
# Manual data-patch scratch script: writes hand-picked combat_losses numbers
# into the SQLite DB via the sqlite3 CLI.
# NOTE(review): the four batches below update the same two rows (side='us' /
# side='iran'); for any column set in several batches only the LAST batch's
# value survives. Run sections individually if that is not intended.
# Batch 1: civilian casualty figures only.
sqlite3 server/data.db "
UPDATE combat_losses
SET civilian_killed = 380, civilian_wounded = 1520
WHERE side = 'us';
UPDATE combat_losses
SET civilian_killed = 4120, civilian_wounded = 12030
WHERE side = 'iran';
"
# Hard-coded production path — adjust on other machines.
cd /root/usa
# Batch 2: full loss matrix for both sides.
sqlite3 server/data.db "
UPDATE combat_losses
SET bases_destroyed = 15,
bases_damaged = 57,
personnel_killed = 327,
personnel_wounded = 984,
civilian_killed = 380,
civilian_wounded = 1520,
aircraft = 24,
warships = 1,
armor = 18,
vehicles = 42,
drones = 28,
missiles = 756,
helicopters = 8,
submarines = 2,
tanks = 0,
carriers = 0,
civilian_ships = 100,
airport_port = 5
WHERE side = 'us';
UPDATE combat_losses
SET bases_destroyed = 2100,
bases_damaged = 8400,
personnel_killed = 2847,
personnel_wounded = 5620,
civilian_killed = 4120,
civilian_wounded = 12030,
aircraft = 240,
warships = 120,
armor = 18,
vehicles = 420,
drones = 28,
missiles = 4560,
helicopters = 20,
submarines = 2,
tanks = 50,
carriers = 0,
civilian_ships = 50,
airport_port = 42
WHERE side = 'iran';
"
# Batch 3: partial refresh (bases, personnel, aircraft/warships/armor/vehicles,
# civilians); overrides the matching columns of batch 2.
sqlite3 server/data.db "
UPDATE combat_losses
SET bases_destroyed = 15,
bases_damaged = 57,
personnel_killed = 327,
personnel_wounded = 984,
aircraft = 4,
warships = 0,
armor = 3,
vehicles = 76,
civilian_killed = 380,
civilian_wounded = 1520
WHERE side = 'us';
UPDATE combat_losses
SET bases_destroyed = 2100,
bases_damaged = 8400,
personnel_killed = 2847,
personnel_wounded = 5620,
aircraft = 70,
warships = 120,
armor = 18,
vehicles = 420,
civilian_killed = 4120,
civilian_wounded = 12030
WHERE side = 'iran';
"
cd /root/usa
# Batch 4: final full loss matrix — these values win for every column they set.
sqlite3 server/data.db "
UPDATE combat_losses
SET bases_destroyed = 15,
bases_damaged = 57,
personnel_killed = 327,
personnel_wounded = 984,
civilian_killed = 380,
civilian_wounded = 1520,
aircraft = 4,
warships = 1,
armor = 18,
vehicles = 42,
drones = 68,
missiles = 1756,
helicopters = 8,
submarines = 0,
tanks = 0,
carriers = 0,
civilian_ships = 172,
airport_port = 7
WHERE side = 'us';
UPDATE combat_losses
SET bases_destroyed = 2100,
bases_damaged = 8400,
personnel_killed = 2847,
personnel_wounded = 5620,
civilian_killed = 4120,
civilian_wounded = 12030,
aircraft = 106,
warships = 107,
armor = 72,
vehicles = 506,
drones = 1428,
missiles = 6620,
helicopters = 20,
submarines = 4,
tanks = 50,
carriers = 1,
civilian_ships = 42,
airport_port = 31
WHERE side = 'iran';
"

81
scripts/verify-panels.cjs Normal file
View File

@@ -0,0 +1,81 @@
#!/usr/bin/env node
/**
 * Code-level dashboard verification: calls getSituation() and the DB directly
 * and prints the combat-loss / base / map-war-zone results.
 * Usage (from the project root): node scripts/verify-panels.cjs
 */
const path = require('path')
const projectRoot = path.resolve(__dirname, '..')
// Run from the project root so relative paths (e.g. the default DB location)
// resolve the same way as when the API server starts.
process.chdir(projectRoot)
const db = require('../server/db')
const { getSituation } = require('../server/situationData')
// Prints each panel's backing data in the exact shape the API serves.
function run() {
const s = getSituation()
console.log('========================================')
console.log('看板数据验证(与 API getSituation 一致)')
console.log('========================================\n')
console.log('lastUpdated:', s.lastUpdated)
console.log('')
// ---------- 1. Combat losses ----------
console.log('--- [1] 战损 combat_losses ---')
const us = s.usForces.combatLosses
const ir = s.iranForces.combatLosses
console.log('美军 阵亡:', us.personnelCasualties.killed, '受伤:', us.personnelCasualties.wounded)
console.log('美军 基地毁/损:', us.bases.destroyed, '/', us.bases.damaged)
console.log('美军 战机/舰艇/装甲/车辆:', us.aircraft, us.warships, us.armor, us.vehicles)
console.log('伊朗 阵亡:', ir.personnelCasualties.killed, '受伤:', ir.personnelCasualties.wounded)
console.log('伊朗 基地毁/损:', ir.bases.destroyed, '/', ir.bases.damaged)
console.log('平民合计 killed/wounded:', s.civilianCasualtiesTotal.killed, s.civilianCasualtiesTotal.wounded)
console.log('conflictStats:', JSON.stringify(s.conflictStats))
console.log('')
// ---------- 2. Bases (same filter as the dashboard: US counts only
// type === 'Base'; Iran counts Base/Port/Nuclear/Missile) ----------
console.log('--- [2] 基地 key_location ---')
const usLoc = s.usForces.keyLocations || []
const irLoc = s.iranForces.keyLocations || []
const usBases = usLoc.filter((l) => l.type === 'Base')
const irBases = irLoc.filter((l) => ['Base', 'Port', 'Nuclear', 'Missile'].includes(l.type))
const usAttacked = usBases.filter((l) => l.status === 'attacked')
const irAttacked = irBases.filter((l) => l.status === 'attacked')
console.log('美军 总基地数(仅Base):', usBases.length, '| 遭袭:', usAttacked.length, '(与看板「美军基地态势」一致)')
console.log('伊朗 总基地数(Base/Port/Nuclear/Missile):', irBases.length, '| 遭袭:', irAttacked.length, '(与看板「伊朗基地态势」一致)')
// NOTE(review): assumes keyLocation rows expose snake_case `damage_level`
// like the DB column — confirm against situationData's field mapping.
if (usAttacked.length > 0) {
console.log('美军遭袭示例:', usAttacked.slice(0, 3).map((l) => `${l.name}(${l.status},damage=${l.damage_level})`).join(', '))
}
if (irAttacked.length > 0) {
console.log('伊朗遭袭示例:', irAttacked.slice(0, 3).map((l) => `${l.name}(${l.status},damage=${l.damage_level})`).join(', '))
}
console.log('')
// ---------- 3. Map war zones ----------
console.log('--- [3] 地图战区 gdelt_events + conflict_stats ---')
const events = s.conflictEvents || []
console.log('conflictEvents 条数:', events.length)
console.log('conflictStats:', JSON.stringify(s.conflictStats))
if (events.length > 0) {
console.log('最近 3 条:', events.slice(0, 3).map((e) => `${e.event_time} ${(e.title || '').slice(0, 40)} impact=${e.impact_score}`))
}
console.log('')
// ---------- Extra: event timeline ----------
const updates = s.recentUpdates || []
console.log('--- [附] 事件脉络 situation_update ---')
console.log('recentUpdates 条数:', updates.length)
if (updates.length > 0) {
console.log('最新 1 条:', updates[0].timestamp, (updates[0].summary || '').slice(0, 50))
}
console.log('========================================')
}
// initDb() loads the SQLite file asynchronously (sql.js wrapper), so run()
// must wait for it; on failure print the message and exit non-zero.
db
.initDb()
.then(() => run())
.catch((err) => {
console.error('验证失败:', err.message)
process.exit(1)
})

124
scripts/verify-pipeline.sh Executable file
View File

@@ -0,0 +1,124 @@
#!/usr/bin/env bash
# Verify the full pipeline: crawler → database → API → frontend.
# Usage: ./scripts/verify-pipeline.sh [--start-crawler]
set -e
API_URL="${API_URL:-http://localhost:3001}"
CRAWLER_URL="${CRAWLER_URL:-http://localhost:8000}"
START_CRAWLER=false
[[ "${1:-}" = "--start-crawler" ]] && START_CRAWLER=true
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
echo "=========================================="
echo "US-Iran 态势面板 链路验证"
echo "API: $API_URL | Crawler: $CRAWLER_URL"
echo "=========================================="
echo ""
# Optional: start the crawler service first.
if $START_CRAWLER; then
echo "[0/6] 启动爬虫..."
if curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then
echo " ✓ 爬虫已在运行"
else
cd "$PROJECT_ROOT/crawler"
python3 -c "import uvicorn" 2>/dev/null || { echo " 需安装: pip install uvicorn"; exit 1; }
# NOTE(review): uvicorn is left running in the background after this script
# exits (no trap/kill) — presumably intentional so the crawler keeps feeding
# data; confirm.
uvicorn realtime_conflict_service:app --host 127.0.0.1 --port 8000 &
echo " 等待爬虫就绪..."
# Poll readiness for up to ~30s (15 × 2s).
for i in $(seq 1 15); do
sleep 2
if curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then
echo " ✓ 爬虫已启动"
echo " 等待首次 RSS 抓取(约 70 秒)..."
sleep 70
break
fi
done
if ! curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then
echo " ✗ 爬虫启动超时"
exit 1
fi
fi
echo ""
fi
# 1. API health check.
echo "[1/6] API 健康检查..."
if curl -sf "$API_URL/api/health" > /dev/null; then
echo " ✓ API 正常"
else
echo " ✗ API 无响应,请先运行: npm run api"
exit 1
fi
# 2. Situation payload readable and carries lastUpdated.
echo "[2/6] 态势数据..."
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
if echo "$SIT" | grep -q "lastUpdated"; then
echo " ✓ 态势数据可读"
LAST=$(echo "$SIT" | grep -o '"lastUpdated":"[^"]*"' | head -1)
echo " $LAST"
else
echo " ✗ 态势数据异常"
exit 1
fi
# 3. Crawler status endpoint (optional component — warn, don't fail).
echo "[3/6] 爬虫状态..."
CRAWLER=$(curl -sf "$CRAWLER_URL/crawler/status" 2>/dev/null || echo "{}")
if echo "$CRAWLER" | grep -q "db_path\|db_exists"; then
echo " ✓ 爬虫服务可访问"
if command -v jq &>/dev/null; then
CNT=$(echo "$CRAWLER" | jq -r '.situation_update_count // "?"')
echo " situation_update 条数: $CNT"
fi
else
echo " ⚠ 爬虫未启动或不可达(可选,需单独运行爬虫)"
fi
# 4. News table via the API.
echo "[4/6] 资讯表 news_content..."
NEWS=$(curl -sf "$API_URL/api/news?limit=3" 2>/dev/null || echo '{"items":[]}')
if echo "$NEWS" | grep -q '"items"'; then
if command -v jq &>/dev/null; then
N=$(echo "$NEWS" | jq '.items | length')
echo " ✓ 最近 $N 条资讯"
else
echo " ✓ 资讯接口可读"
fi
else
echo " ⚠ news_content 可能为空(爬虫未跑或刚启动)"
fi
# 5. Combat-loss fields present in the situation payload.
echo "[5/6] 战损数据 combat_losses..."
if echo "$SIT" | grep -q "personnelCasualties"; then
echo " ✓ 战损字段存在"
if command -v jq &>/dev/null; then
US_K=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.killed // "?"')
IR_K=$(echo "$SIT" | jq -r '.iranForces.combatLosses.personnelCasualties.killed // "?"')
echo " 美军阵亡: $US_K | 伊朗阵亡: $IR_K"
fi
else
echo " ✗ 战损结构异常"
fi
# 6. Notify endpoint (only verifies it is callable).
echo "[6/6] 通知接口 POST /api/crawler/notify..."
NOTIFY=$(curl -sf -X POST "$API_URL/api/crawler/notify" 2>/dev/null || echo "{}")
if echo "$NOTIFY" | grep -q '"ok"'; then
echo " ✓ 通知接口正常"
else
echo " ⚠ 通知接口可能异常"
fi
echo ""
echo "=========================================="
echo "验证完成。"
echo ""
echo "建议:"
echo " - 访问 $API_URL/db 查看各表数据"
echo " - 爬虫未启动时: ./scripts/verify-pipeline.sh --start-crawler"
echo " - 或手动启动: cd crawler && uvicorn realtime_conflict_service:app --port 8000"
echo "=========================================="

171
server/README.md Normal file
View File

@@ -0,0 +1,171 @@
# 后端运行逻辑
后端是 **Node.js Express + SQLite + WebSocket**,与 Python 爬虫共用同一数据库文件负责提供「态势数据」API、实时推送和简单统计。
---
## 一、启动方式
```bash
npm run api # 启动 server/index.js(默认端口 3001)
```
- 端口:`process.env.API_PORT || 3001`
- 数据库:`process.env.DB_PATH` 或 `server/data.db`(与爬虫共用)
---
## 二、整体架构
```
┌─────────────────────────────────────────┐
│ server/index.js │
│ (HTTP Server + WebSocket Server) │
└─────────────────────────────────────────┘
┌───────────────────────────────┼───────────────────────────────┐
│ │ │
▼ ▼ ▼
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ /api/* │ │ /ws │ │ 静态 dist │
│ routes.js │ │ WebSocket │ │ (生产) │
└──────┬──────┘ └──────┬──────┘ └─────────────┘
│ │
│ 读/写 │ 广播 situation + stats
▼ │
┌─────────────┐ │
│ db.js │◄─────────────────────┘
│ (SQLite) │ getSituation() / getStats()
└──────┬──────┘
│ 同文件 data.db
┌─────────────┐
│ Python 爬虫 │ 抓取 → 去重 → AI 清洗 → 映射到库字段 → 写表 → POST /api/crawler/notify
│ situation_ │ (main.py 或 gdelt 服务;写 situation_update / news_content / combat_losses 等)
│ update 等 │
└─────────────┘
```
---
## 三、核心模块
| 文件 | 作用 |
|------|------|
| **index.js** | 创建 HTTP + WebSocket 服务,挂载路由、静态资源、定时广播、爬虫通知回调 |
| **routes.js** | 所有 `/api/*` 接口situation、db/dashboard、visit、feedback、share、stats、events、news 等 |
| **situationData.js** | `getSituation()`从多张表聚合为前端所需的「态势」JSON军力、基地、战损、事件脉络、GDELT 等) |
| **db.js** | SQLite 连接、建表、迁移better-sqlite3WAL 模式) |
| **stats.js** | `getStats()`:在看人数、累计访问、留言数、分享数 |
| **openapi.js** | Swagger/OpenAPI 文档定义 |
| **seed.js** | 初始化/重置种子数据(可单独运行 `npm run api:seed` |
---
## 四、数据流(读)
1. **前端要「整页态势」**
- 请求 `GET /api/situation``routes.js` 调用 `getSituation()`
- `situationData.js` 从 db 读:`force_summary``power_index``force_asset``key_location``combat_losses``wall_street_trend``retaliation_*``situation_update`(最近 50 条)、`gdelt_events``conflict_stats`
- 组装成 `{ lastUpdated, usForces, iranForces, recentUpdates, conflictEvents, conflictStats, civilianCasualtiesTotal }` 返回。
2. **前端要「事件列表」**
- `GET /api/events` 返回 `conflictEvents` + `conflict_stats` + `updated_at`(同样来自 getSituation 的数据)。
3. **前端要「原始表数据」**
- `GET /api/db/dashboard` 返回多张表的 `SELECT *` 结果(含 `situation_update`),供 `/db` 调试页使用。
4. **WebSocket**
- 连接 `ws://host/ws` 时立即收到一条 `{ type: 'situation', data: getSituation(), stats: getStats() }`
- 之后服务端按 `BROADCAST_INTERVAL_MS`(默认 30 秒)轮询,数据有变化时推送同结构数据,前端可据此做实时刷新(详见「七、WebSocket 行为」)。
---
## 五、数据流(写)
### 5.1 爬虫侧写库链路(推荐理解顺序)
爬虫写入前端库的完整链路如下,**不是**「抓完直接写表」而是经过去重、AI 清洗、字段映射后再落库:
1. **爬虫抓取实时数据**
- RSS 等源抓取(`scrapers/rss_scraper.fetch_all`),得到原始条目列表。
2. **数据去重**
- 抓取阶段RSS 内按 (title, url) 去重。
- 落库前:按 `content_hash(title, summary, url)``news_content` 表中去重,仅**未出现过**的条目进入后续流程(`news_storage.save_and_dedup`)。
3. **去重后按批次推送给 AI 清洗**
- 对通过去重的每条/每批数据:
- **展示用清洗**:标题/摘要翻译、`clean_news_for_panel` 提炼为符合面板的纯文本与长度(如 summary ≤120 字),`ensure_category` / `ensure_severity` 规范为前端枚举(`cleaner_ai`)。
- **结构化提取**(可选):`extractor_ai` / `extractor_dashscope` / `extractor_rules` 从新闻文本中抽取战损、基地状态等,输出符合 `panel_schema` 的结构。
- 得到「有效数据」:既有人读的 summary/category/severity也有可落库的 combat_losses_delta、key_location 等。
4. **有效数据映射回前端数据库字段**
- 事件脉络:清洗后的条目写入 `situation_update``db_writer.write_updates`)。
- 资讯存档:去重后的新数据写入 `news_content`(已在步骤 2 完成)。
- 结构化数据AI 提取结果通过 `db_merge.merge` 映射到前端表结构,更新 `combat_losses``key_location``retaliation_*``wall_street_trend` 等(与 `situationData.getSituation` 所用字段一致)。
5. **更新数据库表并通知后端**
- 上述表更新完成后,爬虫请求 **POST /api/crawler/notify**
- 后端index.js更新 `situation.updated_at` 并调用 `broadcastSituation()`,前端通过 WebSocket 拿到最新态势。
实现上,**gdelt 服务**`realtime_conflict_service`)里:先对抓取结果做翻译与清洗,再 `save_and_dedup` 去重落库 `news_content`,用去重后的新项写 `situation_update`,再按批次对这批新项做 AI 提取并 `db_merge.merge` 写战损/基地等表。
### 5.2 用户行为写入
- **POST /api/visit**:记 IP 到 `visits``visitor_count.total` +1并触发一次广播。
- **POST /api/feedback**:插入 `feedback`
- **POST /api/share**`share_count.total` +1。
这些写操作在 `routes.js` 中通过 `db.prepare().run()` 完成。
---
## 六、API 一览
| 方法 | 路径 | 说明 |
|------|------|------|
| GET | /api/health | 健康检查 |
| GET | /api/situation | 完整态势(供主面板) |
| GET | /api/events | 冲突事件 + 统计 |
| GET | /api/db/dashboard | 各表原始数据(供 /db 页) |
| GET | /api/news | 资讯列表news_content 表) |
| GET | /api/stats | 在看/累计/留言/分享数 |
| POST | /api/visit | 记录访问并返回 stats |
| POST | /api/feedback | 提交留言 |
| POST | /api/share | 分享计数 +1 |
| POST | /api/crawler/notify | 爬虫通知:更新 updated_at 并广播(内部用) |
- **Swagger**`http://localhost:3001/api-docs`
---
## 七、WebSocket 行为
- **路径**`/ws`(与 HTTP 同端口)。
- **连接时**:服务端发送一条 `{ type: 'situation', data, stats }`
- **定时广播**:按 `BROADCAST_INTERVAL_MS`(默认 30 秒)轮询;**仅当数据有变化**(以 `situation.updated_at` + `situation_update` 条数为版本)时才执行 `getSituation()` + `getStats()` 并推送,避免无变更时重复查库和推送、降低负载。
- **即时广播**:以下情况会立即推送一次(不等待定时间隔):爬虫 POST `/api/crawler/notify`、修订页保存PUT/PATCH/POST/DELETE `/api/edit/*`)。
- **环境变量**`BROADCAST_INTERVAL_MS=0` 可关闭定时轮询,仅依赖即时广播;设为 `3000` 可恢复为每 3 秒检查一次(仍仅在数据变化时推送)。
---
## 八、与爬虫的协作
- **共享 DB**:后端与爬虫都使用同一 `DB_PATH`(默认 `server/data.db`)。
- **爬虫写库链路**:爬虫抓取 → 去重 → AI 清洗出有效数据 → 映射到前端库字段 → 更新 `situation_update``news_content``combat_losses``key_location``gdelt_events` 等表 → 调用 POST `/api/crawler/notify` 通知后端。
- **后端角色**:只读这些表(`getSituation()` 等)并推送;不参与抓取、去重或 AI 清洗,不调度爬虫。
整体上,后端是「读库 + 聚合 + 推送」的服务;写库来自**爬虫(经过去重与 AI 清洗、字段映射后)**以及**用户行为**(访问/留言/分享)。
---
## 九、本地验证链路
1. **启动后端**`npm run api`(默认 3001
2. **检查读库**`curl -s http://localhost:3001/api/situation` 应返回含 `lastUpdated``recentUpdates` 的 JSON。
3. **检查写库与通知**:爬虫跑完流水线后会 POST `/api/crawler/notify`,后端会更新 `situation.updated_at` 并广播;可再请求 `/api/situation``lastUpdated` 是否更新。
4. **查原始表**:浏览器打开 `http://localhost:3001/api/db/dashboard` 或前端 `/db` 页,查看 `situation_update``news_content` 等表。
爬虫侧完整验证步骤见 **crawler/README.md** 的「本地验证链路」;项目根目录可执行 `./scripts/verify-pipeline.sh` 做一键检查。

View File

@@ -1,20 +1,69 @@
const Database = require('better-sqlite3')
/**
* SQLite 封装:使用 sql.js纯 JS/WebAssembly无需 node-gyp
* 对外接口与 better-sqlite3 兼容db.prepare().get/all/run、db.exec
*/
const path = require('path')
const fs = require('fs')
const dbPath = path.join(__dirname, 'data.db')
const db = new Database(dbPath)
const dbPath = process.env.DB_PATH || path.join(__dirname, 'data.db')
let _db = null
/** sql.js 构造函数initDb 时注入,供 reloadFromFile 使用 */
let _sqlJs = null
// 启用外键
db.pragma('journal_mode = WAL')
/** Return the initialized DB wrapper, or throw if initDb() has not run yet. */
function getDb() {
  if (!_db) {
    throw new Error('DB not initialized. Call initDb() first.')
  }
  return _db
}
// 建表
db.exec(`
/**
 * Wrap a raw sql.js Database in a minimal better-sqlite3-compatible facade.
 *
 * Supported surface: db.prepare(sql).get()/.all()/.run(), db.exec(sql),
 * db.pragma(str). A fresh statement is prepared per call and freed in a
 * `finally` block, so sql.js WASM statement memory is released even when a
 * query throws (the previous version leaked the statement on error).
 *
 * @param {object} nativeDb - sql.js Database instance.
 * @param {Function} persist - callback invoked after every write (run/exec)
 *   so the in-memory DB can be flushed back to the on-disk file.
 * @returns {object} better-sqlite3-like handle.
 */
function wrapDatabase(nativeDb, persist) {
  // sql.js expects null (not an empty array) when there are no parameters.
  const bindArgs = (stmt, args) => stmt.bind(args.length ? args : null)
  return {
    prepare(sql) {
      return {
        get(...args) {
          const stmt = nativeDb.prepare(sql)
          try {
            bindArgs(stmt, args)
            return stmt.step() ? stmt.getAsObject() : undefined
          } finally {
            stmt.free()
          }
        },
        all(...args) {
          const stmt = nativeDb.prepare(sql)
          try {
            bindArgs(stmt, args)
            const rows = []
            while (stmt.step()) rows.push(stmt.getAsObject())
            return rows
          } finally {
            stmt.free()
          }
        },
        run(...args) {
          const stmt = nativeDb.prepare(sql)
          try {
            bindArgs(stmt, args)
            while (stmt.step());
          } finally {
            stmt.free()
          }
          persist()
          // better-sqlite3 compat: report how many rows the statement changed.
          const changes =
            typeof nativeDb.getRowsModified === 'function' ? nativeDb.getRowsModified() : 0
          return { changes }
        },
      }
    },
    exec(sql) {
      // NOTE: naive split — breaks on ';' inside string literals or trigger
      // bodies; adequate for the simple DDL/DML this project feeds it.
      const statements = sql.split(';').map((s) => s.trim()).filter(Boolean)
      statements.forEach((s) => nativeDb.run(s))
      persist()
    },
    pragma(str) {
      // Fire-and-forget; unlike better-sqlite3 this returns no result rows.
      nativeDb.run('PRAGMA ' + str)
    },
  }
}
function runMigrations(db) {
const exec = (sql) => db.exec(sql)
const prepare = (sql) => db.prepare(sql)
exec(`
CREATE TABLE IF NOT EXISTS situation (
id INTEGER PRIMARY KEY CHECK (id = 1),
data TEXT NOT NULL,
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS force_summary (
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
total_assets INTEGER NOT NULL,
@@ -26,7 +75,6 @@ db.exec(`
missile_consumed INTEGER NOT NULL,
missile_stock INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS power_index (
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
overall INTEGER NOT NULL,
@@ -34,7 +82,6 @@ db.exec(`
economic_power INTEGER NOT NULL,
geopolitical_influence INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS force_asset (
id TEXT PRIMARY KEY,
side TEXT NOT NULL CHECK (side IN ('us', 'iran')),
@@ -45,7 +92,6 @@ db.exec(`
lat REAL,
lng REAL
);
CREATE TABLE IF NOT EXISTS key_location (
id INTEGER PRIMARY KEY AUTOINCREMENT,
side TEXT NOT NULL CHECK (side IN ('us', 'iran')),
@@ -55,7 +101,6 @@ db.exec(`
type TEXT,
region TEXT
);
CREATE TABLE IF NOT EXISTS combat_losses (
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
bases_destroyed INTEGER NOT NULL,
@@ -67,24 +112,20 @@ db.exec(`
armor INTEGER NOT NULL,
vehicles INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS wall_street_trend (
id INTEGER PRIMARY KEY AUTOINCREMENT,
time TEXT NOT NULL,
value INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS retaliation_current (
id INTEGER PRIMARY KEY CHECK (id = 1),
value INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS retaliation_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
time TEXT NOT NULL,
value INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS situation_update (
id TEXT PRIMARY KEY,
timestamp TEXT NOT NULL,
@@ -92,16 +133,248 @@ db.exec(`
summary TEXT NOT NULL,
severity TEXT NOT NULL
);
`)
CREATE TABLE IF NOT EXISTS gdelt_events (
event_id TEXT PRIMARY KEY,
event_time TEXT NOT NULL,
title TEXT NOT NULL,
lat REAL NOT NULL,
lng REAL NOT NULL,
impact_score INTEGER NOT NULL,
url TEXT,
created_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS conflict_stats (
id INTEGER PRIMARY KEY CHECK (id = 1),
total_events INTEGER NOT NULL DEFAULT 0,
high_impact_events INTEGER NOT NULL DEFAULT 0,
estimated_casualties INTEGER NOT NULL DEFAULT 0,
estimated_strike_count INTEGER NOT NULL DEFAULT 0,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS news_content (
id TEXT PRIMARY KEY,
content_hash TEXT NOT NULL UNIQUE,
title TEXT NOT NULL,
summary TEXT NOT NULL,
url TEXT NOT NULL DEFAULT '',
source TEXT NOT NULL DEFAULT '',
published_at TEXT NOT NULL,
category TEXT NOT NULL DEFAULT 'other',
severity TEXT NOT NULL DEFAULT 'medium',
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
`)
try { exec('CREATE INDEX IF NOT EXISTS idx_news_content_hash ON news_content(content_hash)') } catch (_) {}
try { exec('CREATE INDEX IF NOT EXISTS idx_news_content_published ON news_content(published_at DESC)') } catch (_) {}
// 迁移:为已有 key_location 表添加 type、region、status、damage_level 列
try {
const cols = db.prepare('PRAGMA table_info(key_location)').all()
const names = cols.map((c) => c.name)
if (!names.includes('type')) db.exec('ALTER TABLE key_location ADD COLUMN type TEXT')
if (!names.includes('region')) db.exec('ALTER TABLE key_location ADD COLUMN region TEXT')
if (!names.includes('status')) db.exec('ALTER TABLE key_location ADD COLUMN status TEXT DEFAULT "operational"')
if (!names.includes('damage_level')) db.exec('ALTER TABLE key_location ADD COLUMN damage_level INTEGER')
} catch (_) {}
// Backfill newer key_location columns on database files created before the
// columns existed; a fresh table already has them, so each ALTER is gated
// on PRAGMA table_info. Errors (e.g. missing table) are deliberately ignored.
try {
  const existing = new Set(prepare('PRAGMA table_info(key_location)').all().map((c) => c.name))
  const pending = [
    ['type', 'ALTER TABLE key_location ADD COLUMN type TEXT'],
    ['region', 'ALTER TABLE key_location ADD COLUMN region TEXT'],
    ['status', 'ALTER TABLE key_location ADD COLUMN status TEXT DEFAULT "operational"'],
    ['damage_level', 'ALTER TABLE key_location ADD COLUMN damage_level INTEGER'],
    ['attacked_at', 'ALTER TABLE key_location ADD COLUMN attacked_at TEXT'],
  ]
  for (const [column, ddl] of pending) {
    if (!existing.has(column)) exec(ddl)
  }
} catch (_) {}
// Backfill combat_losses columns that were added over time. Columns are
// applied in their historical order so the resulting column layout matches
// databases migrated incrementally. `carriers` additionally seeds its
// initial values from the pre-existing `tanks` column (it was split out of
// that counter). Errors are deliberately swallowed.
try {
  const have = new Set(prepare('PRAGMA table_info(combat_losses)').all().map((c) => c.name))
  const columns = [
    ['civilian_killed', 'ALTER TABLE combat_losses ADD COLUMN civilian_killed INTEGER NOT NULL DEFAULT 0'],
    ['civilian_wounded', 'ALTER TABLE combat_losses ADD COLUMN civilian_wounded INTEGER NOT NULL DEFAULT 0'],
    ['updated_at', 'ALTER TABLE combat_losses ADD COLUMN updated_at TEXT DEFAULT (datetime("now"))'],
    ['drones', 'ALTER TABLE combat_losses ADD COLUMN drones INTEGER NOT NULL DEFAULT 0'],
    ['missiles', 'ALTER TABLE combat_losses ADD COLUMN missiles INTEGER NOT NULL DEFAULT 0'],
    ['helicopters', 'ALTER TABLE combat_losses ADD COLUMN helicopters INTEGER NOT NULL DEFAULT 0'],
    ['submarines', 'ALTER TABLE combat_losses ADD COLUMN submarines INTEGER NOT NULL DEFAULT 0'],
    ['tanks', 'ALTER TABLE combat_losses ADD COLUMN tanks INTEGER NOT NULL DEFAULT 0'],
    ['carriers', 'ALTER TABLE combat_losses ADD COLUMN carriers INTEGER NOT NULL DEFAULT 0'],
    ['civilian_ships', 'ALTER TABLE combat_losses ADD COLUMN civilian_ships INTEGER NOT NULL DEFAULT 0'],
    ['airport_port', 'ALTER TABLE combat_losses ADD COLUMN airport_port INTEGER NOT NULL DEFAULT 0'],
  ]
  for (const [column, ddl] of columns) {
    if (have.has(column)) continue
    exec(ddl)
    // carriers was split out of the tanks counter; copy the old value once.
    if (column === 'carriers') exec('UPDATE combat_losses SET carriers = tanks')
  }
} catch (_) {}
module.exports = db
// Guarantee an `updated_at` column on each table listed below. No-op when
// the column already exists; errors (e.g. missing table) are ignored.
const ensureUpdatedAt = (table) => {
  try {
    const hasColumn = prepare(`PRAGMA table_info(${table})`)
      .all()
      .some((c) => c.name === 'updated_at')
    if (!hasColumn) {
      exec(`ALTER TABLE ${table} ADD COLUMN updated_at TEXT DEFAULT (datetime("now"))`)
    }
  } catch (_) {}
}
for (const table of ['force_summary', 'power_index', 'force_asset', 'key_location', 'retaliation_current']) {
  ensureUpdatedAt(table)
}
// ---- Auxiliary tables: visitor tracking, feedback, share/like counters,
// ---- display overrides, map strike data, animation / war-map config. ----
// Each DDL batch runs independently so one failure cannot block the others.
const safeExec = (sql) => {
  try { exec(sql) } catch (_) {}
}
// Add `column` to `table` when missing; errors are ignored on purpose
// (the table itself may not exist yet in older database files).
const ensureColumn = (table, column, ddl) => {
  try {
    const cols = prepare(`PRAGMA table_info(${table})`).all()
    if (!cols.some((c) => c.name === column)) exec(ddl)
  } catch (_) {}
}
safeExec(`
  CREATE TABLE IF NOT EXISTS visits (
    ip TEXT PRIMARY KEY,
    last_seen TEXT NOT NULL DEFAULT (datetime('now'))
  );
  CREATE TABLE IF NOT EXISTS visitor_count (
    id INTEGER PRIMARY KEY CHECK (id = 1),
    total INTEGER NOT NULL DEFAULT 0
  );
  INSERT OR IGNORE INTO visitor_count (id, total) VALUES (1, 0);
`)
safeExec(`
  CREATE TABLE IF NOT EXISTS feedback (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    content TEXT NOT NULL,
    ip TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
  )
`)
safeExec(`
  CREATE TABLE IF NOT EXISTS share_count (
    id INTEGER PRIMARY KEY CHECK (id = 1),
    total INTEGER NOT NULL DEFAULT 0
  );
  INSERT OR IGNORE INTO share_count (id, total) VALUES (1, 0);
`)
safeExec(`
  CREATE TABLE IF NOT EXISTS like_count (
    id INTEGER PRIMARY KEY CHECK (id = 1),
    total INTEGER NOT NULL DEFAULT 0
  );
  INSERT OR IGNORE INTO like_count (id, total) VALUES (1, 0);
`)
safeExec(`
  CREATE TABLE IF NOT EXISTS display_stats (
    id INTEGER PRIMARY KEY CHECK (id = 1),
    viewers INTEGER NULL,
    cumulative INTEGER NULL,
    share_count INTEGER NULL,
    like_count INTEGER NULL,
    feedback_count INTEGER NULL
  );
  INSERT OR IGNORE INTO display_stats (id) VALUES (1);
`)
ensureColumn(
  'display_stats',
  'override_enabled',
  'ALTER TABLE display_stats ADD COLUMN override_enabled INTEGER NOT NULL DEFAULT 0'
)
safeExec(`
  CREATE TABLE IF NOT EXISTS map_strike_source (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    lng REAL NOT NULL,
    lat REAL NOT NULL
  );
  CREATE TABLE IF NOT EXISTS map_strike_line (
    source_id TEXT NOT NULL,
    target_lng REAL NOT NULL,
    target_lat REAL NOT NULL,
    target_name TEXT,
    struck_at TEXT,
    FOREIGN KEY (source_id) REFERENCES map_strike_source(id)
  );
  CREATE INDEX IF NOT EXISTS idx_map_strike_line_source ON map_strike_line(source_id);
`)
ensureColumn('map_strike_line', 'struck_at', 'ALTER TABLE map_strike_line ADD COLUMN struck_at TEXT')
safeExec(`
  CREATE TABLE IF NOT EXISTS animation_config (
    id INTEGER PRIMARY KEY CHECK (id = 1),
    strike_cutoff_days INTEGER NOT NULL DEFAULT 5,
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
  );
  INSERT OR IGNORE INTO animation_config (id, strike_cutoff_days) VALUES (1, 5);
`)
safeExec(`
  CREATE TABLE IF NOT EXISTS war_map_config (
    id INTEGER PRIMARY KEY CHECK (id = 1),
    config TEXT NOT NULL,
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
  );
`)
}
/**
 * Initialize the sql.js database: load the on-disk file when present
 * (otherwise start empty), wrap the native handle, run migrations, and
 * install the result as the module-wide `_db`.
 *
 * Now idempotent: a repeated call returns the already-initialized handle
 * instead of re-reading the file and clobbering in-memory state that has
 * not been persisted yet.
 *
 * @returns {Promise<object>} the wrapped database handle
 */
async function initDb() {
  if (_db) return _db // guard: never discard an already-initialized DB
  const initSqlJs = require('sql.js')
  const SQL = await initSqlJs()
  _sqlJs = SQL
  // Seed from the persisted file when it exists, otherwise an empty DB.
  let data = new Uint8Array(0)
  if (fs.existsSync(dbPath)) {
    data = new Uint8Array(fs.readFileSync(dbPath))
  }
  const nativeDb = new SQL.Database(data)
  // Flush the in-memory DB back to disk; best-effort (logged, never thrown).
  function persist() {
    try {
      const buf = nativeDb.export()
      fs.writeFileSync(dbPath, Buffer.from(buf))
    } catch (e) {
      console.error('[db] persist error:', e.message)
    }
  }
  // NOTE(review): sql.js runs fully in memory, so this PRAGMA is presumably
  // a no-op kept for parity with better-sqlite3 — confirm before removing.
  nativeDb.run('PRAGMA journal_mode = WAL')
  const wrapped = wrapDatabase(nativeDb, persist)
  runMigrations(wrapped)
  _db = wrapped
  return _db
}
/**
 * 从磁盘重新加载 DB(爬虫进程写入同一文件后调用,使 Node 内存中的数据库与文件内容保持一致)
 */
// Re-read the database file from disk and swap the module-wide handle so the
// in-memory DB reflects writes made to the same file by another process.
// Throws when initDb() has not completed yet.
function reloadFromFile() {
  if (!_sqlJs || !_db) throw new Error('DB not initialized. Call initDb() first.')
  const bytes = fs.existsSync(dbPath)
    ? new Uint8Array(fs.readFileSync(dbPath))
    : new Uint8Array(0)
  const nativeDb = new _sqlJs.Database(bytes)
  // Best-effort flush of the in-memory DB back to the file (logged on error).
  const persist = () => {
    try {
      fs.writeFileSync(dbPath, Buffer.from(nativeDb.export()))
    } catch (e) {
      console.error('[db] persist error:', e.message)
    }
  }
  nativeDb.run('PRAGMA journal_mode = WAL')
  const reloaded = wrapDatabase(nativeDb, persist)
  runMigrations(reloaded)
  _db = reloaded
}
/**
 * Lazy proxy over the active database handle, so callers can `require` this
 * module before `initDb()` has resolved; every call resolves the handle via
 * `getDb()` at invocation time.
 */
const proxy = {
  /** Prepare a statement on the current database. */
  prepare(sql) {
    return getDb().prepare(sql)
  },
  /** Execute raw SQL on the current database. */
  exec(sql) {
    return getDb().exec(sql)
  },
  /**
   * Run a PRAGMA on the current database.
   * Fix: forward the underlying return value — it was silently dropped,
   * inconsistent with `prepare`/`exec`, so callers could never read
   * PRAGMA results through the proxy.
   */
  pragma(str) {
    return getDb().pragma(str)
  },
}
// Public surface: the lazy DB proxy plus the lifecycle helpers.
Object.assign(proxy, { initDb, getDb, reloadFromFile })
module.exports = proxy

// (diff-viewer artifact, not source code) Some files were not shown because too many files have changed in this diff Show More