Compare commits
42 Commits
24d0593e12
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3264b3252a | ||
|
|
95e2fe1c41 | ||
|
|
ac24c528f3 | ||
|
|
86e50debec | ||
|
|
8696549ba1 | ||
|
|
09ec2e3a69 | ||
|
|
034c088bac | ||
|
|
15800b1dad | ||
|
|
0cbeaf59a5 | ||
|
|
1764a44eb3 | ||
|
|
29c921f498 | ||
|
|
85dea726e9 | ||
|
|
fa6f7407f0 | ||
|
|
7284a1a60d | ||
|
|
4dd1f7e7dc | ||
|
|
92914e6522 | ||
|
|
ef60f18cb0 | ||
|
|
c3ec459671 | ||
|
|
75c58eecfc | ||
|
|
3c55485648 | ||
|
|
13ca470cad | ||
|
|
4e91018752 | ||
|
|
55c030e3f5 | ||
|
|
0027074b8b | ||
|
|
33e4786cd0 | ||
|
|
d646a93dcf | ||
|
|
af577400fb | ||
|
|
84656f4a11 | ||
|
|
aa630aa479 | ||
|
|
ffcce0ad81 | ||
|
|
ad73305ed1 | ||
|
|
a9caf6e7c0 | ||
|
|
81628a136a | ||
|
|
84e97f3370 | ||
|
|
049276fedd | ||
|
|
5460e806b6 | ||
|
|
2d800094b1 | ||
|
|
36576592a2 | ||
|
|
783a69dad1 | ||
|
|
004d10b283 | ||
|
|
4a8fff5a00 | ||
|
|
91d9e48e1e |
13
.dockerignore
Normal file
13
.dockerignore
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
node_modules
|
||||||
|
.git
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
*.log
|
||||||
|
dist
|
||||||
|
server/data.db
|
||||||
|
.DS_Store
|
||||||
|
*.md
|
||||||
|
.cursor
|
||||||
|
.venv
|
||||||
|
__pycache__
|
||||||
|
*.pyc
|
||||||
3
.env
Normal file
3
.env
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Mapbox 地图令牌
|
||||||
|
VITE_MAPBOX_ACCESS_TOKEN=pk.<已脱敏:原令牌曾随仓库泄漏,请先到 Mapbox 控制台轮换,再将新令牌填入此处>
|
||||||
|
DASHSCOPE_API_KEY=sk-<已脱敏:原 Key 曾随仓库泄漏,请先到阿里云 DashScope 控制台轮换,再将新 Key 填入此处>
|
||||||
48
.env.example
48
.env.example
@@ -1,46 +1,8 @@
|
|||||||
# Mapbox 地图令牌 (波斯湾区域展示)
|
# Mapbox 地图令牌(仅在此或 .env 中配置,勿写进源码;若曾泄漏请到 Mapbox 控制台轮换)
|
||||||
# 免费申请: https://account.mapbox.com/access-tokens/
|
# 免费申请: https://account.mapbox.com/access-tokens/
|
||||||
# 复制本文件为 .env 并填入你的 token
|
|
||||||
VITE_MAPBOX_ACCESS_TOKEN=your_mapbox_public_token_here
|
VITE_MAPBOX_ACCESS_TOKEN=your_mapbox_public_token_here
|
||||||
27 个基地完整 JSON 数据
|
|
||||||
[
|
|
||||||
{ "id": 1, "name": "Al Udeid Air Base", "country": "Qatar", "lat": 25.117, "lng": 51.314 },
|
|
||||||
{ "id": 2, "name": "Camp As Sayliyah", "country": "Qatar", "lat": 25.275, "lng": 51.520 },
|
|
||||||
|
|
||||||
{ "id": 3, "name": "Naval Support Activity Bahrain", "country": "Bahrain", "lat": 26.236, "lng": 50.608 },
|
# 阿里云 DashScope API Key(爬虫 AI 提取用,不设则用规则或 Ollama)
|
||||||
|
# 在 crawler 目录或系统环境变量中设置,例如:
|
||||||
{ "id": 4, "name": "Camp Arifjan", "country": "Kuwait", "lat": 28.832, "lng": 47.799 },
|
# export DASHSCOPE_API_KEY=sk-xxx
|
||||||
{ "id": 5, "name": "Ali Al Salem Air Base", "country": "Kuwait", "lat": 29.346, "lng": 47.520 },
|
DASHSCOPE_API_KEY=
|
||||||
{ "id": 6, "name": "Camp Buehring", "country": "Kuwait", "lat": 29.603, "lng": 47.456 },
|
|
||||||
|
|
||||||
{ "id": 7, "name": "Al Dhafra Air Base", "country": "UAE", "lat": 24.248, "lng": 54.547 },
|
|
||||||
|
|
||||||
{ "id": 8, "name": "Prince Sultan Air Base", "country": "Saudi Arabia", "lat": 24.062, "lng": 47.580 },
|
|
||||||
{ "id": 9, "name": "Eskan Village", "country": "Saudi Arabia", "lat": 24.774, "lng": 46.738 },
|
|
||||||
|
|
||||||
{ "id": 10, "name": "Al Asad Airbase", "country": "Iraq", "lat": 33.785, "lng": 42.441 },
|
|
||||||
{ "id": 11, "name": "Erbil Air Base", "country": "Iraq", "lat": 36.237, "lng": 43.963 },
|
|
||||||
{ "id": 12, "name": "Baghdad Diplomatic Support Center", "country": "Iraq", "lat": 33.315, "lng": 44.366 },
|
|
||||||
{ "id": 13, "name": "Camp Taji", "country": "Iraq", "lat": 33.556, "lng": 44.256 },
|
|
||||||
{ "id": 14, "name": "Ain al-Asad", "country": "Iraq", "lat": 33.800, "lng": 42.450 },
|
|
||||||
|
|
||||||
{ "id": 15, "name": "Al-Tanf Garrison", "country": "Syria", "lat": 33.490, "lng": 38.618 },
|
|
||||||
{ "id": 16, "name": "Rmelan Landing Zone", "country": "Syria", "lat": 37.015, "lng": 41.885 },
|
|
||||||
{ "id": 17, "name": "Shaddadi Base", "country": "Syria", "lat": 36.058, "lng": 40.730 },
|
|
||||||
{ "id": 18, "name": "Conoco Gas Field Base", "country": "Syria", "lat": 35.336, "lng": 40.295 },
|
|
||||||
|
|
||||||
{ "id": 19, "name": "Muwaffaq Salti Air Base", "country": "Jordan", "lat": 32.356, "lng": 36.259 },
|
|
||||||
|
|
||||||
{ "id": 20, "name": "Incirlik Air Base", "country": "Turkey", "lat": 37.002, "lng": 35.425 },
|
|
||||||
{ "id": 21, "name": "Kurecik Radar Station", "country": "Turkey", "lat": 38.354, "lng": 37.794 },
|
|
||||||
|
|
||||||
{ "id": 22, "name": "Nevatim Air Base", "country": "Israel", "lat": 31.208, "lng": 35.012 },
|
|
||||||
{ "id": 23, "name": "Ramon Air Base", "country": "Israel", "lat": 30.776, "lng": 34.666 },
|
|
||||||
|
|
||||||
{ "id": 24, "name": "Thumrait Air Base", "country": "Oman", "lat": 17.666, "lng": 54.024 },
|
|
||||||
{ "id": 25, "name": "Masirah Air Base", "country": "Oman", "lat": 20.675, "lng": 58.890 },
|
|
||||||
|
|
||||||
{ "id": 26, "name": "West Cairo Air Base", "country": "Egypt", "lat": 30.915, "lng": 30.298 },
|
|
||||||
|
|
||||||
{ "id": 27, "name": "Camp Lemonnier", "country": "Djibouti", "lat": 11.547, "lng": 43.159 }
|
|
||||||
]
|
|
||||||
|
|||||||
3
.env的副本
Normal file
3
.env的副本
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Mapbox 地图令牌
|
||||||
|
VITE_MAPBOX_ACCESS_TOKEN=pk.<已脱敏:原令牌曾随仓库泄漏,请先到 Mapbox 控制台轮换,再将新令牌填入此处>
|
||||||
|
DASHSCOPE_API_KEY=sk-<已脱敏:原 Key 曾随仓库泄漏,请先到阿里云 DashScope 控制台轮换,再将新 Key 填入此处>
|
||||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -23,10 +23,11 @@ dist-ssr
|
|||||||
*.sln
|
*.sln
|
||||||
*.sw?
|
*.sw?
|
||||||
|
|
||||||
# API database
|
# API database(SQLite 文件,部署时应挂载卷持久化,勿提交)
|
||||||
server/data.db
|
server/data.db
|
||||||
|
|
||||||
# Env
|
# Env(含 token,勿提交)
|
||||||
.env
|
.env
|
||||||
.env.local
|
.env.local
|
||||||
.env.*.local
|
.env.*.local
|
||||||
|
*.pyc
|
||||||
|
|||||||
168
DEPLOY.md
Normal file
168
DEPLOY.md
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
# Docker 部署到服务器
|
||||||
|
|
||||||
|
将 US-Iran 态势面板打包成 Docker 镜像,便于移植到任意服务器。
|
||||||
|
|
||||||
|
## 架构
|
||||||
|
|
||||||
|
| 服务 | 端口 | 说明 |
|
||||||
|
|--------|------|--------------------------|
|
||||||
|
| api | 3001 | 前端静态 + REST API + WebSocket |
|
||||||
|
| crawler| 8000 | RSS 爬虫 + GDELT,内部服务 |
|
||||||
|
|
||||||
|
- 数据库:SQLite,挂载到 `app-data` volume(`/data/data.db`)
|
||||||
|
- 前端与 API 合并到同一镜像,构建时执行 `npm run build` 生成 dist(含修订页 `/edit`),访问 `http://主机:3001` 即可
|
||||||
|
|
||||||
|
## 快速部署
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 克隆项目
|
||||||
|
git clone <repo> usa-dashboard && cd usa-dashboard
|
||||||
|
|
||||||
|
# 2. 构建并启动(需先配置 Mapbox Token,见下方)
|
||||||
|
docker compose up -d --build
|
||||||
|
|
||||||
|
# 3. 访问
|
||||||
|
# 前端 + API: http://localhost:3001
|
||||||
|
# 爬虫状态: http://localhost:8000/crawler/status
|
||||||
|
```
|
||||||
|
|
||||||
|
## Mapbox Token(地图展示)
|
||||||
|
|
||||||
|
构建时需将 Token 传入前端,否则地图为占位模式:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 方式 1:.env 文件
|
||||||
|
echo "VITE_MAPBOX_ACCESS_TOKEN=pk.xxx" > .env
|
||||||
|
docker compose up -d --build
|
||||||
|
|
||||||
|
# 方式 2:环境变量
|
||||||
|
VITE_MAPBOX_ACCESS_TOKEN=pk.xxx docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
## 推送到私有仓库并移植
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 打标签(替换为你的仓库地址)
|
||||||
|
docker compose build
|
||||||
|
docker tag usa-dashboard-api your-registry/usa-dashboard-api:latest
|
||||||
|
docker tag usa-dashboard-crawler your-registry/usa-dashboard-crawler:latest
|
||||||
|
|
||||||
|
# 2. 推送
|
||||||
|
docker push your-registry/usa-dashboard-api:latest
|
||||||
|
docker push your-registry/usa-dashboard-crawler:latest
|
||||||
|
|
||||||
|
# 3. 在目标服务器拉取并启动
|
||||||
|
docker pull your-registry/usa-dashboard-api:latest
|
||||||
|
docker pull your-registry/usa-dashboard-crawler:latest
|
||||||
|
# 需准备 docker-compose.yml 或等效编排,见下方
|
||||||
|
```
|
||||||
|
|
||||||
|
## 仅用镜像启动(无 compose)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 创建网络与数据卷
|
||||||
|
docker network create usa-net
|
||||||
|
docker volume create usa-data
|
||||||
|
|
||||||
|
# 2. 启动 API(前端+接口)
|
||||||
|
docker run -d --name api --network usa-net \
|
||||||
|
-p 3001:3001 \
|
||||||
|
-v usa-data:/data \
|
||||||
|
-e DB_PATH=/data/data.db \
|
||||||
|
usa-dashboard-api
|
||||||
|
|
||||||
|
# 3. 启动爬虫(通过 usa-net 访问 api)
|
||||||
|
docker run -d --name crawler --network usa-net \
|
||||||
|
-v usa-data:/data \
|
||||||
|
-e DB_PATH=/data/data.db \
|
||||||
|
-e API_BASE=http://api:3001 \
|
||||||
|
-e CLEANER_AI_DISABLED=1 \
|
||||||
|
-e GDELT_DISABLED=1 \
|
||||||
|
usa-dashboard-crawler
|
||||||
|
```
|
||||||
|
|
||||||
|
爬虫通过 `API_BASE` 调用 Node 的 `/api/crawler/notify`,两容器需在同一网络内。
|
||||||
|
|
||||||
|
## 国内服务器 / 镜像加速
|
||||||
|
|
||||||
|
拉取 `node`、`python` 等基础镜像慢时:
|
||||||
|
|
||||||
|
1. **Docker 镜像加速**:见 [docs/DOCKER_MIRROR.md](docs/DOCKER_MIRROR.md)
|
||||||
|
2. **构建时使用国内镜像源**:
|
||||||
|
```bash
|
||||||
|
docker compose build --build-arg REGISTRY=docker.m.daocloud.io/library/
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## 常用操作
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 查看日志
|
||||||
|
docker compose logs -f
|
||||||
|
|
||||||
|
# 重启
|
||||||
|
docker compose restart
|
||||||
|
|
||||||
|
# 停止并删除容器(数据卷保留)
|
||||||
|
docker compose down
|
||||||
|
|
||||||
|
# 回填战损数据(从 situation_update 重新提取)
|
||||||
|
curl -X POST http://localhost:8000/crawler/backfill
|
||||||
|
```
|
||||||
|
|
||||||
|
## 服务器直接部署(不用 Docker)
|
||||||
|
|
||||||
|
若在服务器上直接跑 Node(不用 Docker),要能访问修订页 `/edit`,需保证:
|
||||||
|
|
||||||
|
1. **先构建、再启动**:在项目根目录执行 `npm run build`,再启动 API(如 `npm run api` 或 `node server/index.js`)。
|
||||||
|
未构建时没有 `dist` 目录,启动会打日志:`dist 目录不存在,前端页面(含 /edit 修订页)不可用`。
|
||||||
|
|
||||||
|
2. **若前面有 Nginx**:`curl http://127.0.0.1:3001/edit` 已是 200 但浏览器访问 `/edit` 仍 404,说明 Nginx 没有把前端路由交给后端或没做 SPA fallback。二选一即可:
|
||||||
|
|
||||||
|
**方式 A:Nginx 只反代,所有页面由 Node 提供(推荐)**
|
||||||
|
```nginx
|
||||||
|
server {
|
||||||
|
listen 80;
|
||||||
|
server_name 你的域名;
|
||||||
|
location / {
|
||||||
|
proxy_pass http://127.0.0.1:3001;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
}
|
||||||
|
location /ws {
|
||||||
|
proxy_pass http://127.0.0.1:3001;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_set_header Upgrade $http_upgrade;
|
||||||
|
proxy_set_header Connection "upgrade";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**方式 B:Nginx 提供 dist 静态,仅 /api、/ws 反代**
|
||||||
|
```nginx
|
||||||
|
server {
|
||||||
|
listen 80;
|
||||||
|
server_name 你的域名;
|
||||||
|
root /path/to/项目根目录/dist; # 改成实际路径
|
||||||
|
index index.html;
|
||||||
|
location / {
|
||||||
|
try_files $uri $uri/ /index.html;
|
||||||
|
}
|
||||||
|
location /api {
|
||||||
|
proxy_pass http://127.0.0.1:3001;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
}
|
||||||
|
location /ws {
|
||||||
|
proxy_pass http://127.0.0.1:3001;
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_set_header Upgrade $http_upgrade;
|
||||||
|
proxy_set_header Connection "upgrade";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
修改后执行 `sudo nginx -t` 检查配置,再 `sudo systemctl reload nginx`(或 `sudo nginx -s reload`)。
|
||||||
41
Dockerfile
Normal file
41
Dockerfile
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# 前端 + 后端合一镜像:构建阶段产出 dist,运行阶段提供静态与 API(含修订页 /edit)
|
||||||
|
# 国内服务器拉取慢时,可加 --build-arg REGISTRY=docker.m.daocloud.io/library/(注意末尾斜杠,否则 FROM ${REGISTRY}node:20-slim 会拼成无效镜像名)
|
||||||
|
ARG REGISTRY=
|
||||||
|
|
||||||
|
# ---------- 阶段 1:构建前端 ----------
|
||||||
|
FROM ${REGISTRY}node:20-slim AS frontend-builder
|
||||||
|
WORKDIR /app
|
||||||
|
RUN npm config set registry https://registry.npmmirror.com
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm ci
|
||||||
|
COPY vite.config.ts index.html tsconfig.json tsconfig.app.json ./
|
||||||
|
COPY postcss.config.js tailwind.config.js ./
|
||||||
|
COPY src ./src
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# ---------- 阶段 2:运行(API + 静态) ----------
|
||||||
|
FROM ${REGISTRY}node:20-slim
|
||||||
|
|
||||||
|
RUN npm config set registry https://registry.npmmirror.com
|
||||||
|
|
||||||
|
RUN rm -f /etc/apt/sources.list.d/debian.sources && \
|
||||||
|
echo 'deb http://mirrors.aliyun.com/debian bookworm main' > /etc/apt/sources.list && \
|
||||||
|
echo 'deb http://mirrors.aliyun.com/debian bookworm-updates main' >> /etc/apt/sources.list && \
|
||||||
|
echo 'deb http://mirrors.aliyun.com/debian-security bookworm-security main' >> /etc/apt/sources.list && \
|
||||||
|
apt-get update && apt-get install -y --no-install-recommends python3 make g++ && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm ci --omit=dev
|
||||||
|
COPY server ./server
|
||||||
|
COPY --from=frontend-builder /app/dist ./dist
|
||||||
|
|
||||||
|
ENV NODE_ENV=production
|
||||||
|
ENV API_PORT=3001
|
||||||
|
ENV DB_PATH=/data/data.db
|
||||||
|
EXPOSE 3001
|
||||||
|
|
||||||
|
COPY docker-entrypoint.sh ./
|
||||||
|
RUN chmod +x docker-entrypoint.sh
|
||||||
|
ENTRYPOINT ["./docker-entrypoint.sh"]
|
||||||
18
Dockerfile.crawler
Normal file
18
Dockerfile.crawler
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Python 爬虫服务
|
||||||
|
# 国内服务器可加 --build-arg REGISTRY=docker.m.daocloud.io/library/(注意末尾斜杠,否则 FROM ${REGISTRY}python:3.11-slim 会拼成无效镜像名)
|
||||||
|
ARG REGISTRY=
|
||||||
|
FROM ${REGISTRY}python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY crawler/requirements.txt ./
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
COPY crawler ./
|
||||||
|
|
||||||
|
ENV DB_PATH=/data/data.db
|
||||||
|
ENV API_BASE=http://api:3001
|
||||||
|
ENV GDELT_DISABLED=1
|
||||||
|
ENV RSS_INTERVAL_SEC=60
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
CMD ["uvicorn", "realtime_conflict_service:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
55
README.md
55
README.md
@@ -41,7 +41,13 @@ npm run api:seed
|
|||||||
npm run api
|
npm run api
|
||||||
```
|
```
|
||||||
|
|
||||||
开发时需同时运行前端与 API:
|
开发时可用一键启动(推荐):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm start
|
||||||
|
```
|
||||||
|
|
||||||
|
或分终端分别运行:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 终端 1
|
# 终端 1
|
||||||
@@ -53,6 +59,19 @@ npm run dev
|
|||||||
|
|
||||||
API 会由 Vite 代理到 `/api`,前端通过 `/api/situation` 获取完整态势数据。数据库文件位于 `server/data.db`,可通过修改表数据实现动态调整。
|
API 会由 Vite 代理到 `/api`,前端通过 `/api/situation` 获取完整态势数据。数据库文件位于 `server/data.db`,可通过修改表数据实现动态调整。
|
||||||
|
|
||||||
|
### 爬虫不生效时
|
||||||
|
|
||||||
|
1. 测试 RSS 抓取:`npm run crawler:test`(需网络,返回抓取条数)
|
||||||
|
2. 单独启动爬虫查看日志:`npm run gdelt`(另开终端)
|
||||||
|
3. 查看爬虫状态:`curl http://localhost:8000/crawler/status`(需爬虫服务已启动)
|
||||||
|
4. 数据库面板 `/db` 每 30 秒自动刷新,可观察 situation_update 条数是否增加
|
||||||
|
|
||||||
|
### 面板数据 / 地图 / 战损不更新时
|
||||||
|
|
||||||
|
- **确保 API 与爬虫共用同一数据库**:本地开发时,Node 默认用 `server/data.db`,爬虫默认用 `../server/data.db`(同文件)。若 Node 在本地、爬虫在 Docker,则数据库不同,面板不会更新。
|
||||||
|
- **Docker 部署**:`GDELT_DISABLED=1` 时,地图冲突点由 RSS 新闻填充;战损与基地状态由规则/AI 提取后写入 `combat_losses` 和 `key_location`。
|
||||||
|
- **排查**:访问 `/db` 看 `situation_update`、`gdelt_events`、`combat_losses` 是否在增长;确认 API 已启动且前端能访问 `/api/situation`。
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -65,6 +84,37 @@ npm run dev
|
|||||||
npm run build
|
npm run build
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Docker 部署
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 构建并启动(需 .env 中配置 VITE_MAPBOX_ACCESS_TOKEN 以启用地图)
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 访问前端:http://localhost:3001
|
||||||
|
# 数据库与爬虫共享 volume,首次启动自动 seed
|
||||||
|
```
|
||||||
|
|
||||||
|
**迁移到服务器**:见 [DEPLOY.md](DEPLOY.md)(构建、推送、单机/多机部署说明)
|
||||||
|
|
||||||
|
**拉取镜像超时?** 在 Docker Desktop 配置镜像加速,见 [docs/DOCKER_MIRROR.md](docs/DOCKER_MIRROR.md)
|
||||||
|
|
||||||
|
**开发时无需每次 rebuild**:使用开发模式挂载源码 + 热重载:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
- API:`node --watch` 监听 `server/` 变更并自动重启
|
||||||
|
- 爬虫:`uvicorn --reload` 监听 `crawler/` 变更并自动重启
|
||||||
|
- 修改 `server/` 或 `crawler/` 后,服务会自动重载,无需重新 build
|
||||||
|
|
||||||
|
环境变量(可选,在 .env 或 docker-compose.yml 中配置):
|
||||||
|
|
||||||
|
- `VITE_MAPBOX_ACCESS_TOKEN`:Mapbox 令牌,构建时注入
|
||||||
|
- `DB_PATH`:数据库路径(默认 /data/data.db)
|
||||||
|
- `CLEANER_AI_DISABLED=1`:爬虫默认禁用 Ollama
|
||||||
|
- `GDELT_DISABLED=1`:爬虫默认禁用 GDELT(国内易超时)
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
@@ -91,3 +141,6 @@ server/
|
|||||||
├── seed.js # 数据库种子脚本
|
├── seed.js # 数据库种子脚本
|
||||||
└── data.db # SQLite 数据库(运行 seed 后生成)
|
└── data.db # SQLite 数据库(运行 seed 后生成)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
https://git.bimwe.com/Daniel/usa.git
|
||||||
417
crawler/README.md
Normal file
417
crawler/README.md
Normal file
@@ -0,0 +1,417 @@
|
|||||||
|
# GDELT 实时冲突服务 + 新闻爬虫
|
||||||
|
|
||||||
|
## 数据来源梳理
|
||||||
|
|
||||||
|
### 1. GDELT Project (gdelt_events)
|
||||||
|
|
||||||
|
| 项目 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| API | `https://api.gdeltproject.org/api/v2/doc/doc` |
|
||||||
|
| 查询 | `query=United States Iran military`(可配 `GDELT_QUERY`) |
|
||||||
|
| 模式 | `mode=ArtList`,`format=json`,`maxrecords=30` |
|
||||||
|
| 时间范围 | **未指定时默认最近 3 个月**,按相关性排序,易返回较旧文章 |
|
||||||
|
| 更新频率 | GDELT 约 15 分钟级,爬虫 60 秒拉一次 |
|
||||||
|
|
||||||
|
**数据偏老原因**:未传 `timespan` 和 `sort=datedesc`,API 返回 3 个月内“最相关”文章,不保证最新。
|
||||||
|
|
||||||
|
### 2. RSS 新闻 (situation_update) — 主事件脉络来源
|
||||||
|
|
||||||
|
| 项目 | 说明 |
|
||||||
|
|------|------|
|
||||||
|
| 源 | 多国主流媒体:美(Reuters/NYT)、英(BBC)、法(France 24)、俄(TASS/RT)、中(Xinhua/CGTN)、伊(Press TV)、卡塔尔(Al Jazeera) |
|
||||||
|
| 过滤 | 标题/摘要需含 `KEYWORDS` 之一(iran、usa、strike、military 等) |
|
||||||
|
| 更新 | 爬虫 45 秒拉一次(`RSS_INTERVAL_SEC`),优先保证事件脉络 |
|
||||||
|
| 优先级 | 启动时先拉 RSS,再拉 GDELT |
|
||||||
|
|
||||||
|
**GDELT 无法访问时**:设置 `GDELT_DISABLED=1`,仅用 RSS 新闻即可维持事件脉络。部分境外源可能受网络限制。
|
||||||
|
|
||||||
|
### 3. AI 新闻清洗与分类(可选)
|
||||||
|
|
||||||
|
- **清洗**:`cleaner_ai.py` 用 Ollama 提炼新闻为简洁摘要,供面板展示
|
||||||
|
- **分类**:`parser_ai.py` 用 Ollama 替代规则做 category/severity 判定
|
||||||
|
- 需先安装并运行 Ollama:`ollama run llama3.1`
|
||||||
|
- 环境变量:`OLLAMA_MODEL=llama3.1`、`PARSER_AI_DISABLED=1`、`CLEANER_AI_DISABLED=1`(禁用对应 AI)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**事件脉络可实时更新**:爬虫抓取后 → 写入 SQLite → 调用 Node 通知 → WebSocket 广播 → 前端自动刷新。
|
||||||
|
|
||||||
|
## 依赖
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
新增 `deep-translator`:GDELT 与 RSS 新闻入库前自动翻译为中文。
|
||||||
|
|
||||||
|
## 运行(需同时启动 3 个服务)
|
||||||
|
|
||||||
|
| 终端 | 命令 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 1 | `npm run api` | Node API + WebSocket(必须) |
|
||||||
|
| 2 | `npm run gdelt` | GDELT + RSS 爬虫(**事件脉络数据来源**) |
|
||||||
|
| 3 | `npm run dev` | 前端开发 |
|
||||||
|
|
||||||
|
**事件脉络不更新时**:多半是未启动 `npm run gdelt`。只跑 `npm run api` 时,事件脉络会显示空或仅有缓存。
|
||||||
|
|
||||||
|
## 如何检查爬虫是否工作正常
|
||||||
|
|
||||||
|
按下面顺序做即可确认整条链路(爬虫 → 数据库 → Node 重载 → API/WebSocket)正常。
|
||||||
|
|
||||||
|
### 1. 一键验证(推荐)
|
||||||
|
|
||||||
|
先启动 API,再执行验证脚本(可选是否顺带启动爬虫):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 终端 1:必须
|
||||||
|
npm run api
|
||||||
|
|
||||||
|
# 终端 2:执行验证(不启动爬虫,只检查当前状态)
|
||||||
|
./scripts/verify-pipeline.sh
|
||||||
|
|
||||||
|
# 或:顺带启动爬虫并等首次抓取后再验证
|
||||||
|
./scripts/verify-pipeline.sh --start-crawler
|
||||||
|
```
|
||||||
|
|
||||||
|
脚本会检查:API 健康、态势数据含 `lastUpdated`、爬虫服务是否可达、`news_content`/situation_update、战损字段、`POST /api/crawler/notify` 是否可用。
|
||||||
|
|
||||||
|
### 2. 手动快速检查
|
||||||
|
|
||||||
|
| 步骤 | 命令 / 操作 | 正常表现 |
|
||||||
|
|-----|-------------|----------|
|
||||||
|
| API 是否在跑 | `curl -s http://localhost:3001/api/health` | 返回 `{"ok":true}` |
|
||||||
|
| 态势是否可读 | `curl -s http://localhost:3001/api/situation \| head -c 300` | 含 `lastUpdated`、`usForces`、`recentUpdates` |
|
||||||
|
| RSS 能否抓到 | `npm run crawler:test` | 输出「RSS 抓取: N 条」,N>0 表示有命中 |
|
||||||
|
| 爬虫服务(gdelt) | `curl -s http://localhost:8000/crawler/status` | 返回 JSON,含 `db_path`/`db_exists` 等 |
|
||||||
|
| 库里有无爬虫数据 | `sqlite3 server/data.db "SELECT COUNT(*) FROM situation_update; SELECT COUNT(*) FROM news_content;"` 或访问 `http://localhost:3001/api/db/dashboard` | situation_update、news_content 条数 > 0(跑过流水线后) |
|
||||||
|
| 通知后是否重载 | 爬虫写库后会 POST `/api/crawler/notify`,Node 会 `reloadFromFile` 再广播 | 前端/`/api/situation` 的 `lastUpdated` 和内容会更新 |
|
||||||
|
|
||||||
|
### 3. 跑一轮流水线(不常驻爬虫时)
|
||||||
|
|
||||||
|
不启动 gdelt 时,可单次跑完整流水线(抓取 → 去重 → 写表 → notify):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run api # 保持运行
|
||||||
|
cd crawler && python3 -c "
|
||||||
|
from pipeline import run_full_pipeline
|
||||||
|
from config import DB_PATH, API_BASE
|
||||||
|
n_fetched, n_news, n_panel = run_full_pipeline(db_path=DB_PATH, api_base=API_BASE, notify=True)
|
||||||
|
print('抓取:', n_fetched, '去重新增:', n_news, '面板写入:', n_panel)
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
有网络且有关键词命中时,应看到非零数字;再查 `curl -s http://localhost:3001/api/situation` 或前端事件脉络是否出现新数据。
|
||||||
|
|
||||||
|
**按时间范围测试(例如 2 月 28 日 0 时至今)**:RSS 流水线支持只保留指定起始时间之后的条目,便于测试「从某日 0 点到现在」的数据。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 默认从 2026-02-28 0:00 到现在
|
||||||
|
npm run crawler:once:range
|
||||||
|
|
||||||
|
# 或指定起始时间
|
||||||
|
./scripts/run-crawler-range.sh 2026-02-28T00:00:00
|
||||||
|
```
|
||||||
|
|
||||||
|
需设置环境变量 `CRAWL_START_DATE`(ISO 时间,如 `2026-02-28T00:00:00`)。GDELT 时间范围在启动 gdelt 服务时设置,例如:`GDELT_TIMESPAN=3d npm run gdelt`(最近 3 天)。
|
||||||
|
|
||||||
|
### 4. 仅测提取逻辑(不写库)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run crawler:test:extraction # 规则/db_merge 测试
|
||||||
|
# 或按 README「快速自测命令」用示例文本调 extract_from_news 看 combat_losses_delta / key_location_updates
|
||||||
|
```
|
||||||
|
|
||||||
|
**常见现象**:抓取 0 条 → 网络/RSS 被墙或关键词未命中;situation_update 为空 → 未跑流水线或去重后无新增;前端不刷新 → 未开 `npm run api` 或未开爬虫(gdelt)。
|
||||||
|
|
||||||
|
### 5. 爬虫与面板是否联通
|
||||||
|
|
||||||
|
专门检查「爬虫写库」与「面板展示」是否一致:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/check-crawler-panel-connectivity.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
会对比:爬虫侧的 `situation_update` 条数 vs 面板 API 返回的 `recentUpdates` 条数,并说明为何战损/基地等不一定随每条新闻变化。
|
||||||
|
|
||||||
|
## 爬虫与面板数据联动说明
|
||||||
|
|
||||||
|
| 面板展示 | 数据来源(表/接口) | 是否由爬虫更新 | 说明 |
|
||||||
|
|----------|---------------------|----------------|------|
|
||||||
|
| **事件脉络** (recentUpdates) | situation_update → getSituation() | ✅ 是 | 每条去重后的新闻会写入 situation_update,Node 收到 notify 后重载 DB 再广播 |
|
||||||
|
| **地图冲突点** (conflictEvents) | gdelt_events 或 RSS→gdelt 回填 | ✅ 是 | GDELT 或 GDELT 禁用时由 situation_update 同步到 gdelt_events |
|
||||||
|
| **战损/装备毁伤** (combatLosses) | combat_losses | ⚠️ 有条件 | 仅当 AI/规则从新闻中提取到数字(如「2 名美军死亡」)时,merge 才写入增量 |
|
||||||
|
| **基地/地点状态** (keyLocations) | key_location | ⚠️ 有条件 | 仅当提取到 key_location_updates(如某基地遭袭)时更新 |
|
||||||
|
| **力量摘要/指数/资产** (summary, powerIndex, assets) | force_summary, power_index, force_asset | ❌ 否 | 仅 seed 初始化,爬虫不写 |
|
||||||
|
| **华尔街/报复情绪** (wallStreet, retaliation) | wall_street_trend, retaliation_* | ⚠️ 有条件 | 仅当提取器输出对应字段时更新 |
|
||||||
|
|
||||||
|
因此:**新闻很多、但战损/基地数字不动**是正常现象——多数标题不含可解析的伤亡/基地数字,只有事件脉络(recentUpdates)和地图冲突点会随每条新闻增加。若**事件脉络也不更新**,请确认 Node 终端在爬虫每轮抓取后是否出现 `[crawler/notify] DB 已重载`;若无,检查爬虫的 `API_BASE` 是否指向当前 API(默认 `http://localhost:3001`)。
|
||||||
|
|
||||||
|
## 写库流水线(与 server/README 第五节一致)
|
||||||
|
|
||||||
|
RSS 与主入口均走统一流水线 `pipeline.run_full_pipeline`:
|
||||||
|
|
||||||
|
1. **抓取** → 2. **AI 清洗**(标题/摘要/分类)→ 3. **去重**(news_content.content_hash)→ 4. **映射到前端库字段**(situation_update、combat_losses、key_location 等)→ 5. **更新表** → 6. **有新增时 POST /api/crawler/notify**
|
||||||
|
|
||||||
|
- `npm run crawler`(main.py)与 `npm run gdelt`(realtime_conflict_service)的 RSS 分支都调用该流水线。
|
||||||
|
- 实现见 `crawler/pipeline.py`。
|
||||||
|
|
||||||
|
## 数据流
|
||||||
|
|
||||||
|
```
|
||||||
|
GDELT API → 抓取(60s) → SQLite (gdelt_events, conflict_stats) → POST /api/crawler/notify
|
||||||
|
RSS → 抓取 → 清洗 → 去重 → 写 news_content / situation_update / 战损等 → POST /api/crawler/notify
|
||||||
|
↓
|
||||||
|
Node 更新 situation.updated_at + WebSocket 广播
|
||||||
|
↓
|
||||||
|
前端实时展示
|
||||||
|
```
|
||||||
|
|
||||||
|
## 配置
|
||||||
|
|
||||||
|
环境变量:
|
||||||
|
|
||||||
|
- `DB_PATH`: SQLite 路径,默认 `../server/data.db`
|
||||||
|
- `API_BASE`: Node API 地址,默认 `http://localhost:3001`
|
||||||
|
- **`DASHSCOPE_API_KEY`**:阿里云通义(DashScope)API Key。**设置后全程使用商业模型,无需本机安装 Ollama**(适合 Mac 版本较低无法跑 Ollama 的情况)。获取: [阿里云百炼 / DashScope](https://dashscope.console.aliyun.com/) → 创建 API-KEY,复制到环境变量或项目根目录 `.env` 中 `DASHSCOPE_API_KEY=sk-xxx`。摘要、分类、战损/基地提取均走通义。
|
||||||
|
- `GDELT_QUERY`: 搜索关键词,默认 `United States Iran military`
|
||||||
|
- `GDELT_MAX_RECORDS`: 最大条数,默认 30
|
||||||
|
- `GDELT_TIMESPAN`: 时间范围,`1h` / `1d` / `1week`,默认 `1d`(近日资讯)
|
||||||
|
- `GDELT_DISABLED`: 设为 `1` 则跳过 GDELT,仅用 RSS 新闻(GDELT 无法访问时用)
|
||||||
|
- `FETCH_INTERVAL_SEC`: GDELT 抓取间隔(秒),默认 60
|
||||||
|
- `RSS_INTERVAL_SEC`: RSS 抓取间隔(秒),默认 45(优先保证事件脉络)
|
||||||
|
- `OLLAMA_MODEL`: AI 分类模型,默认 `llama3.1`
|
||||||
|
- `PARSER_AI_DISABLED`: 设为 `1` 则禁用 AI 分类,仅用规则
|
||||||
|
- `CLEANER_AI_DISABLED`: 设为 `1` 则禁用 AI 清洗,仅用规则截断
|
||||||
|
- `FETCH_FULL_ARTICLE`: 设为 `0` 则不再抓取正文,仅用标题+摘要做 AI 提取(默认 `1` 抓取正文)
|
||||||
|
- `ARTICLE_FETCH_LIMIT`: 每轮为多少条新资讯抓取正文,默认 10
|
||||||
|
- `ARTICLE_FETCH_TIMEOUT`: 单篇正文请求超时(秒),默认 12
|
||||||
|
- `ARTICLE_MAX_BODY_CHARS`: 正文最大字符数,默认 6000
|
||||||
|
- `EXTRACT_TEXT_MAX_LEN`: 送入 AI 提取的原文最大长度,默认 4000
|
||||||
|
|
||||||
|
**增量与地点**:战损一律按**增量**处理——AI 只填本则报道的「本次/此次」新增数,不填累计总数;合并时与库内当前值叠加。双方攻击地点通过 `key_location_updates` 更新(美军基地被打击 side=us,伊朗设施被打击 side=iran),会写入 `key_location` 的 status/damage_level。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 主要新闻资讯来源(RSS)
|
||||||
|
|
||||||
|
配置在 `crawler/config.py` 的 `RSS_FEEDS`,当前包含:
|
||||||
|
|
||||||
|
| 来源 | URL / 说明 |
|
||||||
|
|------|------------|
|
||||||
|
| **美国** | Reuters Top News、NYT World |
|
||||||
|
| **英国** | BBC World、BBC Middle East、The Guardian World |
|
||||||
|
| **法国** | France 24 |
|
||||||
|
| **德国** | DW World |
|
||||||
|
| **俄罗斯** | TASS、RT |
|
||||||
|
| **中国** | Xinhua World、CGTN World |
|
||||||
|
| **凤凰** | 凤凰军事、凤凰国际(feedx.net 镜像) |
|
||||||
|
| **伊朗** | Press TV |
|
||||||
|
| **卡塔尔/中东** | Al Jazeera All、Al Jazeera Middle East |
|
||||||
|
|
||||||
|
单源超时由 `FEED_TIMEOUT`(默认 12 秒)控制;某源失败不影响其他源。
|
||||||
|
|
||||||
|
**过滤**:每条条目的标题+摘要必须命中 `config.KEYWORDS` 中至少一个关键词才会进入流水线(伊朗/美国/中东/军事/基地/霍尔木兹等,见 `config.KEYWORDS`)。
|
||||||
|
|
||||||
|
### 境内可访问情况(仅供参考,以实际网络为准)
|
||||||
|
|
||||||
|
| 通常境内可直接访问 | 说明 |
|
||||||
|
|-------------------|------|
|
||||||
|
| **新华网** `english.news.cn/rss/world.xml` | 中国官方外文社 |
|
||||||
|
| **CGTN** `cgtn.com/rss/world` | 中国国际台 |
|
||||||
|
| **凤凰** `feedx.net/rss/ifengmil.xml`、`ifengworld.xml` | 第三方 RSS 镜像,中文军事/国际 |
|
||||||
|
| **人民网** `people.com.cn/rss/military.xml`、`world.xml` | 军事、国际 |
|
||||||
|
| **新浪** `rss.sina.com.cn` 军事/新闻 | 新浪军事、新浪新闻滚动 |
|
||||||
|
| **中国日报** `chinadaily.com.cn/rss/world_rss.xml` | 国际新闻 |
|
||||||
|
| **中国军网** `english.chinamil.com.cn/rss.xml` | 解放军报英文 |
|
||||||
|
| **俄通社 TASS** `tass.com/rss/v2.xml` | 俄罗斯官媒 |
|
||||||
|
| **RT** `rt.com/rss/` | 俄罗斯今日俄罗斯 |
|
||||||
|
| **DW** `rss.dw.com/xml/rss-en-world` | 德国之声,部分地区/时段可访问 |
|
||||||
|
|
||||||
|
**境内常需代理**:Reuters、NYT、BBC、Guardian、France 24、Al Jazeera、Press TV 等境外主站 RSS,直连易超时或被墙。境内部署建议:设 `CRAWLER_USE_PROXY=1` 并配置代理,或仅保留上表源(可在 `config.py` 中注释掉不可达的 URL,减少超时等待)。
|
||||||
|
|
||||||
|
**国内其他媒体(今日头条、网易、腾讯、新浪微博等)**:今日头条、腾讯新闻、新浪微博等多为 App/信息流产品,**无官方公开 RSS**。如需接入可考虑:第三方 RSS 聚合(如 FeedX、RSSHub 等若有对应频道)、或平台开放 API(若有且合规使用)。当前爬虫已加入新浪(rss.sina.com.cn)、人民网、中国日报、中国军网等有明确 RSS 的境内源;网易新闻曾有 RSS 中心页,具体栏目 XML 需在其订阅页查找后加入 `config.py`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 为什么爬虫一直抓不到有效信息(0 条)
|
||||||
|
|
||||||
|
常见原因与应对如下。
|
||||||
|
|
||||||
|
| 原因 | 说明 | 建议 |
|
||||||
|
|------|------|------|
|
||||||
|
| **RSS 源在国内不可达** | 多数源为境外站(Reuters、BBC、NYT、Guardian、France24、DW、TASS、RT、Al Jazeera、Press TV 等),国内直连易超时或被墙。 | 使用代理:设 `CRAWLER_USE_PROXY=1` 并配置系统/环境 HTTP(S) 代理,或部署到海外服务器再跑爬虫。 |
|
||||||
|
| **关键词无一命中** | 只有标题或摘要里包含 `KEYWORDS` 中至少一个词才会保留(如 iran、usa、middle east、strike、基地 等)。若当前头条都不涉及美伊/中东,整轮会 0 条。 | 先跑 `npm run crawler:test` 看是否 0 条;若长期为 0 且网络正常,可在 `config.py` 中适当放宽或增加 `KEYWORDS`(如增加通用词做测试)。 |
|
||||||
|
| **单源超时导致整轮无结果** | 若所有源都在 `FEED_TIMEOUT` 内未返回,则每源返回空列表,汇总仍为 0 条。 | 增大 `FEED_TIMEOUT`(如 20);或先单独用浏览器/curl 测某条 RSS URL 是否可访问;国内建议代理后再试。 |
|
||||||
|
| **分类/清洗依赖 AI 且失败** | 每条命中关键词的条目会调 `classify_and_severity`(Ollama 或 DashScope)。若本机未起 Ollama、未设 DashScope,且规则兜底异常,可能影响该条。 | 设 `PARSER_AI_DISABLED=1` 使用纯规则分类,避免依赖 Ollama/DashScope;或配置好 `DASHSCOPE_API_KEY` / 本地 Ollama 再跑。 |
|
||||||
|
| **去重后无新增** | 抓到的条数 >0,但经 `news_content` 的 content_hash 去重后「新增」为 0,则不会写 `situation_update`,事件脉络不增加。 | 属正常:同一批新闻再次抓取不会重复写入。等有新头条命中关键词后才会出现新条目。 |
|
||||||
|
|
||||||
|
**快速自检**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run crawler:test
|
||||||
|
```
|
||||||
|
|
||||||
|
输出「RSS 抓取: N 条」。若始终为 0,优先检查网络/代理与 `KEYWORDS`;若 N>0 但面板无新事件,多为去重后无新增或未调 `POST /api/crawler/notify`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 优化后验证效果示例
|
||||||
|
|
||||||
|
以下为「正文抓取 + AI 精确提取 + 增量与地点更新」优化后,单条新闻从输入到前端展示的完整示例,便于对照验证。
|
||||||
|
|
||||||
|
### 1. 示例输入(新闻摘要/全文片段)
|
||||||
|
|
||||||
|
```
|
||||||
|
伊朗向伊拉克阿萨德空军基地发射 12 枚弹道导弹,造成此次袭击中 2 名美军人员死亡、14 人受伤,
|
||||||
|
另有 1 架战机在跑道受损。乌代德基地未遭直接命中。同日以色列对伊朗伊斯法罕一处设施发动打击。
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. AI 提取输出(增量 + 攻击地点)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"summary": "伊朗导弹袭击伊拉克阿萨德基地致美军 2 死 14 伤,1 架战机受损;以军打击伊斯法罕。",
|
||||||
|
"category": "alert",
|
||||||
|
"severity": "high",
|
||||||
|
"us_personnel_killed": 2,
|
||||||
|
"us_personnel_wounded": 14,
|
||||||
|
"us_aircraft": 1,
|
||||||
|
"us_bases_damaged": 1,
|
||||||
|
"key_location_updates": [
|
||||||
|
{ "name_keywords": "阿萨德|asad|al-asad", "side": "us", "status": "attacked", "damage_level": 2 },
|
||||||
|
{ "name_keywords": "伊斯法罕|isfahan", "side": "iran", "status": "attacked", "damage_level": 1 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:战损为**本则报道的新增数**(此次 2 死、14 伤、1 架战机),不是累计总数;地点为双方遭袭设施(美军基地 side=us,伊朗设施 side=iran)。
|
||||||
|
|
||||||
|
### 3. 合并后数据库变化
|
||||||
|
|
||||||
|
| 表/字段 | 合并前 | 本则增量 | 合并后 |
|
||||||
|
|--------|--------|----------|--------|
|
||||||
|
| combat_losses.us.personnel_killed | 127 | +2 | 129 |
|
||||||
|
| combat_losses.us.personnel_wounded | 384 | +14 | 398 |
|
||||||
|
| combat_losses.us.aircraft | 2 | +1 | 3 |
|
||||||
|
| combat_losses.us.bases_damaged | 27 | +1 | 28 |
|
||||||
|
| key_location(name 含「阿萨德」) | status=operational | — | status=attacked, damage_level=2 |
|
||||||
|
| key_location(name 含「伊斯法罕」) | status=operational | — | status=attacked, damage_level=1 |
|
||||||
|
|
||||||
|
若 AI 误提「累计 2847 人丧生」并填成 personnel_killed=2847,单次合并会被上限截断(如最多 +500),避免一次写入导致数据剧增。
|
||||||
|
|
||||||
|
### 4. 前端验证效果
|
||||||
|
|
||||||
|
- **事件脉络**:出现一条新条目,summary 为上述 1–2 句概括,category=alert、severity=high。
|
||||||
|
- **装备毁伤面板**:美军「阵亡」+2、「受伤」+14、「战机」+1;基地毁/损数字随 bases_damaged +1 更新。
|
||||||
|
- **地图**:阿萨德基地、伊斯法罕对应点位显示为「遭袭」状态(脉冲/标色随现有地图逻辑)。
|
||||||
|
- **API**:`GET /api/situation` 中 `usForces.combatLosses`、`usForces.keyLocations`(含 status/damage_level)为更新后值;`lastUpdated` 为合并后时间。
|
||||||
|
|
||||||
|
### 5. 快速自测命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 仅测提取逻辑(不写库):用示例文本调 AI 提取,看是否得到增量 + key_location_updates
|
||||||
|
cd crawler && python3 -c "
|
||||||
|
from extractor_ai import extract_from_news
|
||||||
|
text = '''伊朗向伊拉克阿萨德空军基地发射导弹,此次袭击造成 2 名美军死亡、14 人受伤,1 架战机受损。'''
|
||||||
|
out = extract_from_news(text)
|
||||||
|
print('combat_losses_delta:', out.get('combat_losses_delta'))
|
||||||
|
print('key_location_updates:', out.get('key_location_updates'))
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
期望:`combat_losses_delta.us` 含 personnel_killed=2、personnel_wounded=14、aircraft=1 等增量;`key_location_updates` 含阿萨德 side=us 等条目。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 冲突强度 (impact_score)
|
||||||
|
|
||||||
|
| 分数 | 地图效果 |
|
||||||
|
|------|------------|
|
||||||
|
| 1–3 | 绿色点 |
|
||||||
|
| 4–6 | 橙色闪烁 |
|
||||||
|
| 7–10 | 红色脉冲扩散 |
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
- `GET http://localhost:8000/events`:返回事件列表与冲突统计(Python 服务直连)
|
||||||
|
- `GET http://localhost:3001/api/events`:从 Node 读取(推荐,含 WebSocket 同步)
|
||||||
|
|
||||||
|
## 本地验证链路
|
||||||
|
|
||||||
|
按下面任选一种方式,确认「抓取 → 清洗 → 去重 → 映射 → 写表 → 通知」整条链路正常。
|
||||||
|
|
||||||
|
### 方式一:最小验证(不启动前端)
|
||||||
|
|
||||||
|
1. **启动 API(必须)**
|
||||||
|
```bash
|
||||||
|
npm run api
|
||||||
|
```
|
||||||
|
保持运行,默认 `http://localhost:3001`。
|
||||||
|
|
||||||
|
2. **安装爬虫依赖并跑一轮流水线**
|
||||||
|
```bash
|
||||||
|
cd crawler && pip install -r requirements.txt
|
||||||
|
python -c "
|
||||||
|
from pipeline import run_full_pipeline
|
||||||
|
from config import DB_PATH, API_BASE
|
||||||
|
n_fetched, n_news, n_panel = run_full_pipeline(db_path=DB_PATH, api_base=API_BASE, translate=True, notify=True)
|
||||||
|
print('抓取:', n_fetched, '去重新增:', n_news, '面板写入:', n_panel)
|
||||||
|
"
|
||||||
|
```
|
||||||
|
- 有网络且有关键词命中时,应看到非零数字;无网络或全被过滤则为 `0 0 0`。
|
||||||
|
- 若报错 `module 'socket' has no attribute 'settimeout'`,已修复为 `setdefaulttimeout`,请拉取最新代码。
|
||||||
|
|
||||||
|
3. **查库确认**
|
||||||
|
```bash
|
||||||
|
sqlite3 server/data.db "SELECT COUNT(*) FROM situation_update; SELECT COUNT(*) FROM news_content;"
|
||||||
|
```
|
||||||
|
或浏览器打开 `http://localhost:3001/api/db/dashboard`,看 `situation_update`、`news_content` 是否有数据。
|
||||||
|
|
||||||
|
4. **确认态势接口**
|
||||||
|
```bash
|
||||||
|
curl -s http://localhost:3001/api/situation | head -c 500
|
||||||
|
```
|
||||||
|
应包含 `lastUpdated`、`recentUpdates` 等。
|
||||||
|
|
||||||
|
### 方式二:用现有验证脚本(推荐)
|
||||||
|
|
||||||
|
1. 终端 1:`npm run api`
|
||||||
|
2. 终端 2(可选):`npm run gdelt`(会定时跑 RSS + GDELT)
|
||||||
|
3. 执行验证脚本:
|
||||||
|
```bash
|
||||||
|
./scripts/verify-pipeline.sh
|
||||||
|
```
|
||||||
|
若爬虫未启动想一并测爬虫,可:
|
||||||
|
```bash
|
||||||
|
./scripts/verify-pipeline.sh --start-crawler
|
||||||
|
```
|
||||||
|
脚本会检查:API 健康、态势数据、爬虫状态、资讯表、战损字段、通知接口。
|
||||||
|
|
||||||
|
### 方式三:只测 RSS 抓取(不写库)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run crawler:test
|
||||||
|
```
|
||||||
|
输出为「RSS 抓取: N 条」。0 条时检查网络或 `config.py` 里 `RSS_FEEDS` / `KEYWORDS`。
|
||||||
|
|
||||||
|
### 常见问题
|
||||||
|
|
||||||
|
| 现象 | 可能原因 |
|
||||||
|
|------|----------|
|
||||||
|
| 抓取 0 条 | 网络不通、RSS 被墙、关键词无一命中 |
|
||||||
|
| `situation_update` 为空 | 去重后无新增,或未跑流水线(只跑了 `fetch_all` 未跑 `run_full_pipeline`) |
|
||||||
|
| 前端事件脉络不刷新 | 未启动 `npm run api` 或 WebSocket 未连上(需通过 Vite 代理访问前端) |
|
||||||
|
| 翻译/AI 清洗很慢或报错 | 设 `TRANSLATE_DISABLED=1` 或 `CLEANER_AI_DISABLED=1` 可跳过,用规则兜底 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 故障排查
|
||||||
|
|
||||||
|
| 现象 | 可能原因 | 排查 |
|
||||||
|
|------|----------|------|
|
||||||
|
| 事件脉络始终为空 | 未启动 GDELT 爬虫 | 另开终端运行 `npm run gdelt`,观察是否有 `GDELT 更新 X 条事件` 输出 |
|
||||||
|
| 事件脉络不刷新 | WebSocket 未连上 | 确认 `npm run api` 已启动,前端需通过 `npm run dev` 访问(Vite 会代理 /ws) |
|
||||||
|
| GDELT 抓取失败 | 系统代理超时 / ProxyError | 爬虫默认直连,不走代理;若需代理请设 `CRAWLER_USE_PROXY=1` |
|
||||||
|
| GDELT 抓取失败 | 网络 / GDELT API 限流 | 检查 Python 终端报错;GDELT 在国外,国内网络可能较慢或超时 |
|
||||||
|
| 新闻条数为 0 | RSS 源被墙或关键词不匹配 | 检查 crawler/config.py 中 RSS_FEEDS、KEYWORDS;国内需代理 |
|
||||||
|
| **返回数据偏老** | GDELT 默认 3 个月内按相关性 | 设置 `GDELT_TIMESPAN=1d` 限制为近日;加 `sort=datedesc` 最新优先 |
|
||||||
BIN
crawler/__pycache__/article_fetcher.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/article_fetcher.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/cleaner_ai.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/cleaner_ai.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/cleaner_ai.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/cleaner_ai.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/config.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/config.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/config.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/config.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/db_merge.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/db_merge.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/db_merge.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/db_merge.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/db_writer.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/db_writer.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/db_writer.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/db_writer.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/extractor_ai.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/extractor_ai.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/extractor_dashscope.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/extractor_dashscope.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/extractor_rules.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/extractor_rules.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/extractor_rules.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/extractor_rules.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/news_storage.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/news_storage.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/panel_schema.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/panel_schema.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/parser.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/parser.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/parser.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/parser.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/parser_ai.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/parser_ai.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/parser_ai.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/parser_ai.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/pipeline.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/pipeline.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/pipeline.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/pipeline.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/realtime_conflict_service.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/realtime_conflict_service.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/realtime_conflict_service.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/realtime_conflict_service.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/translate_utils.cpython-311.pyc
Normal file
BIN
crawler/__pycache__/translate_utils.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/__pycache__/translate_utils.cpython-39.pyc
Normal file
BIN
crawler/__pycache__/translate_utils.cpython-39.pyc
Normal file
Binary file not shown.
90
crawler/article_fetcher.py
Normal file
90
crawler/article_fetcher.py
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
从文章 URL 抓取正文,供 AI 提取精确数据使用。
|
||||||
|
RSS 仅提供标题和短摘要,正文可提供伤亡、番号、地点等具体数字与事实。
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# 单页超时(秒)
|
||||||
|
FETCH_TIMEOUT = int(os.environ.get("ARTICLE_FETCH_TIMEOUT", "12"))
|
||||||
|
# 正文最大字符数,避免超长输入
|
||||||
|
MAX_BODY_CHARS = int(os.environ.get("ARTICLE_MAX_BODY_CHARS", "6000"))
|
||||||
|
# 是否启用正文抓取(设为 0 则仅用标题+摘要)
|
||||||
|
FETCH_FULL_ARTICLE = os.environ.get("FETCH_FULL_ARTICLE", "1") == "1"
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_html(html: str) -> str:
|
||||||
|
"""简单去除 HTML 标签与多余空白"""
|
||||||
|
if not html:
|
||||||
|
return ""
|
||||||
|
text = re.sub(r"<script[^>]*>[\s\S]*?</script>", " ", html, flags=re.I)
|
||||||
|
text = re.sub(r"<style[^>]*>[\s\S]*?</style>", " ", text, flags=re.I)
|
||||||
|
text = re.sub(r"<[^>]+>", " ", text)
|
||||||
|
text = re.sub(r"\s+", " ", text).strip()
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_article_body(url: str, timeout: int = FETCH_TIMEOUT) -> Optional[str]:
    """Download ``url`` and return its main text content, or None on failure.

    Prefers BeautifulSoup extraction of <article>/<main>/common content
    containers; falls back to a regex-based tag strip. Non-HTTP URLs,
    non-HTML responses and very short pages all yield None.
    """
    if not url or not url.strip().startswith("http"):
        return None
    try:
        import requests

        headers = {"User-Agent": "US-Iran-Dashboard/1.0 (News Aggregator)"}
        # Bypass system proxies by default (direct connections avoid local
        # proxy timeouts); set CRAWLER_USE_PROXY=1 to honor proxy settings.
        use_proxy = os.environ.get("CRAWLER_USE_PROXY") == "1"
        proxies = None if use_proxy else {"http": None, "https": None}
        resp = requests.get(url, headers=headers, timeout=timeout, proxies=proxies)
        resp.raise_for_status()
        content_type = (resp.headers.get("Content-Type") or "").lower()
        if "html" not in content_type and "xml" not in content_type:
            return None
        html = resp.text
        if not html or len(html) < 200:
            return None
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            # bs4 unavailable: degrade to the regex-based extraction.
            return _strip_html(html)[:MAX_BODY_CHARS]
        try:
            soup = BeautifulSoup(html, "html.parser")
            selectors = (
                "article", "main", "[role='main']", ".article-body",
                ".post-content", ".entry-content", ".content",
            )
            for sel in selectors:
                node = soup.select_one(sel) if sel.startswith((".", "[")) else soup.find(sel)
                if node:
                    text = node.get_text(separator=" ", strip=True)
                    # Only accept a container if it holds substantial text.
                    if len(text) > 300:
                        return _strip_html(text)[:MAX_BODY_CHARS]
            body_text = soup.body.get_text(separator=" ", strip=True) if soup.body else ""
            if len(body_text) > 300:
                return _strip_html(body_text)[:MAX_BODY_CHARS]
        except Exception:
            pass
        # Last resort: strip tags from the raw page.
        return _strip_html(html)[:MAX_BODY_CHARS]
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def enrich_item_with_body(item: dict, max_chars: int = MAX_BODY_CHARS) -> None:
    """Fetch the article body for ``item`` and store it as item["full_text"].

    Mutates ``item`` in place; the stored text is title, summary (when
    present) and body joined by newlines, capped at ``max_chars``.
    No-op when body fetching is disabled, the item has no URL, full_text
    is already present, or the download fails.
    """
    if not FETCH_FULL_ARTICLE:
        return
    url = (item.get("url") or "").strip()
    if not url or item.get("full_text"):
        return
    body = fetch_article_body(url)
    if not body:
        return
    title = (item.get("title") or "").strip()
    summary = (item.get("summary") or "").strip()
    parts = [title, summary, body] if summary else [title, body]
    item["full_text"] = "\n".join(parts)[:max_chars]
|
||||||
125
crawler/cleaner_ai.py
Normal file
125
crawler/cleaner_ai.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
AI 清洗新闻数据,严格按面板字段约束输出
|
||||||
|
面板 EventTimelinePanel 所需:summary(≤120字)、category(枚举)、severity(枚举)
|
||||||
|
优先使用 DASHSCOPE_API_KEY(通义,无需 Ollama),否则 Ollama,最后规则兜底
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
CLEANER_AI_DISABLED = os.environ.get("CLEANER_AI_DISABLED", "0") == "1"
|
||||||
|
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
|
||||||
|
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "").strip()
|
||||||
|
|
||||||
|
# 面板 schema:必须与 EventTimelinePanel / SituationUpdate 一致
|
||||||
|
SUMMARY_MAX_LEN = 120 # 面板 line-clamp-2 展示
|
||||||
|
CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
|
||||||
|
SEVERITIES = ("low", "medium", "high", "critical")
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_summary(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
|
||||||
|
"""确保 summary 符合面板:纯文本、无换行、限制长度"""
|
||||||
|
if not text or not isinstance(text, str):
|
||||||
|
return ""
|
||||||
|
s = re.sub(r"\s+", " ", str(text).strip())
|
||||||
|
s = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f]", "", s) # 去除控制字符
|
||||||
|
return s[:max_len].rstrip()
|
||||||
|
|
||||||
|
|
||||||
|
def _rule_clean(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Rule-based fallback: whitespace/control-char cleanup plus truncation, no AI."""
    return _sanitize_summary(text, max_len)
|
||||||
|
|
||||||
|
|
||||||
|
def _call_dashscope_summary(text: str, max_len: int, timeout: int = 8) -> Optional[str]:
    """Summarize ``text`` into 1-2 concise Chinese sentences via DashScope (qwen-turbo).

    Returns the sanitized summary, or None when the key is missing, AI is
    disabled, the input is too short, or any call/parse step fails — the
    caller falls back to rule-based cleaning.

    NOTE(review): ``timeout`` is kept for interface compatibility but is
    not passed to the DashScope SDK call below — confirm whether the SDK
    supports a per-call timeout.
    """
    if not DASHSCOPE_API_KEY or CLEANER_AI_DISABLED or not text or len(str(text).strip()) < 5:
        return None
    try:
        import dashscope
        from http import HTTPStatus

        dashscope.api_key = DASHSCOPE_API_KEY
        # Bug fix: the length hint previously read "限73,728字内", which
        # contradicted the enforced max_len truncation; state the real cap.
        prompt = f"""将新闻提炼为1-2句简洁中文事实,直接输出纯文本,不要标号、引号、解释。限{max_len}字内。

原文:{str(text)[:350]}

输出:"""
        r = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=150,
        )
        if r.status_code != HTTPStatus.OK:
            return None
        out = (r.output.get("choices", [{}])[0].get("message", {}).get("content", "") or "").strip()
        out = re.sub(r"^[\d\.\-\*\s]+", "", out)  # drop leading list numbering
        out = re.sub(r"^['\"\s]+|['\"\s]+$", "", out)  # drop wrapping quotes
        out = _sanitize_summary(out, max_len)
        if out and len(out) > 3:
            return out
        return None
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _call_ollama_summary(text: str, max_len: int, timeout: int = 6) -> Optional[str]:
    """Summarize ``text`` via a local Ollama instance; None on any failure.

    Output is sanitized to plain text and capped at ``max_len`` characters;
    the caller falls back to rule-based cleaning when this returns None.
    """
    if CLEANER_AI_DISABLED or not text or len(str(text).strip()) < 5:
        return None
    try:
        import requests

        # Bug fix: the length hint previously read "限73,728字内", which
        # contradicted the enforced max_len truncation; state the real cap.
        prompt = f"""将新闻提炼为1-2句简洁中文事实,直接输出纯文本,不要标号、引号、解释。限{max_len}字内。

原文:{str(text)[:350]}

输出:"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "options": {"num_predict": 150},
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        out = (r.json().get("message", {}).get("content", "") or "").strip()
        out = re.sub(r"^[\d\.\-\*\s]+", "", out)  # drop leading list numbering
        out = re.sub(r"^['\"\s]+|['\"\s]+$", "", out)  # drop wrapping quotes
        out = _sanitize_summary(out, max_len)
        if out and len(out) > 3:
            return out
        return None
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def clean_news_for_panel(text: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Produce the summary shown in EventTimelinePanel: plain text, at most ``max_len`` chars.

    Preference order: DashScope (when a key is configured), then local
    Ollama, then rule-based cleanup. Always returns a string.
    """
    if not text or not isinstance(text, str):
        return ""
    stripped = str(text).strip()
    if not stripped:
        return ""
    # Commercial model first when available, otherwise the local model.
    if DASHSCOPE_API_KEY:
        ai_summary = _call_dashscope_summary(stripped, max_len, timeout=8)
    else:
        ai_summary = _call_ollama_summary(stripped, max_len, timeout=6)
    return ai_summary if ai_summary else _rule_clean(stripped, max_len)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_category(cat: str) -> str:
    """Clamp ``cat`` to the panel's category enum; unknown values map to "other"."""
    if cat in CATEGORIES:
        return cat
    return "other"
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_severity(sev: str) -> str:
    """Clamp ``sev`` to the panel's severity enum; unknown values map to "medium"."""
    if sev in SEVERITIES:
        return sev
    return "medium"
|
||||||
108
crawler/config.py
Normal file
108
crawler/config.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""爬虫配置"""
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# 数据库路径(与 server 共用 SQLite)
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))
|
||||||
|
|
||||||
|
# Node API 地址(用于通知推送)
|
||||||
|
API_BASE = os.environ.get("API_BASE", "http://localhost:3001")
|
||||||
|
|
||||||
|
# 阿里云 DashScope API Key(用于 AI 提取面板数据,不设则回退到规则/Ollama)
|
||||||
|
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
|
||||||
|
|
||||||
|
# 抓取间隔(秒)
|
||||||
|
CRAWL_INTERVAL = int(os.environ.get("CRAWL_INTERVAL", "300"))
|
||||||
|
|
||||||
|
# 单源抓取超时(秒),避免某源卡住拖垮整轮
|
||||||
|
FEED_TIMEOUT = int(os.environ.get("FEED_TIMEOUT", "12"))
|
||||||
|
|
||||||
|
# RSS 源:世界主流媒体,覆盖美伊/中东多视角
|
||||||
|
# 每项为 URL 字符串,或 {"name": "显示名", "url": "..."} 便于日志与排查
|
||||||
|
RSS_FEEDS = [
|
||||||
|
# 美国
|
||||||
|
"https://feeds.reuters.com/reuters/topNews",
|
||||||
|
"https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
|
||||||
|
# 英国
|
||||||
|
"https://feeds.bbci.co.uk/news/world/rss.xml",
|
||||||
|
"https://feeds.bbci.co.uk/news/world/middle_east/rss.xml",
|
||||||
|
"https://www.theguardian.com/world/rss",
|
||||||
|
# 法国
|
||||||
|
"https://www.france24.com/en/rss",
|
||||||
|
# 德国
|
||||||
|
"https://rss.dw.com/xml/rss-en-world",
|
||||||
|
# 俄罗斯
|
||||||
|
"https://tass.com/rss/v2.xml",
|
||||||
|
"https://www.rt.com/rss/",
|
||||||
|
# 中国
|
||||||
|
"https://english.news.cn/rss/world.xml",
|
||||||
|
"https://www.cgtn.com/rss/world",
|
||||||
|
# 凤凰网(军事 + 国际,中文视角)
|
||||||
|
{"name": "凤凰军事", "url": "https://feedx.net/rss/ifengmil.xml"},
|
||||||
|
{"name": "凤凰国际", "url": "https://feedx.net/rss/ifengworld.xml"},
|
||||||
|
# 境内媒体(境内直连友好,可补中文视角)
|
||||||
|
{"name": "人民网军事", "url": "http://www.people.com.cn/rss/military.xml"},
|
||||||
|
{"name": "人民网国际", "url": "http://www.people.com.cn/rss/world.xml"},
|
||||||
|
{"name": "新浪军事", "url": "http://rss.sina.com.cn/rss/jczs/index.shtml"},
|
||||||
|
{"name": "新浪新闻", "url": "http://rss.sina.com.cn/rss/roll/news.xml"},
|
||||||
|
{"name": "中国日报国际", "url": "http://www.chinadaily.com.cn/rss/world_rss.xml"},
|
||||||
|
{"name": "中国军网", "url": "https://english.chinamil.com.cn/rss.xml"},
|
||||||
|
# 伊朗
|
||||||
|
"https://www.presstv.ir/rss",
|
||||||
|
# 卡塔尔(中东)
|
||||||
|
"https://www.aljazeera.com/xml/rss/all.xml",
|
||||||
|
"https://www.aljazeera.com/xml/rss/middleeast.xml",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_feed_sources():
    """Return [(name, url), ...] built from RSS_FEEDS.

    Entries may be plain URL strings or {"name": ..., "url": ...} dicts;
    the display name defaults to the URL's host (or "rss"). Entries with
    an empty URL are skipped.
    """
    import urllib.parse

    sources = []
    for entry in RSS_FEEDS:
        if isinstance(entry, dict):
            name = entry.get("name") or "rss"
            url = entry.get("url", "").strip()
        else:
            url = (entry or "").strip()
            name = urllib.parse.urlparse(url).netloc or "rss"
        if url:
            sources.append((name, url))
    return sources
|
||||||
|
|
||||||
|
# 关键词过滤:至少匹配一个才会入库(与地图区域对应:伊拉克/叙利亚/海湾/红海/地中海等)
|
||||||
|
KEYWORDS = [
|
||||||
|
# 伊朗
|
||||||
|
"iran", "iranian", "tehran", "德黑兰", "bushehr", "布什尔", "abbas", "阿巴斯",
|
||||||
|
# 以色列 / 巴勒斯坦
|
||||||
|
"israel", "以色列", "hamas", "gaza", "加沙", "hezbollah", "真主党",
|
||||||
|
# 美国
|
||||||
|
"usa", "us ", "american", "美军", "美国", "pentagon",
|
||||||
|
# 区域(地图覆盖)
|
||||||
|
"middle east", "中东", "persian gulf", "波斯湾", "gulf of oman", "阿曼湾",
|
||||||
|
"arabian sea", "阿拉伯海", "red sea", "红海", "mediterranean", "地中海",
|
||||||
|
"strait of hormuz", "霍尔木兹",
|
||||||
|
# 伊拉克 / 叙利亚
|
||||||
|
"iraq", "伊拉克", "baghdad", "巴格达", "erbil", "埃尔比勒", "basra", "巴士拉",
|
||||||
|
"syria", "叙利亚", "damascus", "大马士革", "deir", "代尔祖尔",
|
||||||
|
# 海湾国家
|
||||||
|
"saudi", "沙特", "riyadh", "利雅得", "qatar", "卡塔尔", "doha", "多哈",
|
||||||
|
"uae", "emirates", "阿联酋", "dubai", "迪拜", "abu dhabi",
|
||||||
|
"bahrain", "巴林", "kuwait", "科威特", "oman", "阿曼", "yemen", "也门",
|
||||||
|
# 约旦 / 土耳其 / 埃及 / 吉布提 / 黎巴嫩
|
||||||
|
"jordan", "约旦", "amman", "安曼",
|
||||||
|
"lebanon", "黎巴嫩",
|
||||||
|
"turkey", "土耳其", "incirlik", "因吉尔利克",
|
||||||
|
"egypt", "埃及", "cairo", "开罗", "sinai", "西奈",
|
||||||
|
"djibouti", "吉布提",
|
||||||
|
# 军事 / 基地
|
||||||
|
"al-asad", "al asad", "阿萨德", "al udeid", "乌代德", "incirlik",
|
||||||
|
"strike", "attack", "military", "missile", "核", "nuclear",
|
||||||
|
"carrier", "航母", "drone", "uav", "无人机", "retaliation", "报复",
|
||||||
|
"base", "基地", "troops", "troop", "soldier", "personnel",
|
||||||
|
# 胡塞 / 武装 / 军力
|
||||||
|
"houthi", "胡塞", "houthis",
|
||||||
|
"idf", "irgc", "革命卫队", "qassem soleimani", "苏莱曼尼",
|
||||||
|
]
|
||||||
194
crawler/db_merge.py
Normal file
194
crawler/db_merge.py
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
将 AI 提取的结构化数据合并到 SQLite
|
||||||
|
与 panel schema 及 situationData.getSituation 对齐,支持回放
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))
|
||||||
|
|
||||||
|
# 单次合并时各字段增量的上限,防止误把「累计总数」当增量导致数据剧增(可选,设为 0 表示不设限)
|
||||||
|
MAX_DELTA_PER_MERGE = {
|
||||||
|
"personnel_killed": 500, "personnel_wounded": 1000, "civilian_killed": 300, "civilian_wounded": 500,
|
||||||
|
"bases_destroyed": 5, "bases_damaged": 10,
|
||||||
|
"aircraft": 50, "warships": 10, "armor": 30, "vehicles": 100,
|
||||||
|
"drones": 50, "missiles": 200, "helicopters": 20, "submarines": 5, "carriers": 2,
|
||||||
|
"civilian_ships": 20, "airport_port": 10,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _clamp_delta(key: str, value: int) -> int:
|
||||||
|
"""单次增量上限,避免误提「累计」导致波动"""
|
||||||
|
cap = MAX_DELTA_PER_MERGE.get(key, 0)
|
||||||
|
if cap <= 0:
|
||||||
|
return max(0, value)
|
||||||
|
return max(0, min(value, cap))
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_tables(conn: sqlite3.Connection) -> None:
|
||||||
|
"""确保所需表存在(与 db.js 一致)"""
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS situation_update (
|
||||||
|
id TEXT PRIMARY KEY, timestamp TEXT NOT NULL, category TEXT NOT NULL,
|
||||||
|
summary TEXT NOT NULL, severity TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS combat_losses (
|
||||||
|
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
|
||||||
|
bases_destroyed INTEGER NOT NULL, bases_damaged INTEGER NOT NULL,
|
||||||
|
personnel_killed INTEGER NOT NULL, personnel_wounded INTEGER NOT NULL,
|
||||||
|
aircraft INTEGER NOT NULL, warships INTEGER NOT NULL, armor INTEGER NOT NULL, vehicles INTEGER NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
try:
|
||||||
|
conn.execute("ALTER TABLE combat_losses ADD COLUMN civilian_killed INTEGER NOT NULL DEFAULT 0")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
conn.execute("ALTER TABLE combat_losses ADD COLUMN civilian_wounded INTEGER NOT NULL DEFAULT 0")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
conn.execute("ALTER TABLE combat_losses ADD COLUMN updated_at TEXT DEFAULT (datetime('now'))")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass
|
||||||
|
for col in ("drones", "missiles", "helicopters", "submarines", "tanks", "carriers", "civilian_ships", "airport_port"):
|
||||||
|
try:
|
||||||
|
conn.execute(f"ALTER TABLE combat_losses ADD COLUMN {col} INTEGER NOT NULL DEFAULT 0")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass
|
||||||
|
conn.execute("CREATE TABLE IF NOT EXISTS wall_street_trend (id INTEGER PRIMARY KEY AUTOINCREMENT, time TEXT NOT NULL, value INTEGER NOT NULL)")
|
||||||
|
conn.execute("CREATE TABLE IF NOT EXISTS retaliation_current (id INTEGER PRIMARY KEY CHECK (id = 1), value INTEGER NOT NULL)")
|
||||||
|
conn.execute("CREATE TABLE IF NOT EXISTS retaliation_history (id INTEGER PRIMARY KEY AUTOINCREMENT, time TEXT NOT NULL, value INTEGER NOT NULL)")
|
||||||
|
conn.execute("CREATE TABLE IF NOT EXISTS situation (id INTEGER PRIMARY KEY CHECK (id = 1), data TEXT NOT NULL, updated_at TEXT NOT NULL)")
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def merge(extracted: Dict[str, Any], db_path: Optional[str] = None) -> bool:
    """Merge AI-extracted structured data into SQLite; return whether anything changed.

    Supported keys in ``extracted``:
      - situation_update: one timeline entry (deduped by content hash).
      - combat_losses_delta: per-side increments, clamped via _clamp_delta
        before being added to the stored totals (never absolute counts).
      - retaliation / wall_street: point-in-time gauge values.
      - key_location_updates: status/damage updates matched by name keywords.

    Returns False when the DB file does not exist. On any update the shared
    ``situation`` row's timestamp is bumped so readers see a fresh snapshot.
    """
    path = db_path or DB_PATH
    if not os.path.exists(path):
        return False
    conn = sqlite3.connect(path, timeout=10)
    try:
        _ensure_tables(conn)
        updated = False

        # --- situation_update ------------------------------------------------
        if "situation_update" in extracted:
            u = extracted["situation_update"]
            uid = f"ai_{hash(u.get('summary','')+u.get('timestamp','')) % 10**10}"
            # Bug fix: conn.total_changes is cumulative over the connection's
            # lifetime, so a bare "> 0" test stays true forever after the
            # first successful write. Snapshot before each statement and
            # compare afterwards instead.
            before = conn.total_changes
            conn.execute(
                "INSERT OR IGNORE INTO situation_update (id, timestamp, category, summary, severity) VALUES (?, ?, ?, ?, ?)",
                (uid, u.get("timestamp", ""), u.get("category", "other"), u.get("summary", "")[:500], u.get("severity", "medium")),
            )
            if conn.total_changes > before:
                updated = True

        # --- combat_losses: increments only ----------------------------------
        # AI output is this one report's *new* count; it is clamped and added
        # to the stored totals so cumulative figures are never double-counted.
        if "combat_losses_delta" in extracted:
            for side, delta in extracted["combat_losses_delta"].items():
                if side not in ("us", "iran"):
                    continue
                try:
                    row = conn.execute(
                        "SELECT personnel_killed,personnel_wounded,civilian_killed,civilian_wounded,bases_destroyed,bases_damaged,aircraft,warships,armor,vehicles,drones,missiles,helicopters,submarines,tanks,carriers,civilian_ships,airport_port FROM combat_losses WHERE side = ?",
                        (side,),
                    ).fetchone()
                    cur = {"personnel_killed": 0, "personnel_wounded": 0, "civilian_killed": 0, "civilian_wounded": 0,
                           "bases_destroyed": 0, "bases_damaged": 0, "aircraft": 0, "warships": 0, "armor": 0, "vehicles": 0,
                           "drones": 0, "missiles": 0, "helicopters": 0, "submarines": 0, "tanks": 0, "carriers": 0, "civilian_ships": 0, "airport_port": 0}
                    if row:
                        # len(row) guards tolerate DBs created before the newer columns existed.
                        cur = {
                            "personnel_killed": row[0], "personnel_wounded": row[1], "civilian_killed": row[2] or 0,
                            "civilian_wounded": row[3] or 0, "bases_destroyed": row[4], "bases_damaged": row[5],
                            "aircraft": row[6], "warships": row[7], "armor": row[8], "vehicles": row[9],
                            "drones": row[10] if len(row) > 10 else 0, "missiles": row[11] if len(row) > 11 else 0,
                            "helicopters": row[12] if len(row) > 12 else 0, "submarines": row[13] if len(row) > 13 else 0,
                            "tanks": row[14] if len(row) > 14 else 0, "carriers": row[15] if len(row) > 15 else (row[14] if len(row) > 14 else 0),
                            "civilian_ships": row[16] if len(row) > 16 else 0, "airport_port": row[17] if len(row) > 17 else 0,
                        }
                    pk = max(0, (cur["personnel_killed"] or 0) + _clamp_delta("personnel_killed", delta.get("personnel_killed", 0)))
                    pw = max(0, (cur["personnel_wounded"] or 0) + _clamp_delta("personnel_wounded", delta.get("personnel_wounded", 0)))
                    ck = max(0, (cur["civilian_killed"] or 0) + _clamp_delta("civilian_killed", delta.get("civilian_killed", 0)))
                    cw = max(0, (cur["civilian_wounded"] or 0) + _clamp_delta("civilian_wounded", delta.get("civilian_wounded", 0)))
                    bd = max(0, (cur["bases_destroyed"] or 0) + _clamp_delta("bases_destroyed", delta.get("bases_destroyed", 0)))
                    bm = max(0, (cur["bases_damaged"] or 0) + _clamp_delta("bases_damaged", delta.get("bases_damaged", 0)))
                    ac = max(0, (cur["aircraft"] or 0) + _clamp_delta("aircraft", delta.get("aircraft", 0)))
                    ws = max(0, (cur["warships"] or 0) + _clamp_delta("warships", delta.get("warships", 0)))
                    ar = max(0, (cur["armor"] or 0) + _clamp_delta("armor", delta.get("armor", 0)))
                    vh = max(0, (cur["vehicles"] or 0) + _clamp_delta("vehicles", delta.get("vehicles", 0)))
                    dr = max(0, (cur["drones"] or 0) + _clamp_delta("drones", delta.get("drones", 0)))
                    ms = max(0, (cur["missiles"] or 0) + _clamp_delta("missiles", delta.get("missiles", 0)))
                    hp = max(0, (cur["helicopters"] or 0) + _clamp_delta("helicopters", delta.get("helicopters", 0)))
                    sb = max(0, (cur["submarines"] or 0) + _clamp_delta("submarines", delta.get("submarines", 0)))
                    cr = max(0, (cur["carriers"] or 0) + _clamp_delta("carriers", delta.get("carriers", 0)))
                    cs = max(0, (cur["civilian_ships"] or 0) + _clamp_delta("civilian_ships", delta.get("civilian_ships", 0)))
                    ap = max(0, (cur["airport_port"] or 0) + _clamp_delta("airport_port", delta.get("airport_port", 0)))
                    ts = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z")
                    before = conn.total_changes  # same cumulative-counter fix as above
                    if row:
                        conn.execute(
                            """UPDATE combat_losses SET personnel_killed=?, personnel_wounded=?, civilian_killed=?, civilian_wounded=?,
                               bases_destroyed=?, bases_damaged=?, aircraft=?, warships=?, armor=?, vehicles=?,
                               drones=?, missiles=?, helicopters=?, submarines=?, tanks=?, carriers=?, civilian_ships=?, airport_port=?, updated_at=? WHERE side=?""",
                            (pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, cur.get("tanks", 0), cr, cs, ap, ts, side),
                        )
                    else:
                        conn.execute(
                            """INSERT OR REPLACE INTO combat_losses (side, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
                               bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles, drones, missiles, helicopters, submarines, tanks, carriers, civilian_ships, airport_port, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                            (side, pk, pw, ck, cw, bd, bm, ac, ws, ar, vh, dr, ms, hp, sb, 0, cr, cs, ap, ts),
                        )
                    if conn.total_changes > before:
                        updated = True
                except Exception:
                    # Best-effort per side: one malformed delta must not abort the merge.
                    pass

        # --- retaliation gauge -----------------------------------------------
        if "retaliation" in extracted:
            r = extracted["retaliation"]
            conn.execute("INSERT OR REPLACE INTO retaliation_current (id, value) VALUES (1, ?)", (r["value"],))
            conn.execute("INSERT INTO retaliation_history (time, value) VALUES (?, ?)", (r["time"], r["value"]))
            updated = True

        # --- wall street trend -----------------------------------------------
        if "wall_street" in extracted:
            w = extracted["wall_street"]
            conn.execute("INSERT INTO wall_street_trend (time, value) VALUES (?, ?)", (w["time"], w["value"]))
            updated = True

        # --- key_location: mark attacked facilities on both sides ------------
        if "key_location_updates" in extracted:
            try:
                for u in extracted["key_location_updates"]:
                    kw_raw = (u.get("name_keywords") or "").strip()
                    if not kw_raw:
                        continue
                    # Accept either "a|b|c" or "a b c" separators.
                    kw = [k.strip() for k in kw_raw.replace("|", " ").split() if k.strip()]
                    side = u.get("side")
                    status = (u.get("status") or "attacked")[:20]
                    dmg = u.get("damage_level", 2)
                    if not kw or side not in ("us", "iran"):
                        continue
                    # Substring match on name, OR-joined per keyword (works
                    # for both Chinese and English names).
                    conditions = " OR ".join("name LIKE ?" for _ in kw)
                    params = [status, dmg, side] + [f"%{k}%" for k in kw]
                    res = conn.execute(
                        f"UPDATE key_location SET status=?, damage_level=? WHERE side=? AND ({conditions})",
                        params,
                    )
                    if res.rowcount > 0:
                        updated = True
            except Exception:
                pass

        if updated:
            # Bump the shared snapshot timestamp so API readers refresh.
            conn.execute("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)", (datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z"),))
        conn.commit()
        return updated
    except Exception as e:
        conn.rollback()
        raise e
    finally:
        conn.close()
|
||||||
110
crawler/db_writer.py
Normal file
110
crawler/db_writer.py
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""写入 SQLite 并确保 situation_update 表存在"""
|
||||||
|
import sqlite3
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from config import DB_PATH
|
||||||
|
|
||||||
|
CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
|
||||||
|
SEVERITIES = ("low", "medium", "high", "critical")
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_table(conn: sqlite3.Connection) -> None:
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS situation_update (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
timestamp TEXT NOT NULL,
|
||||||
|
category TEXT NOT NULL,
|
||||||
|
summary TEXT NOT NULL,
|
||||||
|
severity TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _make_id(title: str, url: str, published: str) -> str:
|
||||||
|
raw = f"{title}|{url}|{published}"
|
||||||
|
return "nw_" + hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def _to_utc_iso(dt: datetime) -> str:
|
||||||
|
if dt.tzinfo:
|
||||||
|
dt = dt.astimezone(timezone.utc)
|
||||||
|
return dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||||
|
|
||||||
|
|
||||||
|
def insert_update(
    conn: sqlite3.Connection,
    title: str,
    summary: str,
    url: str,
    published: datetime,
    category: str = "other",
    severity: str = "medium",
) -> bool:
    """Insert one situation update; skip silently when the id already exists.

    Returns True only when a new row was actually inserted. Category and
    severity values outside the allowed sets fall back to "other" /
    "medium"; the summary is capped at 500 characters.
    """
    _ensure_table(conn)
    ts = _to_utc_iso(published)
    uid = _make_id(title, url, ts)
    if category not in CATEGORIES:
        category = "other"
    if severity not in SEVERITIES:
        severity = "medium"
    try:
        cur = conn.execute(
            "INSERT OR IGNORE INTO situation_update (id, timestamp, category, summary, severity) VALUES (?, ?, ?, ?, ?)",
            (uid, ts, category, summary[:500], severity),
        )
        conn.commit()
        # BUG FIX: conn.total_changes is cumulative over the connection's
        # lifetime, so once any earlier insert succeeded the old check
        # reported True even for ignored duplicates. cursor.rowcount
        # reflects only this statement.
        return cur.rowcount > 0
    except Exception:
        # Best-effort insert: roll back and report failure without raising.
        conn.rollback()
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def touch_situation_updated_at(conn: sqlite3.Connection) -> None:
    """Refresh situation.updated_at so clients see the panel data as fresh.

    Keeps the singleton row (id=1); data is reset to '{}' as before.
    """
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # formatted the same way produces an identical timestamp string.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    conn.execute(
        "INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)",
        (now,),
    )
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def write_updates(updates: list[dict]) -> int:
    """
    Persist a batch of situation updates.

    updates: [{"title","summary","url","published","category","severity"}, ...]
    "published" may be a datetime, an ISO-8601 string (trailing 'Z'
    accepted), or missing/None — the latter falls back to the current UTC
    time. Returns the number of newly inserted rows; touches
    situation.updated_at only when at least one row was added.
    """
    if not os.path.exists(DB_PATH):
        # No database yet — the API server owns its creation; nothing to do.
        return 0

    def _utc_now() -> datetime:
        # datetime.utcnow() is deprecated (Python 3.12+); an aware UTC
        # datetime serializes to the same string via _to_utc_iso.
        return datetime.now(timezone.utc)

    def _coerce_published(value) -> datetime:
        # Accept datetime, ISO string, or anything else (-> "now") so a
        # malformed feed entry cannot crash the whole batch.
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            try:
                return datetime.fromisoformat(value.replace("Z", "+00:00"))
            except ValueError:
                return _utc_now()
        return _utc_now()

    conn = sqlite3.connect(DB_PATH, timeout=10)
    try:
        count = 0
        for u in updates:
            ok = insert_update(
                conn,
                title=u.get("title", "")[:200],
                summary=u.get("summary", "") or u.get("title", ""),
                url=u.get("url", ""),
                published=_coerce_published(u.get("published")),
                category=u.get("category", "other"),
                severity=u.get("severity", "medium"),
            )
            if ok:
                count += 1
        if count > 0:
            touch_situation_updated_at(conn)
        return count
    finally:
        conn.close()
|
||||||
130
crawler/extractor_ai.py
Normal file
130
crawler/extractor_ai.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
从新闻文本中 AI 提取结构化数据,映射到面板 schema
|
||||||
|
输出符合 panel_schema 的字段,供 db_merge 写入
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from panel_schema import validate_category, validate_severity, validate_summary
|
||||||
|
|
||||||
|
CLEANER_AI_DISABLED = os.environ.get("CLEANER_AI_DISABLED", "0") == "1"
|
||||||
|
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
|
||||||
|
|
||||||
|
|
||||||
|
# 用于 AI 提取的原文最大长度(有正文时取更长以提取精确数据)
|
||||||
|
EXTRACT_TEXT_MAX_LEN = int(os.environ.get("EXTRACT_TEXT_MAX_LEN", "4000"))
|
||||||
|
|
||||||
|
|
||||||
|
def _call_ollama_extract(text: str, timeout: int = 15) -> Optional[Dict[str, Any]]:
    """Ask a local Ollama model to extract structured facts from a news text.

    Sends the (truncated) text with a Chinese extraction prompt to the
    Ollama chat endpoint and returns the model's JSON answer as a dict.
    Returns None when AI cleaning is disabled, the text is trivially
    short, the HTTP call fails, or the answer is not valid JSON — all
    failures are deliberately swallowed (best-effort enrichment).
    """
    if CLEANER_AI_DISABLED or not text or len(str(text).strip()) < 10:
        return None
    try:
        # requests is imported lazily so the module loads even when the
        # dependency is absent and AI extraction is unused.
        import requests
        raw = str(text).strip()[:EXTRACT_TEXT_MAX_LEN]
        prompt = f"""从以下美伊/中东新闻**全文或摘要**中,提取**报道明确给出的数字与事实**,输出 JSON。规则:
1. 仅填写报道中**直接出现、可核对**的数据,不要推测或估算。
2. 无明确依据的字段**必须省略**,不要填 0 或猜。
3. **战损一律按增量**:只填本则报道中「本次/此次/今日/本轮」**新增**的伤亡或损毁数量。若报道只给「累计总数」「迄今共」「total so far」等,**不要填写**该字段(避免与库内已有累计值重复叠加)。
4. **攻击地点**:提取双方遭袭的具体地点。美军/盟军基地被打击 → side=us;伊朗/亲伊设施被打击 → side=iran。name_keywords 用「中文名|英文名」便于匹配,可填多处。

字段说明:
- summary: 1-2 句中文事实概括,≤80 字
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
- 战损(**仅填本则报道的新增增量**,如「此次 5 人丧生」「今日又损 2 架」):
us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded,
us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded,
us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged,
us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles,
us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines,
us_carriers, iran_carriers, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
- retaliation_sentiment: 0-100,仅当报道涉及伊朗报复/反击情绪时
- wall_street_value: 0-100,仅当报道涉及美股/市场时
- key_location_updates: **双方攻击地点**。每项 {{ "name_keywords": "阿萨德|asad|al-asad", "side": "us或iran(被打击方)", "status": "attacked", "damage_level": 1-3 }}。美军基地例:阿萨德|asad、乌代德|udeid、埃尔比勒|erbil、因吉尔利克|incirlik。伊朗例:德黑兰|tehran、布什尔|bushehr、伊斯法罕|isfahan、阿巴斯|abbas、纳坦兹|natanz

原文:
{raw}

直接输出 JSON,不要解释:"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                # Cap generation length; the expected JSON is small.
                "options": {"num_predict": 384},
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        # `raw` is reused here for the model's reply text.
        raw = (r.json().get("message", {}).get("content", "") or "").strip()
        # Strip a surrounding Markdown code fence if the model added one.
        raw = re.sub(r"^```\w*\s*|\s*```$", "", raw)
        return json.loads(raw)
    except Exception:
        # Network/parse errors are silenced by design: extraction is optional.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """
    Extract panel-schema data from a news text via the local Ollama model.

    Returns a dict containing any of: situation_update,
    combat_losses_delta, retaliation, wall_street, key_location_updates.
    Empty dict when the model is unavailable or returned nothing usable.
    `timestamp` defaults to the current UTC time in '.000Z' ISO form.
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    out: Dict[str, Any] = {}
    parsed = _call_ollama_extract(text)
    if not parsed:
        return out
    # situation_update: summary/category/severity are run through the
    # panel_schema validators so invalid values are normalized.
    if parsed.get("summary"):
        out["situation_update"] = {
            "summary": validate_summary(str(parsed["summary"])[:120], 120),
            "category": validate_category(str(parsed.get("category", "other")).lower()),
            "severity": validate_severity(str(parsed.get("severity", "medium")).lower()),
            "timestamp": ts,
        }
    # combat_losses delta: copy only numeric us_*/iran_* fields, clamped >= 0.
    # NOTE(review): isinstance(..., (int, float)) also accepts bool
    # (True -> 1); presumably harmless — confirm against model output.
    loss_us = {}
    loss_ir = {}
    for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "carriers", "civilian_ships", "airport_port"]:
        uk = f"us_{k}"
        ik = f"iran_{k}"
        if uk in parsed and isinstance(parsed[uk], (int, float)):
            loss_us[k] = max(0, int(parsed[uk]))
        if ik in parsed and isinstance(parsed[ik], (int, float)):
            loss_ir[k] = max(0, int(parsed[ik]))
    if loss_us or loss_ir:
        out["combat_losses_delta"] = {}
        if loss_us:
            out["combat_losses_delta"]["us"] = loss_us
        if loss_ir:
            out["combat_losses_delta"]["iran"] = loss_ir
    # retaliation sentiment gauge: accept only 0-100 numeric values.
    if "retaliation_sentiment" in parsed:
        v = parsed["retaliation_sentiment"]
        if isinstance(v, (int, float)) and 0 <= v <= 100:
            out["retaliation"] = {"value": int(v), "time": ts}
    # wall_street market gauge: same 0-100 range rule.
    if "wall_street_value" in parsed:
        v = parsed["wall_street_value"]
        if isinstance(v, (int, float)) and 0 <= v <= 100:
            out["wall_street"] = {"time": ts, "value": int(v)}
    # key_location_updates: bases/facilities reported as attacked.
    # Keep only well-formed entries; damage_level is clamped to 1..3
    # and defaults to 2 when missing or non-numeric.
    if "key_location_updates" in parsed and isinstance(parsed["key_location_updates"], list):
        valid = []
        for u in parsed["key_location_updates"]:
            if isinstance(u, dict) and u.get("name_keywords") and u.get("side") in ("us", "iran"):
                valid.append({
                    "name_keywords": str(u["name_keywords"]),
                    "side": u["side"],
                    "status": str(u.get("status", "attacked"))[:20],
                    "damage_level": min(3, max(1, int(u["damage_level"]))) if isinstance(u.get("damage_level"), (int, float)) else 2,
                })
        if valid:
            out["key_location_updates"] = valid
    return out
|
||||||
126
crawler/extractor_dashscope.py
Normal file
126
crawler/extractor_dashscope.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
阿里云 DashScope(通义千问)提取面板结构化数据
|
||||||
|
从新闻文本中提取战损、报复指数、基地状态等,供 db_merge 落库
|
||||||
|
API Key 通过环境变量 DASHSCOPE_API_KEY 配置
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from panel_schema import validate_category, validate_severity, validate_summary
|
||||||
|
|
||||||
|
|
||||||
|
EXTRACT_TEXT_MAX_LEN = int(os.environ.get("EXTRACT_TEXT_MAX_LEN", "4000"))
|
||||||
|
|
||||||
|
|
||||||
|
def _call_dashscope_extract(text: str, timeout: int = 15) -> Optional[Dict[str, Any]]:
    """Ask Alibaba DashScope (qwen-turbo) to extract structured facts.

    Mirrors _call_ollama_extract but uses the hosted DashScope API; the key
    comes from the DASHSCOPE_API_KEY environment variable. Returns the
    parsed JSON dict, or None when the key/text is missing, the call
    fails, or the reply is not valid JSON (all errors swallowed).
    NOTE(review): the `timeout` parameter is not forwarded to
    dashscope.Generation.call — confirm whether that is intended.
    """
    api_key = os.environ.get("DASHSCOPE_API_KEY", "").strip()
    if not api_key or not text or len(str(text).strip()) < 10:
        return None
    try:
        # dashscope is imported lazily so the module loads without the SDK.
        import dashscope
        from http import HTTPStatus

        dashscope.api_key = api_key
        raw = str(text).strip()[:EXTRACT_TEXT_MAX_LEN]

        prompt = f"""从以下美伊/中东新闻**全文或摘要**中,提取**报道明确给出的数字与事实**,输出 JSON。规则:
1. 仅填写报道中**直接出现、可核对**的数据,不要推测或估算。
2. 无明确依据的字段**必须省略**,不要填 0 或猜。
3. **战损一律按增量**:只填本则报道中「本次/此次/今日」**新增**数量。报道若只给「累计总数」「迄今共」**不要填**该字段。
4. **攻击地点**:提取双方遭袭地点。美军/盟军基地被打击 → side=us;伊朗/亲伊设施被打击 → side=iran。name_keywords 用「中文|英文」,可填多处。

字段:
- summary: 1-2 句中文事实概括,≤80 字
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical
- 战损(**仅填本则报道的新增增量**): us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded, us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded, us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged, us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles, us_drones, iran_drones, us_missiles, iran_missiles, us_helicopters, iran_helicopters, us_submarines, iran_submarines, us_carriers, iran_carriers, us_civilian_ships, iran_civilian_ships, us_airport_port, iran_airport_port
- retaliation_sentiment: 0-100(仅当报道涉及伊朗报复情绪时)
- wall_street_value: 0-100(仅当报道涉及美股/市场时)
- key_location_updates: **双方攻击地点**。每项 {{"name_keywords":"阿萨德|asad","side":"us或iran(被打击方)","status":"attacked","damage_level":1-3}}。美军基地:阿萨德|asad、乌代德|udeid、埃尔比勒|erbil、因吉尔利克|incirlik。伊朗:德黑兰|tehran、布什尔|bushehr、伊斯法罕|isfahan、阿巴斯|abbas、纳坦兹|natanz

原文:
{raw}

直接输出 JSON,不要其他解释:"""

        response = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=512,
        )

        if response.status_code != HTTPStatus.OK:
            return None
        # `raw` is reused here for the model's reply text.
        raw = (response.output.get("choices", [{}])[0].get("message", {}).get("content", "") or "").strip()
        # Strip a surrounding Markdown code fence if the model added one.
        raw = re.sub(r"^```\w*\s*|\s*```$", "", raw)
        return json.loads(raw)
    except Exception:
        # Extraction is best-effort; any failure degrades to "no data".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """
    Extract panel-schema data from a news text via DashScope.

    Same output contract as the Ollama extractor: a dict with any of
    situation_update, combat_losses_delta, retaliation, wall_street,
    key_location_updates; empty when the API returned nothing usable.
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    out: Dict[str, Any] = {}
    parsed = _call_dashscope_extract(text)
    if not parsed:
        return out

    # situation_update fields are normalized by the panel_schema validators.
    if parsed.get("summary"):
        out["situation_update"] = {
            "summary": validate_summary(str(parsed["summary"])[:120], 120),
            "category": validate_category(str(parsed.get("category", "other")).lower()),
            "severity": validate_severity(str(parsed.get("severity", "medium")).lower()),
            "timestamp": ts,
        }

    # combat_losses delta: copy only numeric us_*/iran_* fields, clamped >= 0.
    loss_us = {}
    loss_ir = {}
    for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded",
              "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles",
              "drones", "missiles", "helicopters", "submarines", "carriers", "civilian_ships", "airport_port"]:
        uk, ik = f"us_{k}", f"iran_{k}"
        if uk in parsed and isinstance(parsed[uk], (int, float)):
            loss_us[k] = max(0, int(parsed[uk]))
        if ik in parsed and isinstance(parsed[ik], (int, float)):
            loss_ir[k] = max(0, int(parsed[ik]))
    if loss_us or loss_ir:
        out["combat_losses_delta"] = {}
        if loss_us:
            out["combat_losses_delta"]["us"] = loss_us
        if loss_ir:
            out["combat_losses_delta"]["iran"] = loss_ir

    # Retaliation sentiment gauge: accept only 0-100 numeric values.
    if "retaliation_sentiment" in parsed:
        v = parsed["retaliation_sentiment"]
        if isinstance(v, (int, float)) and 0 <= v <= 100:
            out["retaliation"] = {"value": int(v), "time": ts}

    # Wall Street market gauge: same 0-100 range rule.
    if "wall_street_value" in parsed:
        v = parsed["wall_street_value"]
        if isinstance(v, (int, float)) and 0 <= v <= 100:
            out["wall_street"] = {"time": ts, "value": int(v)}

    # key_location_updates: keep only well-formed entries; damage_level is
    # clamped to 1..3 and defaults to 2 when missing or non-numeric.
    if "key_location_updates" in parsed and isinstance(parsed["key_location_updates"], list):
        valid = []
        for u in parsed["key_location_updates"]:
            if isinstance(u, dict) and u.get("name_keywords") and u.get("side") in ("us", "iran"):
                valid.append({
                    "name_keywords": str(u["name_keywords"]),
                    "side": u["side"],
                    "status": str(u.get("status", "attacked"))[:20],
                    "damage_level": min(3, max(1, int(u["damage_level"]))) if isinstance(u.get("damage_level"), (int, float)) else 2,
                })
        if valid:
            out["key_location_updates"] = valid

    return out
|
||||||
254
crawler/extractor_rules.py
Normal file
254
crawler/extractor_rules.py
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
基于规则的新闻数据提取(无需 Ollama)
|
||||||
|
从新闻文本中提取战损、报复情绪等数值,供 db_merge 写入
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
|
||||||
|
def _first_int(text: str, pattern: str) -> Optional[int]:
|
||||||
|
m = re.search(pattern, text, re.I)
|
||||||
|
if m and m.group(1) and m.group(1).replace(",", "").isdigit():
|
||||||
|
return max(0, int(m.group(1).replace(",", "")))
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]:
    """
    Rule-based extraction: match number+keyword regex patterns (Chinese and
    English) and emit fields conforming to the panel schema — no Ollama
    required. Pattern order matters: later matches can overwrite earlier
    ones within the same field, so do not reorder the cascades below.
    """
    ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    out: Dict[str, Any] = {}
    # `t` is the lowercased text used for most matching; the original `text`
    # is used where Chinese-character context must be checked verbatim.
    t = (text or "").lower()

    loss_us, loss_ir = {}, {}

    # --- US personnel casualties (Chinese patterns, tried first) ---
    v = _first_int(t, r"造成\s*(\d+)\s*名?\s*美军\s*伤亡")
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*美军\s*伤亡") if loss_us.get("personnel_killed") is None else None
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*(?:美军|美国军队|美国)\s*(?:死亡|阵亡)")
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*(?:美军|美国)\s*受伤")
    if v is None and ("美军" in (text or "") or "美国" in (text or "")):
        # Fallback: "另有 N 人受伤" attributed to the US when US keywords appear.
        v = _first_int(text or t, r"另有\s*(\d+)\s*人\s*受伤")
    if v is not None:
        loss_us["personnel_wounded"] = v
    v = _first_int(t, r"美军\s*伤亡\s*(\d+)")
    if v is not None and loss_us.get("personnel_killed") is None:
        loss_us["personnel_killed"] = v

    # --- US personnel casualties (English patterns) ---
    v = _first_int(t, r"(?:us|american|u\.?s\.?)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|military)[\s\w]*(?:killed|dead)")
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:us|american)[\s\w]*(?:troop|soldier|military)[\s\w]*(?:killed|dead)")
    if v is not None:
        loss_us["personnel_killed"] = v
    v = _first_int(t, r"(?:us|american)[\s\w]*(\d+)[\s\w]*(?:wounded|injured)")
    if v is not None:
        loss_us["personnel_wounded"] = v

    # --- Iranian personnel casualties (Chinese) ---
    v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*伤亡")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*(?:伊朗|伊朗军队)[\s\w]*(?:死亡|阵亡)")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(\d+)\s*名?\s*伊朗\s*受伤")
    if v is not None:
        loss_ir["personnel_wounded"] = v

    # --- Iranian personnel casualties (English) ---
    v = _first_int(t, r"(?:iran|iranian)[\s\w]*(?:say|report)[\s\w]*(\d+)[\s\w]*(?:troop|soldier|guard|killed|dead)")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:iranian|iran)[\s\w]*(?:troop|soldier|guard|killed|dead)")
    if v is not None:
        loss_ir["personnel_killed"] = v
    v = _first_int(t, r"(?:iran|iranian)[\s\w]*(\d+)[\s\w]*(?:wounded|injured)")
    if v is not None:
        loss_ir["personnel_wounded"] = v

    # --- Civilian casualties (Chinese/English); side attributed by keyword ---
    v = _first_int(t, r"(\d+)\s*名?\s*平民\s*(?:伤亡|死亡)")
    if v is not None:
        # Note: this branch is only reached when `t` (and hence `text`)
        # is non-empty, so the `in text` check is safe here.
        if "伊朗" in text or "iran" in t:
            loss_ir["civilian_killed"] = v
        else:
            loss_us["civilian_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:killed|dead)") if loss_us.get("civilian_killed") is None and loss_ir.get("civilian_killed") is None else None
    if v is not None:
        if "iran" in t:
            loss_ir["civilian_killed"] = v
        else:
            loss_us["civilian_killed"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian|civil)[\s\w]*(?:wounded|injured)")
    if v is not None:
        if "iran" in t:
            loss_ir["civilian_wounded"] = v
        else:
            loss_us["civilian_wounded"] = v
    v = _first_int(text or t, r"伊朗[\s\w]*(?:空袭|打击)[\s\w]*造成[^\d]*(\d+)[\s\w]*(?:平民|人|伤亡)")
    if v is not None:
        loss_ir["civilian_killed"] = v

    # --- Base destruction/damage (only base/基地 mentions; skip generic
    # "military targets" phrasing to avoid false positives) ---
    skip_bases = "军事目标" in (text or "") and "基地" not in (text or "") and "base" not in t
    if not skip_bases:
        v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:destroyed|leveled|摧毁|夷平)")
        if v is not None:
            loss_us["bases_destroyed"] = v
        v = _first_int(t, r"(\d+)[\s\w]*(?:base|基地)[\s\w]*(?:damaged|hit|struck|受损|袭击)")
        if v is not None:
            loss_us["bases_damaged"] = v
        # Keyword-only fallback: assume 1 when a base + destroy/damage word
        # co-occur but no explicit count was found.
        if ("base" in t or "基地" in t) and ("destroy" in t or "level" in t or "摧毁" in t or "夷平" in t) and not loss_us.get("bases_destroyed"):
            loss_us["bases_destroyed"] = 1
        if ("base" in t or "基地" in t) and ("damage" in t or "hit" in t or "struck" in t or "strike" in t or "袭击" in t or "受损" in t) and not loss_us.get("bases_damaged"):
            loss_us["bases_damaged"] = 1

    # --- Aircraft / warships (side inferred from context keywords) ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:aircraft|plane|jet|fighter|f-?16|f-?35|f-?18)[\s\w]*(?:down|destroyed|lost|shot)")
    if v is not None:
        if "us" in t or "american" in t or "u.s" in t:
            loss_us["aircraft"] = v
        elif "iran" in t:
            loss_ir["aircraft"] = v
        else:
            # Default attribution is the US side when context is ambiguous.
            loss_us["aircraft"] = v
    v = _first_int(t, r"(\d+)[\s\w]*(?:ship|destroyer|warship|vessel)[\s\w]*(?:hit|damaged|sunk)")
    if v is not None:
        if "iran" in t:
            loss_ir["warships"] = v
        else:
            loss_us["warships"] = v

    # --- Drones (three pattern shapes: count-first, verb-first CN, noun-first) ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:drone|uav|无人机)[\s\w]*(?:down|destroyed|shot|击落|摧毁)")
    if v is None:
        v = _first_int(text or t, r"(?:击落|摧毁)[^\d]*(\d+)[\s\w]*(?:drone|uav|无人机|架)")
    if v is None:
        v = _first_int(t, r"(?:drone|uav|无人机)[\s\w]*(\d+)[\s\w]*(?:down|destroyed|shot|击落|摧毁)")
    if v is not None:
        if "iran" in t or "iranian" in t or "shahed" in t or "沙希德" in t or "伊朗" in (text or ""):
            loss_ir["drones"] = v
        else:
            loss_us["drones"] = v

    # --- Missiles ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:missile|导弹)[\s\w]*(?:fired|launched|intercepted|destroyed|发射|拦截|击落)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["missiles"] = v
        else:
            loss_us["missiles"] = v
    v = _first_int(t, r"(?:missile|导弹)[\s\w]*(\d+)[\s\w]*(?:fired|launched|intercepted|destroyed|发射|拦截)") if not loss_us.get("missiles") and not loss_ir.get("missiles") else None
    if v is not None:
        if "iran" in t:
            loss_ir["missiles"] = v
        else:
            loss_us["missiles"] = v

    # --- Helicopters ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:helicopter|直升机)[\s\w]*(?:down|destroyed|crashed|crashes|击落|坠毁)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["helicopters"] = v
        else:
            loss_us["helicopters"] = v

    # --- Submarines ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:submarine|潜艇)[\s\w]*(?:sunk|damaged|hit|destroyed|击沉|受损)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["submarines"] = v
        else:
            loss_us["submarines"] = v

    # --- Carriers ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:carrier|aircraft\s*carrier|航母|航空母舰)[\s\w]*(?:destroyed|damaged|lost|hit|sunk|摧毁|损毁|击毁|沉没)")
    if v is not None:
        if "iran" in t or "iranian" in t:
            loss_ir["carriers"] = v
        else:
            loss_us["carriers"] = v

    # --- Civilian / merchant ships ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:civilian\s*ship|merchant|商船|民船)[\s\w]*(?:sunk|damaged|hit|击沉|受损)")
    if v is None:
        v = _first_int(text or t, r"(?:民船|商船|货船)[\s\w]*(\d+)[\s\w]*(?:艘)?[\s\w]*(?:击沉|受损|袭击)")
    if v is not None:
        if "iran" in t or "iranian" in t or "伊朗" in (text or ""):
            loss_ir["civilian_ships"] = v
        else:
            loss_us["civilian_ships"] = v

    # --- Airports / ports ---
    v = _first_int(t, r"(\d+)[\s\w]*(?:airport|port|机场|港口)[\s\w]*(?:destroyed|damaged|hit|struck|摧毁|受损|袭击)")
    if v is None:
        v = _first_int(text or t, r"(?:机场|港口)[\s\w]*(\d+)[\s\w]*(?:处|个)?[\s\w]*(?:受损|袭击|摧毁)")
    if v is not None:
        if "iran" in t or "iranian" in t or "伊朗" in (text or ""):
            loss_ir["airport_port"] = v
        else:
            loss_us["airport_port"] = v

    if loss_us:
        out.setdefault("combat_losses_delta", {})["us"] = loss_us
    if loss_ir:
        out.setdefault("combat_losses_delta", {})["iran"] = loss_ir
    # Fixed gauge values when retaliation / market topics are merely mentioned.
    if "retaliat" in t or "revenge" in t or "报复" in t or "反击" in t:
        out["retaliation"] = {"value": 75, "time": ts}
    if "wall street" in t or " dow " in t or "s&p" in t or "market slump" in t or "stock fall" in t or "美股" in t:
        out["wall_street"] = {"time": ts, "value": 55}

    # key_location_updates: bases hit per the news text; matched via the
    # name_keywords LIKE patterns consumed by db_merge. The attack trigger
    # word list is deliberately broad to match more English reports.
    attack_words = ("attack" in t or "attacked" in t or "hit" in t or "strike" in t or "struck" in t or "strikes" in t
                    or "damage" in t or "damaged" in t or "target" in t or "targeted" in t or "bomb" in t or "bombed" in t
                    or "袭击" in (text or "") or "遭袭" in (text or "") or "打击" in (text or "") or "受损" in (text or "") or "摧毁" in (text or ""))
    base_attacked = ("base" in t or "基地" in t or "outpost" in t or "facility" in t) and attack_words
    if base_attacked:
        updates: list = []
        # Known base keyword sets -> (name_keywords, side); keywords must be
        # able to LIKE-match key_location.name in db_merge.
        bases_all = [
            ("阿萨德|阿因|asad|assad|ain", "us"),
            ("巴格达|baghdad", "us"),
            ("乌代德|udeid|卡塔尔|qatar", "us"),
            ("阿克罗蒂里|akrotiri|塞浦路斯|cyprus", "us"),
            ("巴格拉姆|bagram|阿富汗|afghanistan", "us"),
            ("埃尔比勒|erbil", "us"),
            ("因吉尔利克|incirlik|土耳其|turkey", "us"),
            ("苏尔坦|sultan|沙特|saudi", "us"),
            ("坦夫|tanf|叙利亚|syria", "us"),
            ("达夫拉|dhafra|阿联酋|uae", "us"),
            ("内瓦提姆|nevatim|拉蒙|ramon|以色列|israel", "us"),
            ("赛利耶|sayliyah", "us"),
            ("巴林|bahrain", "us"),
            ("科威特|kuwait", "us"),
            # Iranian facilities
            ("阿巴斯港|abbas|bandar abbas", "iran"),
            ("德黑兰|tehran", "iran"),
            ("布什尔|bushehr", "iran"),
            ("伊斯法罕|isfahan|esfahan", "iran"),
            ("纳坦兹|natanz", "iran"),
            ("米纳布|minab", "iran"),
            ("卡拉季|karaj", "iran"),
            ("克尔曼沙赫|kermanshah", "iran"),
            ("大不里士|tabriz", "iran"),
            ("霍尔木兹|hormuz", "iran"),
        ]
        for kws, side in bases_all:
            if any(k in t for k in kws.split("|")):
                updates.append({"name_keywords": kws, "side": side, "status": "attacked", "damage_level": 2})
        if updates:
            out["key_location_updates"] = updates

    return out
|
||||||
41
crawler/main.py
Normal file
41
crawler/main.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""爬虫入口:定时执行完整写库流水线(抓取 → 清洗 → 去重 → 映射 → 更新表 → 通知 API)"""
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# 确保能导入 config
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||||
|
|
||||||
|
from config import DB_PATH, API_BASE, CRAWL_INTERVAL
|
||||||
|
from pipeline import run_full_pipeline
|
||||||
|
|
||||||
|
|
||||||
|
def run_once() -> int:
    """Run one crawl round (fetch → clean → dedupe → map → write → notify).

    Returns the number of new rows this round: panel rows when any were
    added, otherwise news rows.
    """
    _fetched, new_news, new_panel = run_full_pipeline(
        db_path=DB_PATH,
        api_base=API_BASE,
        translate=True,
        notify=True,
    )
    return new_panel or new_news
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Entry point: crawl forever, one round every CRAWL_INTERVAL seconds."""
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    while True:
        try:
            n = run_once()
            if n > 0:
                # Log only productive rounds to keep the console quiet.
                print(f"[{time.strftime('%H:%M:%S')}] 抓取完成,去重后新增 {n} 条,已写库并通知 API")
        except KeyboardInterrupt:
            # Ctrl-C exits the loop cleanly.
            break
        except Exception as e:
            # Best-effort daemon: log the error and keep crawling.
            print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
        # Sleep runs after success and after errors alike.
        time.sleep(CRAWL_INTERVAL)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
141
crawler/news_storage.py
Normal file
141
crawler/news_storage.py
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
资讯内容独立存储,支持历史去重
|
||||||
|
爬虫拉回数据 → 计算 content_hash → 若已存在则跳过(去重)→ 新数据落库 news_content
|
||||||
|
"""
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
|
from config import DB_PATH
|
||||||
|
|
||||||
|
|
||||||
|
def _to_utc_iso(dt: datetime) -> str:
|
||||||
|
if dt.tzinfo:
|
||||||
|
dt = dt.astimezone(timezone.utc)
|
||||||
|
return dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_for_hash(text: str) -> str:
|
||||||
|
"""归一化文本用于生成去重 hash"""
|
||||||
|
if not text:
|
||||||
|
return ""
|
||||||
|
t = re.sub(r"\s+", " ", str(text).strip().lower())[:600]
|
||||||
|
return re.sub(r"[\x00-\x1f]", "", t)
|
||||||
|
|
||||||
|
|
||||||
|
def content_hash(title: str, summary: str, url: str) -> str:
    """Dedup hash over normalized title/summary plus the stripped URL;
    near-identical content collapses to the same 32-hex-char digest."""
    parts = (
        _normalize_for_hash(title),
        _normalize_for_hash(summary),
        (url or "").strip(),
    )
    return hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest()[:32]
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_table(conn: sqlite3.Connection) -> None:
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS news_content (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
content_hash TEXT NOT NULL UNIQUE,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
summary TEXT NOT NULL,
|
||||||
|
url TEXT NOT NULL DEFAULT '',
|
||||||
|
source TEXT NOT NULL DEFAULT '',
|
||||||
|
published_at TEXT NOT NULL,
|
||||||
|
category TEXT NOT NULL DEFAULT 'other',
|
||||||
|
severity TEXT NOT NULL DEFAULT 'medium',
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
try:
|
||||||
|
conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_news_content_hash ON news_content(content_hash)")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
conn.execute("CREATE INDEX IF NOT EXISTS idx_news_content_pub ON news_content(published_at DESC)")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def exists_by_hash(conn: sqlite3.Connection, h: str) -> bool:
    """Return True when a news_content row with this content hash already exists."""
    cur = conn.execute("SELECT 1 FROM news_content WHERE content_hash = ? LIMIT 1", (h,))
    return cur.fetchone() is not None
|
||||||
|
|
||||||
|
|
||||||
|
def insert_news(
    conn: sqlite3.Connection,
    *,
    title: str,
    summary: str,
    url: str = "",
    source: str = "",
    published: datetime,
    category: str = "other",
    severity: str = "medium",
) -> Optional[str]:
    """Insert one news item, skipping it when its content hash already exists.

    Returns the new row id, or None when the item was a duplicate.
    """
    _ensure_table(conn)
    h = content_hash(title, summary, url)
    if exists_by_hash(conn, h):
        return None
    # Mix the wall clock into the id so re-inserted content never collides.
    # Fix: datetime.now(timezone.utc) replaces the deprecated datetime.utcnow()
    # (timezone is already imported at the top of this module).
    uid = "nc_" + hashlib.sha256(f"{h}{datetime.now(timezone.utc).isoformat()}".encode()).hexdigest()[:14]
    ts = _to_utc_iso(published)
    # Oversized fields are truncated to keep rows bounded.
    conn.execute(
        """INSERT INTO news_content (id, content_hash, title, summary, url, source, published_at, category, severity)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (uid, h, (title or "")[:500], (summary or "")[:2000], (url or "")[:500], (source or "")[:100], ts, category, severity),
    )
    conn.commit()
    return uid
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_published(pub) -> datetime:
    """Accept a datetime, an ISO-8601 string, or None; fall back to 'now' (UTC)."""
    if isinstance(pub, str):
        try:
            return datetime.fromisoformat(pub.replace("Z", "+00:00"))
        except ValueError:
            return datetime.now(timezone.utc)
    if pub is None:
        return datetime.now(timezone.utc)
    return pub


def save_and_dedup(items: List[dict], db_path: Optional[str] = None) -> Tuple[List[dict], int]:
    """Persist items into news_content, dropping duplicates.

    items: [{"title","summary","url","published","category","severity","source"?}, ...]
    Returns (items that survived dedup, number of rows actually inserted).
    """
    path = db_path or DB_PATH
    if not os.path.exists(path):
        return [], 0
    conn = sqlite3.connect(path, timeout=10)
    try:
        _ensure_table(conn)
        fresh: List[dict] = []
        inserted = 0
        for item in items:
            uid = insert_news(
                conn,
                title=(item.get("title") or "")[:500],
                summary=(item.get("summary") or item.get("title") or "")[:2000],
                url=(item.get("url") or "")[:500],
                source=(item.get("source") or "")[:100],
                published=_coerce_published(item.get("published")),
                category=item.get("category", "other"),
                severity=item.get("severity", "medium"),
            )
            if uid:
                inserted += 1
                fresh.append({**item, "news_id": uid})
        return fresh, inserted
    finally:
        conn.close()
|
||||||
42
crawler/panel_schema.py
Normal file
42
crawler/panel_schema.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# -*- coding: utf-8 -*-
"""
Full data schema for the front-end panel, aligned with the DB,
situationData and useReplaySituation.

Crawled + AI-cleaned data must conform to this schema before it can
correctly update the front end.
"""
from typing import Any, Dict, List, Literal, Optional, Tuple

# Event-timeline vocabulary.
SITUATION_UPDATE_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
SITUATION_UPDATE_SEVERITIES = ("low", "medium", "high", "critical")
SUMMARY_MAX_LEN = 120

# Combat-loss row: bases_destroyed, bases_damaged, personnel_killed, ...
CombatLossesRow = Dict[str, Any]

# Time-series point used for replay: (ISO time, value).
TimeSeriesPoint = Tuple[str, int]

# Fields the AI is allowed to extract from news text.
EXTRACTABLE_FIELDS = {
    "situation_update": ["summary", "category", "severity", "timestamp"],
    "combat_losses": ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles", "drones", "missiles", "helicopters", "submarines", "tanks", "carriers", "civilian_ships", "airport_port"],
    "retaliation": ["value"],  # 0-100
    "wall_street_trend": ["time", "value"],  # 0-100
    "conflict_stats": ["estimated_casualties", "estimated_strike_count"],
}


def validate_category(cat: str) -> str:
    """Clamp a category to the known vocabulary; unknown values become 'other'."""
    if cat in SITUATION_UPDATE_CATEGORIES:
        return cat
    return "other"


def validate_severity(sev: str) -> str:
    """Clamp a severity to the known vocabulary; unknown values become 'medium'."""
    if sev in SITUATION_UPDATE_SEVERITIES:
        return sev
    return "medium"


def validate_summary(s: str, max_len: int = SUMMARY_MAX_LEN) -> str:
    """Collapse whitespace, cap the length, and strip control characters."""
    import re
    if not s or not isinstance(s, str):
        return ""
    collapsed = re.sub(r"\s+", " ", str(s).strip())
    cleaned = re.sub(r"[\x00-\x1f]", "", collapsed[:max_len])
    return cleaned.rstrip()
|
||||||
52
crawler/parser.py
Normal file
52
crawler/parser.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# -*- coding: utf-8 -*-
"""Rule-based news categorisation and severity scoring."""
import re
from typing import Literal

Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
Severity = Literal["low", "medium", "high", "critical"]

# Keyword lists per category (English + Chinese).
CAT_DEPLOYMENT = ["deploy", "carrier", "航母", "military build", "troop", "forces"]
CAT_ALERT = ["strike", "attack", "fire", "blast", "hit", "爆炸", "袭击", "打击"]
CAT_INTEL = ["satellite", "intel", "image", "surveillance", "卫星", "情报"]
CAT_DIPLOMATIC = ["talk", "negotiation", "diplomat", "sanction", "谈判", "制裁"]


def _match(text: str, words: list[str]) -> bool:
    """True when any keyword occurs (case-insensitively) in text."""
    haystack = (text or "").lower()
    return any(w.lower() in haystack for w in words)


def classify(text: str) -> Category:
    """Map text to the first matching category; alert keywords win over the rest."""
    ordered = (
        ("alert", CAT_ALERT),
        ("deployment", CAT_DEPLOYMENT),
        ("intel", CAT_INTEL),
        ("diplomatic", CAT_DIPLOMATIC),
    )
    for name, words in ordered:
        if _match(text, words):
            return name  # type: ignore[return-value]
    return "other"


def severity(text: str, category: Category) -> Severity:
    """Score severity from keyword hits first, then fall back to the category."""
    t = (text or "").lower()
    critical = [
        "nuclear", "核", "strike", "attack", "killed", "dead", "casualty",
        "war", "invasion", "袭击", "打击", "死亡",
    ]
    high = [
        "missile", "drone", "bomb", "explosion", "blasted", "fire",
        "导弹", "无人机", "爆炸", "轰炸",
    ]
    if _match(t, critical):
        return "critical"
    if _match(t, high) or category == "alert":
        return "high"
    return "medium" if category == "deployment" else "low"
|
||||||
138
crawler/parser_ai.py
Normal file
138
crawler/parser_ai.py
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
AI 新闻分类与严重度判定
|
||||||
|
优先 DASHSCOPE_API_KEY(通义,无需 Ollama),否则 Ollama,最后规则
|
||||||
|
设置 PARSER_AI_DISABLED=1 可只用规则(更快)
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from typing import Literal, Optional, Tuple
|
||||||
|
|
||||||
|
Category = Literal["deployment", "alert", "intel", "diplomatic", "other"]
|
||||||
|
Severity = Literal["low", "medium", "high", "critical"]
|
||||||
|
|
||||||
|
PARSER_AI_DISABLED = os.environ.get("PARSER_AI_DISABLED", "0") == "1"
|
||||||
|
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.1")
|
||||||
|
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "").strip()
|
||||||
|
|
||||||
|
_CATEGORIES = ("deployment", "alert", "intel", "diplomatic", "other")
|
||||||
|
_SEVERITIES = ("low", "medium", "high", "critical")
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_ai_response(text: str) -> Tuple[Category, Severity]:
|
||||||
|
"""从 AI 回复解析 category:severity"""
|
||||||
|
t = (text or "").strip().lower()
|
||||||
|
cat, sev = "other", "low"
|
||||||
|
for c in _CATEGORIES:
|
||||||
|
if c in t:
|
||||||
|
cat = c
|
||||||
|
break
|
||||||
|
for s in _SEVERITIES:
|
||||||
|
if s in t:
|
||||||
|
sev = s
|
||||||
|
break
|
||||||
|
return cat, sev # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
def _call_dashscope(text: str, timeout: int = 6) -> Optional[Tuple[Category, Severity]]:
    """Classify via Aliyun Tongyi (DashScope); no Ollama required.

    Requires DASHSCOPE_API_KEY. Returns None on any failure (or when
    PARSER_AI_DISABLED is set) so callers can fall back to other backends.

    NOTE(review): the ``timeout`` parameter is accepted but never passed to
    the DashScope SDK call — confirm whether the SDK supports one.
    """
    if not DASHSCOPE_API_KEY or PARSER_AI_DISABLED:
        return None
    try:
        # Imported lazily so the module works without the dashscope package.
        import dashscope
        from http import HTTPStatus
        dashscope.api_key = DASHSCOPE_API_KEY
        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical

News: {text[:300]}

Reply format: category:severity (e.g. alert:high)"""
        r = dashscope.Generation.call(
            model="qwen-turbo",
            messages=[{"role": "user", "content": prompt}],
            result_format="message",
            max_tokens=32,
        )
        if r.status_code != HTTPStatus.OK:
            return None
        out = r.output.get("choices", [{}])[0].get("message", {}).get("content", "")
        return _parse_ai_response(out)
    except Exception:
        # Best effort: any SDK/network error means "no AI answer".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _call_ollama(text: str, timeout: int = 5) -> Optional[Tuple[Category, Severity]]:
    """Classify via a local Ollama model (requires `ollama run llama3.1` first).

    Returns None on any failure, or when PARSER_AI_DISABLED is set, so the
    caller can fall back to the rule-based parser.
    """
    if PARSER_AI_DISABLED:
        return None
    try:
        # Imported lazily so the module works without the requests package.
        import requests
        prompt = f"""Classify this news about US-Iran/middle east (one line only):
- category: deployment|alert|intel|diplomatic|other
- severity: low|medium|high|critical

News: {text[:300]}

Reply format: category:severity (e.g. alert:high)"""
        r = requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": OLLAMA_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "options": {"num_predict": 32},  # cap reply length
            },
            timeout=timeout,
        )
        if r.status_code != 200:
            return None
        out = r.json().get("message", {}).get("content", "")
        return _parse_ai_response(out)
    except Exception:
        # Best effort: connection/parse errors mean "no AI answer".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _rule_classify(text: str) -> Category:
    """Classify via the keyword rules in parser.py (no AI involved)."""
    from parser import classify as rule_classify
    return rule_classify(text)
|
||||||
|
|
||||||
|
|
||||||
|
def _rule_severity(text: str, category: Category) -> Severity:
    """Score severity via the keyword rules in parser.py (no AI involved)."""
    from parser import severity as rule_severity
    return rule_severity(text, category)
|
||||||
|
|
||||||
|
|
||||||
|
def _call_ai(text: str) -> Optional[Tuple[Category, Severity]]:
    """Try DashScope first (when a key is configured), otherwise Ollama."""
    backend = _call_dashscope if DASHSCOPE_API_KEY else _call_ollama
    return backend(text)
|
||||||
|
|
||||||
|
|
||||||
|
def classify(text: str) -> Category:
    """AI classification with a rule-based fallback on failure."""
    result = _call_ai(text)
    return result[0] if result else _rule_classify(text)
|
||||||
|
|
||||||
|
|
||||||
|
def severity(text: str, category: Category) -> Severity:
    """AI severity scoring with a rule-based fallback on failure."""
    result = _call_ai(text)
    return result[1] if result else _rule_severity(text, category)
|
||||||
|
|
||||||
|
|
||||||
|
def classify_and_severity(text: str) -> Tuple[Category, Severity]:
    """Return (category, severity) in a single pass (avoids a second AI call).

    Order: rules only when PARSER_AI_DISABLED, else AI, else rule fallback.
    """
    if PARSER_AI_DISABLED:
        from parser import classify, severity
        c = classify(text)
        return c, severity(text, c)
    res = _call_ai(text)
    if res:
        return res
    # Bug fix: the rule classifier was previously invoked twice on the
    # fallback path; compute the category once and reuse it.
    c = _rule_classify(text)
    return c, _rule_severity(text, c)
|
||||||
186
crawler/pipeline.py
Normal file
186
crawler/pipeline.py
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
统一写库流水线:抓取 → 清洗 → 去重 → 映射到前端库字段 → 更新表 → 通知
|
||||||
|
与 server/README.md 第五节「爬虫侧写库链路」一致,供 main.py 与 realtime_conflict_service 共用。
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Callable, Optional, Tuple
|
||||||
|
|
||||||
|
from config import DB_PATH, API_BASE
|
||||||
|
|
||||||
|
|
||||||
|
def _notify_api(api_base: str) -> bool:
|
||||||
|
"""调用 Node API 触发立即广播"""
|
||||||
|
try:
|
||||||
|
import urllib.request
|
||||||
|
token = os.environ.get("API_CRAWLER_TOKEN", "").strip()
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{api_base.rstrip('/')}/api/crawler/notify",
|
||||||
|
method="POST",
|
||||||
|
headers={
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
**({"X-Crawler-Token": token} if token else {}),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(req, timeout=5) as resp:
|
||||||
|
return resp.status == 200
|
||||||
|
except Exception as e:
|
||||||
|
print(f" [warn] notify API failed: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_and_merge(items: list, db_path: str) -> bool:
    """Use AI (or rules) to extract precise structured data from article full
    text or title+summary, merging it into combat_losses / key_location etc.

    Backend selection: DashScope when DASHSCOPE_API_KEY is set, pure rules
    when CLEANER_AI_DISABLED=1, otherwise the local AI extractor. Returns
    True when at least one extraction was merged.
    """
    if not items or not os.path.exists(db_path):
        return False
    try:
        from db_merge import merge
        use_dashscope = bool(os.environ.get("DASHSCOPE_API_KEY", "").strip())
        if use_dashscope:
            from extractor_dashscope import extract_from_news
            limit = 10
        elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            # Rule extraction is cheap, so more items per round are allowed.
            from extractor_rules import extract_from_news
            limit = 25
        else:
            from extractor_ai import extract_from_news
            limit = 10
        merged_any = False
        for it in items[:limit]:
            # Prefer the full article body (fetched by article_fetcher);
            # otherwise title + summary, so the AI can extract exact figures.
            text = it.get("full_text") or ((it.get("title", "") or "") + " " + (it.get("summary", "") or ""))
            if len(text.strip()) < 20:
                continue
            pub = it.get("published")
            ts = None
            if pub:
                # Normalize the published time to a UTC ISO string; on any
                # parse failure the extraction proceeds without a timestamp.
                try:
                    if isinstance(pub, str):
                        pub_dt = datetime.fromisoformat(pub.replace("Z", "+00:00"))
                    else:
                        pub_dt = pub
                    if pub_dt.tzinfo:
                        pub_dt = pub_dt.astimezone(timezone.utc)
                    ts = pub_dt.strftime("%Y-%m-%dT%H:%M:%S.000Z")
                except Exception:
                    pass
            extracted = extract_from_news(text, timestamp=ts)
            if extracted and merge(extracted, db_path=db_path):
                merged_any = True
        return merged_any
    except Exception as e:
        # Best effort: extraction must never break the main pipeline.
        print(f" [warn] AI 面板数据提取/合并: {e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def run_full_pipeline(
    db_path: Optional[str] = None,
    api_base: Optional[str] = None,
    *,
    translate: bool = True,
    notify: bool = True,
    on_notify: Optional[Callable[[], None]] = None,
) -> Tuple[int, int, int]:
    """
    Execute the complete write-to-DB chain:
    1. crawl real-time data
    2. AI cleaning (title/summary/category) -> valid items
    3. dedup (news_content content_hash) -> only new items continue
    4. map valid items onto front-end DB fields (situation_update,
       news_content, combat_losses, ...)
    5. update the DB tables; notify the backend when anything changed

    translate: translate titles/summaries (EN -> ZH)
    notify: call POST /api/crawler/notify at the end of the pipeline
    on_notify: if given, invoked before notifying (e.g. GDELT backfill)

    Returns: (items fetched this round, new items after dedup,
              rows written to situation_update)
    """
    path = db_path or DB_PATH
    base = api_base or API_BASE

    from scrapers.rss_scraper import fetch_all
    from db_writer import write_updates
    from news_storage import save_and_dedup
    from cleaner_ai import clean_news_for_panel, ensure_category, ensure_severity

    # 1. Fetch.
    items = fetch_all()
    if not items:
        return 0, 0, 0

    # Optional: keep only items after a start time (e.g. CRAWL_START_DATE=2026-02-28T00:00:00).
    start_date_env = os.environ.get("CRAWL_START_DATE", "").strip()
    if start_date_env:
        try:
            raw = start_date_env.replace("Z", "+00:00").strip()
            start_dt = datetime.fromisoformat(raw)
            if start_dt.tzinfo is None:
                start_dt = start_dt.replace(tzinfo=timezone.utc)
            else:
                start_dt = start_dt.astimezone(timezone.utc)
            before = len(items)
            # Items without a published time compare as datetime.min and are dropped.
            items = [it for it in items if (it.get("published") or datetime.min.replace(tzinfo=timezone.utc)) >= start_dt]
            if before > len(items):
                print(f" [pipeline] 按 CRAWL_START_DATE={start_date_env} 过滤后保留 {len(items)} 条(原 {before} 条)")
        except Exception as e:
            print(f" [warn] CRAWL_START_DATE 解析失败,忽略: {e}")

    if not items:
        return 0, 0, 0
    n_total = len(items)
    print(f" [pipeline] 抓取 {n_total} 条")
    # Log a short preview of the first few items.
    for i, it in enumerate(items[:5]):
        title = (it.get("title") or it.get("summary") or "").strip()[:60]
        print(f" [{i + 1}] {title}" + ("…" if len((it.get("title") or it.get("summary") or "")[:60]) >= 60 else ""))
    if n_total > 5:
        print(f" ... 共 {n_total} 条")

    # 2. Clean (title/summary/category, conforming to the panel schema).
    if translate:
        from translate_utils import translate_to_chinese
        for it in items:
            raw_title = translate_to_chinese(it.get("title", "") or "")
            raw_summary = translate_to_chinese(it.get("summary", "") or it.get("title", ""))
            it["title"] = clean_news_for_panel(raw_title, max_len=80)
            it["summary"] = clean_news_for_panel(raw_summary or raw_title, max_len=120)
    else:
        for it in items:
            it["title"] = clean_news_for_panel(it.get("title", "") or "", max_len=80)
            it["summary"] = clean_news_for_panel(it.get("summary", "") or it.get("title", ""), max_len=120)
    for it in items:
        it["category"] = ensure_category(it.get("category", "other"))
        it["severity"] = ensure_severity(it.get("severity", "medium"))
        it["source"] = it.get("source") or "rss"

    # 3. Dedup: persist into news_content, keep only the new items.
    new_items, n_news = save_and_dedup(items, db_path=path)
    if new_items:
        print(f" [pipeline] 去重后新增 {n_news} 条,写入事件脉络 {len(new_items)} 条")
        for i, it in enumerate(new_items[:3]):
            title = (it.get("title") or it.get("summary") or "").strip()[:55]
            print(f" 新增 [{i + 1}] {title}" + ("…" if len((it.get("title") or it.get("summary") or "").strip()) > 55 else ""))

    # 3.5 Enrichment: fetch article bodies for the items that will go through
    # AI extraction, so exact figures (casualties, bases, ...) can be pulled
    # from the full text.
    if new_items:
        try:
            from article_fetcher import enrich_item_with_body
            # Only fetch bodies for the first few items to bound request volume.
            enrich_limit = int(os.environ.get("ARTICLE_FETCH_LIMIT", "10"))
            for it in new_items[:enrich_limit]:
                enrich_item_with_body(it)
        except Exception as e:
            print(f" [warn] 正文抓取: {e}")

    # 4. Map onto the front-end DB fields and update the tables.
    n_panel = write_updates(new_items) if new_items else 0
    if new_items:
        _extract_and_merge(new_items, path)

    # 5. Notify (only when something changed; optional pre-notify hook first,
    # e.g. for GDELT backfill).
    if on_notify:
        on_notify()
    if notify and (n_panel > 0 or n_news > 0):
        _notify_api(base)

    return len(items), n_news, n_panel
|
||||||
456
crawler/realtime_conflict_service.py
Normal file
456
crawler/realtime_conflict_service.py
Normal file
@@ -0,0 +1,456 @@
|
|||||||
|
# -*- coding: utf-8 -*-
"""
GDELT real-time conflict crawler + API service.

Primary data source: the GDELT Project — roughly 15-minute update cadence,
with coordinates, event coding, actors and event intensity.
"""
import os
# Go direct to the internet by default to avoid ProxyError / timeouts from a
# system proxy (set CRAWLER_USE_PROXY=1 when a proxy is required).
if os.environ.get("CRAWLER_USE_PROXY") != "1":
    os.environ.setdefault("NO_PROXY", "*")

import hashlib
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import List, Optional

import asyncio
import logging
import requests
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

logging.getLogger("uvicorn").setLevel(logging.INFO)
app = FastAPI(title="GDELT Conflict Service")
# CORS is wide open; presumably fine for a local dashboard — confirm before
# exposing this service publicly.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"])

# Configuration (all overridable via environment variables).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))
API_BASE = os.environ.get("API_BASE", "http://localhost:3001")
QUERY = os.environ.get("GDELT_QUERY", "United States Iran military")
MAX_RECORDS = int(os.environ.get("GDELT_MAX_RECORDS", "30"))
FETCH_INTERVAL_SEC = int(os.environ.get("FETCH_INTERVAL_SEC", "60"))
RSS_INTERVAL_SEC = int(os.environ.get("RSS_INTERVAL_SEC", "60"))  # poll mainstream world media every minute
# Time window: 1h = 1 hour, 1d = 1 day, 1week = 1 week; GDELT's default is
# about 3 months, which tends to return stale articles.
GDELT_TIMESPAN = os.environ.get("GDELT_TIMESPAN", "1d")
# Set to 1 to skip GDELT and use RSS news only as the event timeline
# (GDELT may be unreachable from some regions).
GDELT_DISABLED = os.environ.get("GDELT_DISABLED", "0") == "1"

# Default attack-origin coordinate used when an event has no geo data.
IRAN_COORD = [51.3890, 35.6892]  # Tehran [lng, lat]

# Requests go direct, bypassing the system proxy (avoids ProxyError / proxy
# timeouts) unless CRAWLER_USE_PROXY=1.
_REQ_KW = {"timeout": 15, "headers": {"User-Agent": "US-Iran-Dashboard/1.0"}}
if os.environ.get("CRAWLER_USE_PROXY") != "1":
    _REQ_KW["proxies"] = {"http": None, "https": None}

# In-memory cache of the most recent events, newest first.
EVENT_CACHE: List[dict] = []
|
||||||
|
|
||||||
|
|
||||||
|
# ==========================
# Conflict intensity score (1-10)
# ==========================
def calculate_impact_score(title: str) -> int:
    """Score a headline's conflict intensity on a 1-10 scale.

    Starts at 1 and adds a fixed weight for every keyword group present
    (English and Chinese keywords), capped at 10.
    """
    weighted_groups = (
        (3, ("missile", "导弹")),
        (2, ("strike", "袭击", "打击")),
        (4, ("killed", "death", "casualt", "死亡", "伤亡")),
        (2, ("troops", "soldier", "士兵", "军人")),
        (3, ("attack", "attacked", "攻击")),
        (4, ("nuclear", "核")),
        (2, ("explosion", "blast", "bomb", "爆炸")),
    )
    t = (title or "").lower()
    score = 1
    for weight, words in weighted_groups:
        if any(w in t for w in words):
            score += weight
    return min(score, 10)
|
||||||
|
|
||||||
|
|
||||||
|
# 根据 severity 映射到 impact_score
|
||||||
|
def _severity_to_score(sev: str) -> int:
|
||||||
|
m = {"critical": 9, "high": 7, "medium": 5, "low": 2}
|
||||||
|
return m.get((sev or "").lower(), 5)
|
||||||
|
|
||||||
|
|
||||||
|
# 根据文本推断坐标 [lng, lat],用于 GDELT 禁用时 RSS→gdelt_events
|
||||||
|
_LOC_COORDS = [
|
||||||
|
(["阿克罗蒂里", "akrotiri", "塞浦路斯", "cyprus"], (32.98, 34.58)),
|
||||||
|
(["巴格拉姆", "bagram", "阿富汗", "afghanistan"], (69.26, 34.95)),
|
||||||
|
(["巴格达", "baghdad", "伊拉克", "iraq"], (44.37, 33.31)),
|
||||||
|
(["贝鲁特", "beirut", "黎巴嫩", "lebanon"], (35.49, 33.89)),
|
||||||
|
(["耶路撒冷", "jerusalem", "特拉维夫", "tel aviv", "以色列", "israel"], (35.21, 31.77)),
|
||||||
|
(["阿巴斯港", "bandar abbas", "霍尔木兹", "hormuz"], (56.27, 27.18)),
|
||||||
|
(["米纳布", "minab"], (57.08, 27.13)),
|
||||||
|
(["德黑兰", "tehran", "伊朗", "iran"], (51.389, 35.689)),
|
||||||
|
(["大马士革", "damascus", "叙利亚", "syria"], (36.28, 33.50)),
|
||||||
|
(["迪拜", "dubai", "阿联酋", "uae"], (55.27, 25.20)),
|
||||||
|
(["沙特", "saudi"], (46.73, 24.71)),
|
||||||
|
(["巴基斯坦", "pakistan"], (73.06, 33.72)),
|
||||||
|
(["奥斯汀", "austin"], (-97.74, 30.27)),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_coords(text: str) -> tuple:
|
||||||
|
t = (text or "").lower()
|
||||||
|
for kws, (lng, lat) in _LOC_COORDS:
|
||||||
|
for k in kws:
|
||||||
|
if k in t:
|
||||||
|
return (lng, lat)
|
||||||
|
return (IRAN_COORD[0], IRAN_COORD[1])
|
||||||
|
|
||||||
|
|
||||||
|
# ==========================
# Fetch real-time GDELT events
# ==========================
def _parse_article(article: dict) -> Optional[dict]:
    """Convert one GDELT article dict into the internal event dict.

    Returns None when the article carries no usable title. Articles with
    missing or unparseable coordinates default to the Tehran coordinate
    (the assumed attack origin).
    """
    title_raw = article.get("title") or article.get("seendate") or ""
    if not title_raw:
        return None
    from translate_utils import translate_to_chinese
    from cleaner_ai import clean_news_for_panel
    title = translate_to_chinese(str(title_raw)[:500])
    title = clean_news_for_panel(title, max_len=150)
    url = article.get("url") or article.get("socialimage") or ""
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 and
    # yields a naive timestamp — confirm downstream treats it as UTC.
    seendate = article.get("seendate") or datetime.utcnow().isoformat()
    lat = article.get("lat")
    lng = article.get("lng")
    # Use the Tehran coordinate (attack origin) when geo data is missing.
    if lat is None or lng is None:
        lat, lng = IRAN_COORD[1], IRAN_COORD[0]
    try:
        lat, lng = float(lat), float(lng)
    except (TypeError, ValueError):
        lat, lng = IRAN_COORD[1], IRAN_COORD[0]
    # Score intensity from the untranslated title so English keywords match.
    impact = calculate_impact_score(title_raw)
    # Stable id derived from url+seendate so re-fetches overwrite in place.
    event_id = hashlib.sha256(f"{url}{seendate}".encode()).hexdigest()[:24]
    return {
        "event_id": event_id,
        "event_time": seendate,
        "title": title[:500],
        "lat": lat,
        "lng": lng,
        "impact_score": impact,
        "url": url,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_gdelt_events() -> None:
    """Pull the latest articles from the GDELT DOC 2.0 API, score them,
    refresh EVENT_CACHE, persist them to SQLite and notify the Node API.

    No-op when GDELT_DISABLED is set. Failures are logged and swallowed so
    the periodic scheduler keeps running.
    """
    if GDELT_DISABLED:
        return
    # Bug fix: the query string previously contained a mangled "×pan="
    # (HTML-entity corruption of "&timespan="), so the time window was never
    # sent and GDELT silently fell back to its ~3-month default.
    url = (
        "https://api.gdeltproject.org/api/v2/doc/doc"
        f"?query={QUERY}"
        "&mode=ArtList"
        "&format=json"
        f"&maxrecords={MAX_RECORDS}"
        f"&timespan={GDELT_TIMESPAN}"
        "&sort=datedesc"
    )
    try:
        resp = requests.get(url, **_REQ_KW)
        resp.raise_for_status()
        data = resp.json()
        # The API may return {"articles": [...]} or a bare list.
        articles = data.get("articles", data) if isinstance(data, dict) else (data if isinstance(data, list) else [])
        if not isinstance(articles, list):
            articles = []
        new_events = []
        for a in articles:
            ev = _parse_article(a) if isinstance(a, dict) else None
            if ev:
                new_events.append(ev)
        # Sort by event_time, newest first.
        new_events.sort(key=lambda e: e.get("event_time", ""), reverse=True)
        global EVENT_CACHE
        EVENT_CACHE = new_events
        # Persist to SQLite and notify Node.
        _write_to_db(new_events)
        _notify_node()
        print(f"[{datetime.now().strftime('%H:%M:%S')}] GDELT 更新 {len(new_events)} 条事件")
    except Exception as e:
        # Previously swallowed silently; log so outages are diagnosable.
        print(f" [warn] GDELT fetch failed: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_table(conn: sqlite3.Connection) -> None:
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS gdelt_events (
|
||||||
|
event_id TEXT PRIMARY KEY,
|
||||||
|
event_time TEXT NOT NULL,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
lat REAL NOT NULL,
|
||||||
|
lng REAL NOT NULL,
|
||||||
|
impact_score INTEGER NOT NULL,
|
||||||
|
url TEXT,
|
||||||
|
created_at TEXT DEFAULT (datetime('now'))
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS conflict_stats (
|
||||||
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
|
total_events INTEGER NOT NULL,
|
||||||
|
high_impact_events INTEGER NOT NULL,
|
||||||
|
estimated_casualties INTEGER NOT NULL,
|
||||||
|
estimated_strike_count INTEGER NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _write_to_db(events: List[dict]) -> None:
    """Upsert GDELT events plus a derived conflict_stats row into SQLite.

    Best-effort: rolls back and logs on any failure; never raises.
    """
    if not os.path.exists(DB_PATH):
        return
    conn = sqlite3.connect(DB_PATH, timeout=10)
    try:
        _ensure_table(conn)
        for e in events:
            # Upsert keyed on event_id so re-fetched events overwrite in place.
            conn.execute(
                "INSERT OR REPLACE INTO gdelt_events (event_id, event_time, title, lat, lng, impact_score, url) VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    e["event_id"],
                    e.get("event_time", ""),
                    e.get("title", ""),
                    e.get("lat", 0),
                    e.get("lng", 0),
                    e.get("impact_score", 1),
                    e.get("url", ""),
                ),
            )
        # Display-oriented loss model: rough heuristics, not real figures.
        high = sum(1 for x in events if x.get("impact_score", 0) >= 7)
        strikes = sum(1 for x in events if "strike" in (x.get("title") or "").lower() or "attack" in (x.get("title") or "").lower())
        casualties = min(5000, high * 80 + len(events) * 10)  # estimate
        conn.execute(
            "INSERT OR REPLACE INTO conflict_stats (id, total_events, high_impact_events, estimated_casualties, estimated_strike_count, updated_at) VALUES (1, ?, ?, ?, ?, ?)",
            (len(events), high, casualties, strikes, datetime.utcnow().isoformat()),
        )
        # Touch the singleton situation row so watchers see a fresh updated_at.
        # NOTE(review): this overwrites situation.data with '{}' — confirm a
        # downstream consumer rebuilds it, otherwise data is lost here.
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # consider datetime.now(timezone.utc) (timezone is not imported here).
        conn.execute(
            "INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)",
            (datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.000Z"),),
        )
        conn.commit()
    except Exception as e:
        print(f"写入 DB 失败: {e}")
        conn.rollback()
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _notify_node() -> None:
    """Ping the Node API (/api/crawler/notify) so it broadcasts fresh data.

    Never raises; failures are only logged.
    """
    try:
        token = os.environ.get("API_CRAWLER_TOKEN", "").strip()
        headers = {"X-Crawler-Token": token} if token else {}
        response = requests.post(
            f"{API_BASE}/api/crawler/notify",
            timeout=5,
            headers=headers,
            proxies={"http": None, "https": None},
        )
        if response.status_code != 200:
            print(" [warn] notify API 失败")
    except Exception as e:
        print(f" [warn] notify API: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _rss_to_gdelt_fallback() -> None:
    """When GDELT is disabled, mirror recent situation_update rows into
    gdelt_events so the map still shows conflict points."""
    if not GDELT_DISABLED or not os.path.exists(DB_PATH):
        return
    try:
        conn = sqlite3.connect(DB_PATH, timeout=10)
        rows = conn.execute(
            "SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 50"
        ).fetchall()
        conn.close()
        events = []
        for r in rows:
            uid, ts, cat, summary, sev = r
            # Geolocate from the summary text; severity drives the score.
            lng, lat = _infer_coords((summary or "")[:300])
            impact = _severity_to_score(sev)
            events.append({
                "event_id": f"rss_{uid}",
                "event_time": ts,
                "title": (summary or "")[:500],
                "lat": lat,
                "lng": lng,
                "impact_score": impact,
                "url": "",
            })
        if events:
            # Replace the cache, persist, and notify — same path as GDELT.
            global EVENT_CACHE
            EVENT_CACHE = events
            _write_to_db(events)
            _notify_node()
    except Exception as e:
        print(f" [warn] RSS→gdelt fallback: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# ==========================
|
||||||
|
# RSS 新闻抓取:使用统一流水线(抓取 → 清洗 → 去重 → 映射 → 写表 → 通知)
|
||||||
|
# ==========================
|
||||||
|
LAST_FETCH = {"items": 0, "inserted": 0, "error": None}
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_news() -> None:
    """Run the full write pipeline (fetch → clean → dedupe → store).

    When GDELT is disabled, RSS items are mirrored into gdelt_events so the
    map still shows conflict points; the Node API is notified afterwards
    either way. Errors are recorded in LAST_FETCH["error"] for /crawler/status.
    """
    try:
        from pipeline import run_full_pipeline

        LAST_FETCH["error"] = None
        fetched, inserted, panel = run_full_pipeline(
            db_path=DB_PATH,
            api_base=API_BASE,
            translate=True,
            notify=False,  # we notify below ourselves, after the GDELT fallback
        )
        LAST_FETCH["items"] = fetched
        LAST_FETCH["inserted"] = inserted
        if GDELT_DISABLED:
            _rss_to_gdelt_fallback()
        _notify_node()
        stamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{stamp}] RSS 抓取 {fetched} 条,去重后新增 {inserted} 条资讯,写入事件脉络 {panel} 条")
        if fetched == 0:
            print(f"[{stamp}] (0 条:检查网络、RSS 源或 KEYWORDS 过滤)")
    except Exception as e:
        LAST_FETCH["error"] = str(e)
        print(f"[{datetime.now().strftime('%H:%M:%S')}] 新闻抓取失败: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# ==========================
|
||||||
|
# 定时任务(asyncio 后台任务,避免 APScheduler executor 关闭竞态)
|
||||||
|
# ==========================
|
||||||
|
# Handle to the periodic fetch task; created in startup(), cancelled in shutdown().
_bg_task: Optional[asyncio.Task] = None
|
||||||
|
|
||||||
|
|
||||||
|
async def _periodic_fetch() -> None:
    """Background loop: run both blocking fetchers in a thread, then sleep.

    Runs until cancelled (on shutdown). The sleep uses the shorter of the two
    configured intervals so neither the RSS nor the GDELT feed falls behind.
    """
    # get_event_loop() is deprecated inside coroutines; use the running loop.
    loop = asyncio.get_running_loop()
    while True:
        try:
            await loop.run_in_executor(None, fetch_news)
            await loop.run_in_executor(None, fetch_gdelt_events)
        except asyncio.CancelledError:
            break
        except Exception as e:
            print(f" [warn] 定时抓取: {e}")
        await asyncio.sleep(min(RSS_INTERVAL_SEC, FETCH_INTERVAL_SEC))
|
||||||
|
|
||||||
|
|
||||||
|
# ==========================
|
||||||
|
# API 接口
|
||||||
|
# ==========================
|
||||||
|
@app.post("/crawler/backfill")
def crawler_backfill():
    """Re-extract panel data (combat losses / retaliation / key locations)
    from the most recent situation_update rows and merge it into the DB.

    Used to repair history stored before extraction existed.
    Returns {"ok": True, "processed": N, "merged": M} or an error payload.
    """
    if not os.path.exists(DB_PATH):
        return {"ok": False, "error": "db not found"}
    try:
        from db_merge import merge

        # Extractor priority: DashScope (if keyed) > rules (if AI disabled) > Ollama.
        if bool(os.environ.get("DASHSCOPE_API_KEY", "").strip()):
            from extractor_dashscope import extract_from_news
        elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1":
            from extractor_rules import extract_from_news
        else:
            from extractor_ai import extract_from_news

        conn = sqlite3.connect(DB_PATH, timeout=10)
        try:
            rows = conn.execute(
                "SELECT id, timestamp, category, summary FROM situation_update ORDER BY timestamp DESC LIMIT 50"
            ).fetchall()
        finally:
            # Close even when the query raises (connection leaked before).
            conn.close()

        merged = 0
        for uid, ts, cat, summary in rows:
            text = ((cat or "") + " " + (summary or "")).strip()
            if len(text) < 20:  # too short to extract anything meaningful
                continue
            try:
                extracted = extract_from_news(text, timestamp=ts)
                if extracted and merge(extracted, db_path=DB_PATH):
                    merged += 1
            except Exception:
                # Deliberate best-effort: one bad row must not abort the backfill.
                pass
        _notify_node()
        return {"ok": True, "processed": len(rows), "merged": merged}
    except Exception as e:
        return {"ok": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/crawler/status")
def crawler_status():
    """Health/debug endpoint: DB presence, situation_update row count and
    last-fetch stats, for tracing the data-update chain.

    Note: the previous function-local `import os` was redundant — `os` is
    already imported at module level.
    """
    db_ok = os.path.exists(DB_PATH)
    total = 0
    if db_ok:
        conn = None
        try:
            conn = sqlite3.connect(DB_PATH, timeout=3)
            total = conn.execute("SELECT COUNT(*) FROM situation_update").fetchone()[0]
        except Exception:
            # Best-effort: report 0 rather than fail the status endpoint.
            pass
        finally:
            if conn is not None:
                conn.close()
    return {
        "db_path": DB_PATH,
        "db_exists": db_ok,
        "situation_update_count": total,
        "last_fetch_items": LAST_FETCH.get("items", 0),
        "last_fetch_inserted": LAST_FETCH.get("inserted", 0),
        "last_fetch_error": LAST_FETCH.get("error"),
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/events")
def get_events():
    """Current in-memory event cache plus aggregate conflict statistics."""
    stats = _get_conflict_stats()
    return {
        "updated_at": datetime.utcnow().isoformat(),
        "count": len(EVENT_CACHE),
        "events": EVENT_CACHE,
        "conflict_stats": stats,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _get_conflict_stats() -> dict:
    """Read the single aggregate row from conflict_stats (id = 1).

    Returns all-zero stats when the DB or row is missing, or on any error
    (the endpoint must never fail because of stats).
    """
    # Single source of truth for the zero payload (was duplicated twice).
    empty = {
        "total_events": 0,
        "high_impact_events": 0,
        "estimated_casualties": 0,
        "estimated_strike_count": 0,
    }
    if not os.path.exists(DB_PATH):
        return empty
    try:
        conn = sqlite3.connect(DB_PATH, timeout=5)
        try:
            row = conn.execute(
                "SELECT total_events, high_impact_events, estimated_casualties, estimated_strike_count FROM conflict_stats WHERE id = 1"
            ).fetchone()
        finally:
            # Close even when the query raises (connection leaked before).
            conn.close()
        if row:
            # Column order matches the key order of `empty` above.
            return dict(zip(empty.keys(), row))
    except Exception:
        pass  # fall through to the zero payload
    return empty
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")  # NOTE(review): on_event is deprecated in newer FastAPI; consider lifespan handlers — verify installed version
async def startup():
    """Start the background fetch loop only; do not block on a first fetch,
    so startup stays fast and /crawler/status becomes ready quickly."""
    global _bg_task
    # Keep a handle so shutdown() can cancel and await the task.
    _bg_task = asyncio.create_task(_periodic_fetch())
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("shutdown")
async def shutdown():
    """Cancel the background fetch task and wait for it to finish cleanly."""
    global _bg_task
    task = _bg_task
    if task is None or task.done():
        return
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass  # expected: the task exits via cancellation
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running the service directly: `python realtime_conflict_service.py`.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||||
8
crawler/requirements.txt
Normal file
8
crawler/requirements.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
requests>=2.31.0
|
||||||
|
feedparser>=6.0.0
|
||||||
|
beautifulsoup4>=4.12.0
|
||||||
|
pytest>=7.0.0
|
||||||
|
fastapi>=0.109.0
|
||||||
|
uvicorn>=0.27.0
|
||||||
|
deep-translator>=1.11.0
|
||||||
|
dashscope>=1.20.0
|
||||||
51
crawler/run_once.py
Normal file
51
crawler/run_once.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
单独运行爬虫一轮:抓取 → 清洗 → 去重 → 写库 → 通知 Node(可选)
|
||||||
|
终端直接输出抓取条数及内容摘要,便于排查。
|
||||||
|
用法(项目根或 crawler 目录):
|
||||||
|
python run_once.py
|
||||||
|
python -c "import run_once; run_once.main()"
|
||||||
|
或: npm run crawler:once
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# 保证可导入同目录模块
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
def main():
    """Run one crawl round (fetch → clean → dedupe → store → notify) and
    print a human-readable summary. Returns 0 for use as an exit code."""
    from config import DB_PATH, API_BASE
    from pipeline import run_full_pipeline

    start_date = os.environ.get("CRAWL_START_DATE", "").strip()
    print("========================================")
    print("爬虫单次运行(RSS → 清洗 → 去重 → 写库)")
    print("DB:", DB_PATH)
    print("API_BASE:", API_BASE)
    if start_date:
        print("时间范围: 仅保留 CRAWL_START_DATE 之后:", start_date)
    print("========================================\n")

    fetched, news_added, panel_added = run_full_pipeline(
        db_path=DB_PATH,
        api_base=API_BASE,
        translate=True,
        notify=True,  # single-shot run: tell the Node API immediately
    )

    print("")
    print("----------------------------------------")
    print("本轮结果:")
    print(f" 抓取: {fetched} 条")
    print(f" 去重后新增资讯: {news_added} 条")
    print(f" 写入事件脉络: {panel_added} 条")
    if fetched == 0:
        print(" (0 条:检查网络、RSS 源或 config.KEYWORDS 过滤)")
    print("----------------------------------------")
    return 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
1
crawler/scrapers/__init__.py
Normal file
1
crawler/scrapers/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
BIN
crawler/scrapers/__pycache__/__init__.cpython-311.pyc
Normal file
BIN
crawler/scrapers/__pycache__/__init__.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/scrapers/__pycache__/__init__.cpython-39.pyc
Normal file
BIN
crawler/scrapers/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
crawler/scrapers/__pycache__/rss_scraper.cpython-311.pyc
Normal file
BIN
crawler/scrapers/__pycache__/rss_scraper.cpython-311.pyc
Normal file
Binary file not shown.
BIN
crawler/scrapers/__pycache__/rss_scraper.cpython-39.pyc
Normal file
BIN
crawler/scrapers/__pycache__/rss_scraper.cpython-39.pyc
Normal file
Binary file not shown.
94
crawler/scrapers/rss_scraper.py
Normal file
94
crawler/scrapers/rss_scraper.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""RSS 抓取:按源独立超时与错误隔离,单源失败不影响其他源"""
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
import feedparser
|
||||||
|
|
||||||
|
from config import KEYWORDS, FEED_TIMEOUT, get_feed_sources
|
||||||
|
from parser_ai import classify_and_severity
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_date(entry) -> datetime:
|
||||||
|
for attr in ("published_parsed", "updated_parsed"):
|
||||||
|
val = getattr(entry, attr, None)
|
||||||
|
if val:
|
||||||
|
try:
|
||||||
|
return datetime(*val[:6], tzinfo=timezone.utc)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_html(s: str) -> str:
|
||||||
|
return re.sub(r"<[^>]+>", "", s) if s else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _matches_keywords(text: str) -> bool:
    """True when the lowercased text contains at least one configured keyword.

    Case-insensitive substring match against config.KEYWORDS; None is
    treated as the empty string.
    """
    t = (text or "").lower()
    return any(k.lower() in t for k in KEYWORDS)
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_one_feed(name: str, url: str, timeout: int) -> list[dict]:
    """Fetch one RSS feed; on timeout or error return []. No dedup here.

    feedparser has no timeout parameter of its own, so the global socket
    default timeout is swapped in for the duration of the parse call.
    """
    saved_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        feed = feedparser.parse(
            url,
            request_headers={"User-Agent": "US-Iran-Dashboard/1.0"},
            agent="US-Iran-Dashboard/1.0",
        )
    except Exception as e:
        print(f" [rss] {name} error: {e}")
        return []
    finally:
        socket.setdefaulttimeout(saved_timeout)

    results: list[dict] = []
    for entry in feed.entries:
        title = getattr(entry, "title", "") or ""
        raw = getattr(entry, "summary", "") or getattr(entry, "description", "") or ""
        summary = _strip_html(raw)
        combined = f"{title} {summary}"
        if not _matches_keywords(combined):
            continue
        category, severity = classify_and_severity(combined)
        results.append({
            "title": title,
            "summary": summary[:400] if summary else title,
            "url": getattr(entry, "link", "") or "",
            "published": _parse_date(entry),
            "category": category,
            "severity": severity,
            "source": name,
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_all() -> list[dict]:
    """Fetch every configured RSS source with per-source error isolation,
    deduplicating across sources by (title prefix, link)."""
    sources = get_feed_sources()
    if not sources:
        return []

    deduped: list[dict] = []
    seen_keys: set[tuple[str, str]] = set()
    for name, url in sources:
        for item in _fetch_one_feed(name, url, FEED_TIMEOUT):
            key = (item["title"][:80], item["url"])
            if key in seen_keys:
                continue
            seen_keys.add(key)
            # The DB schema has no `source` column yet, so drop it before storing.
            deduped.append({k: v for k, v in item.items() if k != "source"})
    return deduped
|
||||||
1
crawler/tests/__init__.py
Normal file
1
crawler/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# crawler tests
|
||||||
BIN
crawler/tests/__pycache__/__init__.cpython-39.pyc
Normal file
BIN
crawler/tests/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
Binary file not shown.
198
crawler/tests/test_extraction.py
Normal file
198
crawler/tests/test_extraction.py
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
爬虫数据清洗与字段映射测试
|
||||||
|
验证 extractor_rules、extractor_dashscope、db_merge 的正确性
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# 确保 crawler 在 path 中
|
||||||
|
ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
if str(ROOT) not in __import__("sys").path:
|
||||||
|
__import__("sys").path.insert(0, str(ROOT))
|
||||||
|
|
||||||
|
from extractor_rules import extract_from_news as extract_rules
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractorRules:
|
||||||
|
"""规则提取器单元测试"""
|
||||||
|
|
||||||
|
def test_trump_1000_targets_no_bases(self):
|
||||||
|
"""特朗普说伊朗有1000个军事目标遭到袭击 -> 不应提取 bases_destroyed/bases_damaged"""
|
||||||
|
text = "特朗普说伊朗有1000个军事目标遭到袭击,美国已做好进一步打击准备"
|
||||||
|
out = extract_rules(text)
|
||||||
|
delta = out.get("combat_losses_delta", {})
|
||||||
|
for side in ("us", "iran"):
|
||||||
|
if side in delta:
|
||||||
|
assert delta[side].get("bases_destroyed") is None, f"{side} bases_destroyed 不应被提取"
|
||||||
|
assert delta[side].get("bases_damaged") is None, f"{side} bases_damaged 不应被提取"
|
||||||
|
|
||||||
|
def test_base_damaged_when_explicit(self):
|
||||||
|
"""阿萨德基地遭袭 -> 应提取 key_location_updates,且 combat_losses 若有则正确"""
|
||||||
|
text = "阿萨德空军基地遭袭,损失严重"
|
||||||
|
out = extract_rules(text)
|
||||||
|
# 规则会触发 key_location_updates(因为 base_attacked 且匹配 阿萨德)
|
||||||
|
assert "key_location_updates" in out
|
||||||
|
kl = out["key_location_updates"]
|
||||||
|
assert len(kl) >= 1
|
||||||
|
assert any(u.get("side") == "us" and "阿萨德" in (u.get("name_keywords") or "") for u in kl)
|
||||||
|
|
||||||
|
def test_us_personnel_killed(self):
|
||||||
|
"""3名美军阵亡 -> personnel_killed=3"""
|
||||||
|
text = "据报道,3名美军阵亡,另有5人受伤"
|
||||||
|
out = extract_rules(text)
|
||||||
|
assert "combat_losses_delta" in out
|
||||||
|
us = out["combat_losses_delta"].get("us", {})
|
||||||
|
assert us.get("personnel_killed") == 3
|
||||||
|
assert us.get("personnel_wounded") == 5
|
||||||
|
|
||||||
|
def test_iran_personnel_killed(self):
|
||||||
|
"""10名伊朗士兵死亡"""
|
||||||
|
text = "伊朗方面称10名伊朗士兵死亡"
|
||||||
|
out = extract_rules(text)
|
||||||
|
iran = out.get("combat_losses_delta", {}).get("iran", {})
|
||||||
|
assert iran.get("personnel_killed") == 10
|
||||||
|
|
||||||
|
def test_civilian_us_context(self):
|
||||||
|
"""美军空袭造成50名平民伤亡 -> loss_us"""
|
||||||
|
text = "美军空袭造成50名平民伤亡"
|
||||||
|
out = extract_rules(text)
|
||||||
|
us = out.get("combat_losses_delta", {}).get("us", {})
|
||||||
|
assert us.get("civilian_killed") == 50
|
||||||
|
|
||||||
|
def test_civilian_iran_context(self):
|
||||||
|
"""伊朗空袭造成伊拉克平民50人伤亡 -> loss_ir"""
|
||||||
|
text = "伊朗空袭造成伊拉克平民50人伤亡"
|
||||||
|
out = extract_rules(text)
|
||||||
|
iran = out.get("combat_losses_delta", {}).get("iran", {})
|
||||||
|
assert iran.get("civilian_killed") == 50
|
||||||
|
|
||||||
|
def test_drone_attribution_iran(self):
|
||||||
|
"""美军击落伊朗10架无人机 -> iran drones=10"""
|
||||||
|
text = "美军击落伊朗10架无人机"
|
||||||
|
out = extract_rules(text)
|
||||||
|
iran = out.get("combat_losses_delta", {}).get("iran", {})
|
||||||
|
assert iran.get("drones") == 10
|
||||||
|
|
||||||
|
def test_empty_or_short_text(self):
|
||||||
|
"""短文本或无内容 -> 无 combat_losses"""
|
||||||
|
assert extract_rules("") == {} or "combat_losses_delta" not in extract_rules("")
|
||||||
|
assert "combat_losses_delta" not in extract_rules("abc") or not extract_rules("abc").get("combat_losses_delta")
|
||||||
|
|
||||||
|
|
||||||
|
class TestDbMerge:
|
||||||
|
"""db_merge 字段映射与增量逻辑测试"""
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def temp_db(self):
|
||||||
|
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
|
||||||
|
path = f.name
|
||||||
|
yield path
|
||||||
|
try:
|
||||||
|
os.unlink(path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def test_merge_combat_losses_delta(self, temp_db):
|
||||||
|
"""merge 正确将 combat_losses_delta 叠加到 DB"""
|
||||||
|
from db_merge import merge
|
||||||
|
|
||||||
|
merge({"combat_losses_delta": {"us": {"personnel_killed": 3, "personnel_wounded": 2}}}, db_path=temp_db)
|
||||||
|
merge({"combat_losses_delta": {"us": {"personnel_killed": 2}}}, db_path=temp_db)
|
||||||
|
|
||||||
|
conn = sqlite3.connect(temp_db)
|
||||||
|
row = conn.execute("SELECT personnel_killed, personnel_wounded FROM combat_losses WHERE side='us'").fetchone()
|
||||||
|
conn.close()
|
||||||
|
assert row[0] == 5
|
||||||
|
assert row[1] == 2
|
||||||
|
|
||||||
|
def test_merge_all_combat_fields(self, temp_db):
|
||||||
|
"""merge 正确映射所有 combat_losses 字段"""
|
||||||
|
from db_merge import merge
|
||||||
|
|
||||||
|
delta = {
|
||||||
|
"personnel_killed": 1,
|
||||||
|
"personnel_wounded": 2,
|
||||||
|
"civilian_killed": 3,
|
||||||
|
"civilian_wounded": 4,
|
||||||
|
"bases_destroyed": 1,
|
||||||
|
"bases_damaged": 2,
|
||||||
|
"aircraft": 3,
|
||||||
|
"warships": 4,
|
||||||
|
"armor": 5,
|
||||||
|
"vehicles": 6,
|
||||||
|
"drones": 7,
|
||||||
|
"missiles": 8,
|
||||||
|
"helicopters": 9,
|
||||||
|
"submarines": 10,
|
||||||
|
}
|
||||||
|
merge({"combat_losses_delta": {"iran": delta}}, db_path=temp_db)
|
||||||
|
|
||||||
|
conn = sqlite3.connect(temp_db)
|
||||||
|
row = conn.execute(
|
||||||
|
"""SELECT personnel_killed, personnel_wounded, civilian_killed, civilian_wounded,
|
||||||
|
bases_destroyed, bases_damaged, aircraft, warships, armor, vehicles,
|
||||||
|
drones, missiles, helicopters, submarines FROM combat_losses WHERE side='iran'"""
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
assert row == (1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
|
||||||
|
|
||||||
|
def test_merge_key_location_requires_table(self, temp_db):
|
||||||
|
"""key_location_updates 需要 key_location 表中有行才能更新"""
|
||||||
|
from db_merge import merge
|
||||||
|
|
||||||
|
conn = sqlite3.connect(temp_db)
|
||||||
|
conn.execute(
|
||||||
|
"""CREATE TABLE IF NOT EXISTS key_location (id INTEGER PRIMARY KEY, side TEXT, name TEXT, lat REAL, lng REAL, type TEXT, region TEXT, status TEXT, damage_level INTEGER)"""
|
||||||
|
)
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO key_location (side, name, lat, lng, type, region, status, damage_level) VALUES ('us', '阿萨德空军基地', 33.0, 43.0, 'Base', 'IRQ', 'operational', 0)"
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
merge(
|
||||||
|
{"key_location_updates": [{"name_keywords": "阿萨德|asad", "side": "us", "status": "attacked", "damage_level": 2}]},
|
||||||
|
db_path=temp_db,
|
||||||
|
)
|
||||||
|
|
||||||
|
conn = sqlite3.connect(temp_db)
|
||||||
|
row = conn.execute("SELECT status, damage_level FROM key_location WHERE name LIKE '%阿萨德%'").fetchone()
|
||||||
|
conn.close()
|
||||||
|
assert row[0] == "attacked"
|
||||||
|
assert row[1] == 2
|
||||||
|
|
||||||
|
|
||||||
|
class TestEndToEndTrumpExample:
|
||||||
|
"""端到端:特朗普 1000 军事目标案例"""
|
||||||
|
|
||||||
|
def test_full_pipeline_trump_no_bases(self, tmp_path):
|
||||||
|
"""完整流程:规则提取 + merge,特朗普案例不应增加 bases"""
|
||||||
|
from db_merge import merge
|
||||||
|
|
||||||
|
db_path = str(tmp_path / "test.db")
|
||||||
|
(tmp_path / "test.db").touch() # 确保文件存在,merge 才会执行
|
||||||
|
merge({"combat_losses_delta": {"us": {"bases_destroyed": 0, "bases_damaged": 0}, "iran": {"bases_destroyed": 0, "bases_damaged": 0}}}, db_path=db_path)
|
||||||
|
|
||||||
|
text = "特朗普说伊朗有1000个军事目标遭到袭击"
|
||||||
|
out = extract_rules(text)
|
||||||
|
# 规则提取不应包含 bases
|
||||||
|
assert "combat_losses_delta" not in out or (
|
||||||
|
"iran" not in out.get("combat_losses_delta", {})
|
||||||
|
or out["combat_losses_delta"].get("iran", {}).get("bases_destroyed") is None
|
||||||
|
and out["combat_losses_delta"].get("iran", {}).get("bases_damaged") is None
|
||||||
|
)
|
||||||
|
if "combat_losses_delta" in out:
|
||||||
|
merge(out, db_path=db_path)
|
||||||
|
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
iran = conn.execute("SELECT bases_destroyed, bases_damaged FROM combat_losses WHERE side='iran'").fetchone()
|
||||||
|
conn.close()
|
||||||
|
# 若提取器没输出 bases,merge 不会改;若有错误输出则需要为 0
|
||||||
|
if iran:
|
||||||
|
assert iran[0] == 0
|
||||||
|
assert iran[1] == 0
|
||||||
38
crawler/translate_utils.py
Normal file
38
crawler/translate_utils.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""英译中,入库前统一翻译"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
def _is_mostly_chinese(text: str) -> bool:
|
||||||
|
if not text or len(text.strip()) < 2:
|
||||||
|
return False
|
||||||
|
chinese = len(re.findall(r"[\u4e00-\u9fff]", text))
|
||||||
|
return chinese / max(len(text), 1) > 0.3
|
||||||
|
|
||||||
|
|
||||||
|
def translate_to_chinese(text: str) -> str:
    """Translate text to Chinese; return the input unchanged when it is empty,
    translation is disabled, the text is already mostly Chinese, or every
    translator fails. Tries Google first, then MyMemory.
    """
    if not text or not text.strip():
        return text
    # Kill switch for offline / rate-limited environments.
    if os.environ.get("TRANSLATE_DISABLED", "0") == "1":
        return text
    s = str(text).strip()
    if len(s) > 2000:
        # Translators reject very long payloads; truncate the working copy only.
        s = s[:2000]
    if _is_mostly_chinese(s):
        return text
    for translator in ["google", "mymemory"]:
        try:
            if translator == "google":
                from deep_translator import GoogleTranslator
                out = GoogleTranslator(source="auto", target="zh-CN").translate(s)
            else:
                # NOTE(review): MyMemoryTranslator may not accept source="auto" /
                # target="zh-CN" in all deep-translator versions; a failure here is
                # silently swallowed by the except below — verify against the
                # installed version.
                from deep_translator import MyMemoryTranslator
                out = MyMemoryTranslator(source="auto", target="zh-CN").translate(s)
            # Accept only a non-empty result that actually differs from the input.
            if out and out.strip() and out != s:
                return out
        except Exception:
            continue  # try the next provider
    return text
|
||||||
13
docker-compose.dev.yml
Normal file
13
docker-compose.dev.yml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# 开发模式:挂载源码 + 热重载,代码更新后无需重新 build
|
||||||
|
# 使用: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||||
|
# 或: docker compose --profile dev up -d (需在 dev 服务加 profiles)
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
volumes:
|
||||||
|
- ./server:/app/server:ro
|
||||||
|
command: ["node", "--watch", "server/index.js"]
|
||||||
|
|
||||||
|
crawler:
|
||||||
|
volumes:
|
||||||
|
- ./crawler:/app
|
||||||
|
command: ["uvicorn", "realtime_conflict_service:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||||
35
docker-compose.yml
Normal file
35
docker-compose.yml
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
services:
|
||||||
|
api:
|
||||||
|
image: usa-dashboard-api:latest
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
args:
|
||||||
|
- VITE_MAPBOX_ACCESS_TOKEN=${VITE_MAPBOX_ACCESS_TOKEN:-}
|
||||||
|
ports:
|
||||||
|
- "3001:3001"
|
||||||
|
environment:
|
||||||
|
- DB_PATH=/data/data.db
|
||||||
|
- API_PORT=3001
|
||||||
|
volumes:
|
||||||
|
- app-data:/data
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
crawler:
|
||||||
|
image: usa-dashboard-crawler:latest
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.crawler
|
||||||
|
environment:
|
||||||
|
- DB_PATH=/data/data.db
|
||||||
|
- API_BASE=http://api:3001
|
||||||
|
- GDELT_DISABLED=1
|
||||||
|
- RSS_INTERVAL_SEC=60
|
||||||
|
- DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY:-}
|
||||||
|
volumes:
|
||||||
|
- app-data:/data
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
app-data:
|
||||||
8
docker-entrypoint.sh
Normal file
8
docker-entrypoint.sh
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
set -e
|
||||||
|
export DB_PATH="${DB_PATH:-/data/data.db}"
|
||||||
|
if [ ! -f "$DB_PATH" ]; then
|
||||||
|
echo "==> Seeding database..."
|
||||||
|
node server/seed.js
|
||||||
|
fi
|
||||||
|
exec node server/index.js
|
||||||
91
docs/BACKEND_MODULES.md
Normal file
91
docs/BACKEND_MODULES.md
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
# 后端模块说明
|
||||||
|
|
||||||
|
## 一、现有模块结构
|
||||||
|
|
||||||
|
```
|
||||||
|
server/
|
||||||
|
├── index.js # HTTP + WebSocket 入口
|
||||||
|
├── routes.js # REST API 路由
|
||||||
|
├── db.js # SQLite schema 与连接
|
||||||
|
├── situationData.js # 态势数据聚合 (从 DB 读取)
|
||||||
|
├── seed.js # 初始数据填充
|
||||||
|
├── data.db # SQLite 数据库
|
||||||
|
└── package.json
|
||||||
|
|
||||||
|
crawler/
|
||||||
|
├── realtime_conflict_service.py # GDELT 实时冲突服务 (核心)
|
||||||
|
├── requirements.txt
|
||||||
|
├── config.py, db_writer.py # 旧 RSS 爬虫(可保留)
|
||||||
|
├── main.py
|
||||||
|
└── README.md
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1. server/index.js
|
||||||
|
- Express + CORS
|
||||||
|
- WebSocket (`/ws`),每 5 秒广播 `situation`
|
||||||
|
- `POST /api/crawler/notify`:爬虫写入后触发立即广播
|
||||||
|
|
||||||
|
### 2. server/routes.js
|
||||||
|
- `GET /api/situation`:完整态势
|
||||||
|
- `GET /api/events`:GDELT 事件 + 冲突统计
|
||||||
|
- `GET /api/health`:健康检查
|
||||||
|
|
||||||
|
### 3. server/db.js
|
||||||
|
- 表:`situation`、`force_summary`、`power_index`、`force_asset`、
|
||||||
|
`key_location`、`combat_losses`、`wall_street_trend`、
|
||||||
|
`retaliation_current`、`retaliation_history`、`situation_update`、
|
||||||
|
**`gdelt_events`**、**`conflict_stats`**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、GDELT 核心数据源
|
||||||
|
|
||||||
|
**GDELT Project**:全球冲突数据库,约 15 分钟级更新,含经纬度、事件编码、参与方、事件强度。
|
||||||
|
|
||||||
|
### realtime_conflict_service.py
|
||||||
|
|
||||||
|
- 定时(默认 60 秒)从 GDELT API 抓取
|
||||||
|
- 冲突强度评分:missile +3, strike +2, killed +4 等
|
||||||
|
- 无经纬度时默认攻击源:`IRAN_COORD = [51.3890, 35.6892]`
|
||||||
|
- 写入 `gdelt_events`、`conflict_stats`
|
||||||
|
- 调用 `POST /api/crawler/notify` 触发 Node 广播
|
||||||
|
|
||||||
|
### 冲突强度 → 地图效果
|
||||||
|
|
||||||
|
| impact_score | 效果 |
|
||||||
|
|--------------|------------|
|
||||||
|
| 1–3 | 绿色点 |
|
||||||
|
| 4–6 | 橙色闪烁 |
|
||||||
|
| 7–10 | 红色脉冲扩散 |
|
||||||
|
|
||||||
|
### 战损统计模型(展示用)
|
||||||
|
|
||||||
|
- `total_events`
|
||||||
|
- `high_impact_events` (impact ≥ 7)
|
||||||
|
- `estimated_casualties`
|
||||||
|
- `estimated_strike_count`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、数据流
|
||||||
|
|
||||||
|
```
|
||||||
|
GDELT API → Python 服务(60s) → gdelt_events, conflict_stats
|
||||||
|
↓
|
||||||
|
POST /api/crawler/notify → situation.updated_at
|
||||||
|
↓
|
||||||
|
WebSocket 广播 getSituation() → 前端
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、运行方式
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 启动 Node API
|
||||||
|
npm run api
|
||||||
|
|
||||||
|
# 2. 启动 GDELT 服务
|
||||||
|
npm run gdelt
|
||||||
|
# 或: cd crawler && uvicorn realtime_conflict_service:app --port 8000
|
||||||
|
```
|
||||||
137
docs/CRAWLER_LOGIC.md
Normal file
137
docs/CRAWLER_LOGIC.md
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
# 爬虫逻辑梳理与数据校验
|
||||||
|
|
||||||
|
## 一、两条入口,数据流不同
|
||||||
|
|
||||||
|
### 1. 入口 A:`npm run crawler`(main.py)
|
||||||
|
|
||||||
|
- **流程**:RSS 抓取 → 关键词过滤 → 分类/严重度 → **直接写 situation_update** → 通知 API
|
||||||
|
- **不经过**:翻译、news_content、AI 提取(战损/基地等)
|
||||||
|
- **写入表**:`situation_update`、`situation.updated_at`
|
||||||
|
- **用途**:轻量、只给「事件脉络」喂新条目,不更新战损/基地/报复指数
|
||||||
|
|
||||||
|
```
|
||||||
|
RSS_FEEDS → fetch_all() → KEYWORDS 过滤 → parser_ai.classify_and_severity
|
||||||
|
→ write_updates(items) → situation_update INSERT + situation 表 touch
|
||||||
|
→ notify_api()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. 入口 B:`npm run gdelt`(realtime_conflict_service.py)
|
||||||
|
|
||||||
|
- **流程**:RSS 抓取 → 翻译 → 清洗 → **news_content 去重** → situation_update → **AI 提取 → db_merge** → GDELT 事件(可选)→ 通知 API
|
||||||
|
- **写入表**:`news_content`、`situation_update`、`situation`;提取后还有 `combat_losses`、`key_location`、`retaliation_*`、`wall_street_trend` 等
|
||||||
|
- **用途**:完整管线,前端「战损 / 军事基地 / 报复 / 美股」等数据都依赖这条
|
||||||
|
|
||||||
|
```
|
||||||
|
RSS → fetch_all() → translate_to_chinese → cleaner_ai → save_and_dedup → news_content
|
||||||
|
→ write_updates(new_items) → situation_update
|
||||||
|
→ _extract_and_merge_panel_data(new_items) → extract_from_news() → db_merge.merge()
|
||||||
|
→ (可选) fetch_gdelt_events() → gdelt_events, conflict_stats
|
||||||
|
→ _notify_node()
|
||||||
|
```
|
||||||
|
|
||||||
|
**结论**:要检查「抓回的数据是否有效」且包含战损/基地等,应跑 **入口 B**(gdelt 服务);若只关心事件脉络条数,可看入口 A。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、入口 B 逐步拆解(用于逐段校验)
|
||||||
|
|
||||||
|
### 2.1 RSS 抓取与过滤
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 源列表 | `config.RSS_FEEDS` | 多国媒体 RSS,见 config.py |
|
||||||
|
| 抓取 | `scrapers.rss_scraper.fetch_all()` | feedparser,单源超时 10s |
|
||||||
|
| 过滤 | `_matches_keywords(text)` | 标题+摘要 至少命中 `config.KEYWORDS` 中一个才保留 |
|
||||||
|
| 去重 | `(title[:80], link)` | 同一条不重复加入当次列表 |
|
||||||
|
| 分类 | `parser_ai.classify_and_severity(text)` | 得到 category、severity(Ollama 或规则) |
|
||||||
|
|
||||||
|
**校验**:`npm run crawler:test` 看本次抓到的条数;若为 0,查网络或放宽/检查 KEYWORDS。
|
||||||
|
|
||||||
|
### 2.2 翻译与清洗(仅入口 B)
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 翻译 | `translate_utils.translate_to_chinese()` | 标题/摘要译成中文(依赖配置) |
|
||||||
|
| 清洗 | `cleaner_ai.clean_news_for_panel()` | 截断、清理;`ensure_category` / `ensure_severity` 合法化 |
|
||||||
|
|
||||||
|
### 2.3 落库:news_content(去重)与 situation_update
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 去重 | `news_storage.save_and_dedup(items)` | 按 `content_hash(title, summary, url)` 判重,只插入新记录 |
|
||||||
|
| 表 | `news_content` | id, content_hash, title, summary, url, source, published_at, category, severity |
|
||||||
|
| 表 | `situation_update` | 仅对 **去重后的 new_items** 调用 `write_updates()`,供前端「事件脉络」 |
|
||||||
|
|
||||||
|
**校验**:
|
||||||
|
|
||||||
|
- `news_content`:`SELECT COUNT(*), MAX(published_at) FROM news_content`
|
||||||
|
- `situation_update`:`SELECT COUNT(*), MAX(timestamp) FROM situation_update`
|
||||||
|
- 服务状态:`GET http://localhost:8000/crawler/status` 看 `last_fetch_items` / `last_fetch_inserted` / `last_fetch_error`
|
||||||
|
|
||||||
|
### 2.4 AI 提取与 db_merge(战损 / 基地 / 报复等)
|
||||||
|
|
||||||
|
| 步骤 | 位置 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| 输入 | `_extract_and_merge_panel_data(new_items)` | 仅处理本次 **新增** 的 new_items,前 limit 条(DashScope 10 条,规则 25 条,Ollama 10 条) |
|
||||||
|
| 文本 | 每条 `title + " " + summary`,长度 < 20 跳过 |
|
||||||
|
| 提取器选择 | 环境变量 | `DASHSCOPE_API_KEY` → extractor_dashscope;`CLEANER_AI_DISABLED=1` → extractor_rules;否则 extractor_ai(Ollama) |
|
||||||
|
| 输出结构 | 见 panel_schema / 各 extractor | `situation_update?`, `combat_losses_delta?`, `retaliation?`, `wall_street?`, `key_location_updates?` |
|
||||||
|
| 合并 | `db_merge.merge(extracted)` | 见下表 |
|
||||||
|
|
||||||
|
**merge 映射概要**:
|
||||||
|
|
||||||
|
| 提取字段 | 写入表/逻辑 |
|
||||||
|
|----------|-------------|
|
||||||
|
| situation_update | situation_update 表 INSERT(id 为 hash) |
|
||||||
|
| combat_losses_delta | combat_losses 表,按 side 增量叠加 |
|
||||||
|
| retaliation | retaliation_current 替换 + retaliation_history 追加 |
|
||||||
|
| wall_street | wall_street_trend 表 INSERT |
|
||||||
|
| key_location_updates | key_location 表 UPDATE status/damage_level(name LIKE 关键词) |
|
||||||
|
|
||||||
|
**校验**:
|
||||||
|
|
||||||
|
- 战损:`SELECT * FROM combat_losses`
|
||||||
|
- 基地:`SELECT id, name, side, status, damage_level FROM key_location WHERE status != 'operational' OR damage_level > 0`
|
||||||
|
- 报复:`SELECT * FROM retaliation_current` 与 `retaliation_history` 最近几条
|
||||||
|
- 事件脉络:`SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 20`
|
||||||
|
|
||||||
|
### 2.5 GDELT(可选)
|
||||||
|
|
||||||
|
- `GDELT_DISABLED=1` 时跳过 GDELT,仅用 RSS;可用 `_rss_to_gdelt_fallback()` 用 RSS 标题生成 gdelt_events。
|
||||||
|
- 未禁用时:`fetch_gdelt_events()` 拉 GDELT → 写 `gdelt_events`、`conflict_stats`。
|
||||||
|
|
||||||
|
**校验**:`SELECT COUNT(*), MAX(event_time) FROM gdelt_events`;`SELECT * FROM conflict_stats WHERE id=1`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、如何检查「抓回的数据是否有效」
|
||||||
|
|
||||||
|
1. **确认跑的入口**
|
||||||
|
- 只跑 `npm run crawler`:只有 situation_update 会有新数据,战损/基地不会变。
|
||||||
|
- 跑 `npm run gdelt` 且服务常驻:才会既有 situation_update,又有 combat_losses、key_location 等。
|
||||||
|
|
||||||
|
2. **看 DB 与 API**
|
||||||
|
- 同上:查 `news_content`、`situation_update`、`combat_losses`、`key_location`、`retaliation_*`、`gdelt_events`、`conflict_stats`。
|
||||||
|
- 前端数据来源:`GET /api/situation`(见 server/situationData.js),对照上述表即可。
|
||||||
|
|
||||||
|
3. **看提取是否触发**
|
||||||
|
- 若 `combat_losses` / `key_location` 一直不更新:确认是入口 B、有 new_items、提取器未报错;可对单条新闻跑 `extract_from_news(text)` 看是否产出 combat_losses_delta / key_location_updates。
|
||||||
|
|
||||||
|
4. **重跑历史提取(补数据)**
|
||||||
|
- `POST http://localhost:8000/crawler/backfill`:用当前 situation_update 最近 50 条重新做一次提取并 merge,可用来修历史未提取的数据。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、配置与环境变量(与数据有效性相关)
|
||||||
|
|
||||||
|
| 变量 | 作用 |
|
||||||
|
|------|------|
|
||||||
|
| DB_PATH | 与 server 共用的 SQLite 路径,必须一致 |
|
||||||
|
| API_BASE | 通知 Node 的地址,merge 后通知前端 |
|
||||||
|
| DASHSCOPE_API_KEY | 有则用 DashScope 提取;无则用 Ollama 或规则 |
|
||||||
|
| CLEANER_AI_DISABLED=1 | 用规则提取(extractor_rules),不用 Ollama |
|
||||||
|
| GDELT_DISABLED=1 | 不用 GDELT,仅 RSS;RSS 可转 gdelt_events 占位 |
|
||||||
|
| CRAWL_INTERVAL | main.py 抓取间隔(秒) |
|
||||||
|
| RSS_INTERVAL_SEC / FETCH_INTERVAL_SEC | realtime 服务里 RSS / GDELT 间隔 |
|
||||||
|
|
||||||
|
按上述顺序对照「入口 → RSS → 去重 → situation_update → 提取 → merge → 表」即可逐段检查爬虫抓回的数据是否有效。
|
||||||
65
docs/CRAWLER_PIPELINE.md
Normal file
65
docs/CRAWLER_PIPELINE.md
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# 爬虫数据流水线
|
||||||
|
|
||||||
|
## 数据流
|
||||||
|
|
||||||
|
```
|
||||||
|
RSS 抓取
|
||||||
|
↓ 翻译、清洗
|
||||||
|
↓ news_storage.save_and_dedup() → 历史去重
|
||||||
|
↓
|
||||||
|
news_content(资讯独立表,供后续消费)
|
||||||
|
↓
|
||||||
|
↓ 去重后的新数据
|
||||||
|
↓
|
||||||
|
situation_update(面板展示用)
|
||||||
|
↓
|
||||||
|
↓ AI 提取(阿里云 DashScope)
|
||||||
|
↓
|
||||||
|
combat_losses / retaliation / key_location / wall_street_trend
|
||||||
|
↓
|
||||||
|
↓ notify Node
|
||||||
|
↓
|
||||||
|
前端 WebSocket + 轮询
|
||||||
|
```
|
||||||
|
|
||||||
|
## 阿里云 DashScope API Key
|
||||||
|
|
||||||
|
设置环境变量 `DASHSCOPE_API_KEY` 后,爬虫使用阿里云通义千问进行 AI 提取。不设置时回退到规则提取(`extractor_rules`)或 Ollama(若可用)。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 本地
|
||||||
|
export DASHSCOPE_API_KEY=sk-xxx
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
DASHSCOPE_API_KEY=sk-xxx docker compose up -d   # compose up 不支持 -e,需通过 shell 环境变量传入
|
||||||
|
# 或在 .env 中写入 DASHSCOPE_API_KEY=sk-xxx
|
||||||
|
```
|
||||||
|
|
||||||
|
## 表说明
|
||||||
|
|
||||||
|
| 表 | 用途 |
|
||||||
|
|----|------|
|
||||||
|
| `news_content` | 资讯原文,独立存储,支持去重(content_hash),供后续消费 |
|
||||||
|
| `situation_update` | 面板「近期更新」展示 |
|
||||||
|
| `combat_losses` | 战损数据(AI/规则提取) |
|
||||||
|
| `key_location` | 基地状态 |
|
||||||
|
| `gdelt_events` | 地图冲突点 |
|
||||||
|
|
||||||
|
## 去重逻辑
|
||||||
|
|
||||||
|
根据 `content_hash = sha256(normalize(title) + normalize(summary) + url)` 判断,相同或高度相似内容视为重复,不入库。
|
||||||
|
|
||||||
|
## 消费资讯
|
||||||
|
|
||||||
|
- HTTP: `GET /api/news?limit=50`
|
||||||
|
- 调试: `/db` 面板查看 `news_content` 表
|
||||||
|
|
||||||
|
## 链路验证
|
||||||
|
|
||||||
|
运行脚本一键检查全链路:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/verify-pipeline.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
支持环境变量覆盖:`API_URL`、`CRAWLER_URL`
|
||||||
62
docs/DATA_FLOW.md
Normal file
62
docs/DATA_FLOW.md
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# 前端数据更新链路与字段映射
|
||||||
|
|
||||||
|
## 1. 前端数据点
|
||||||
|
|
||||||
|
| 组件 | 数据 | API 字段 | DB 表/列 |
|
||||||
|
|------|------|----------|----------|
|
||||||
|
| HeaderPanel | lastUpdated | situation.lastUpdated | situation.updated_at |
|
||||||
|
| HeaderPanel | powerIndex | usForces/iranForces.powerIndex | power_index |
|
||||||
|
| HeaderPanel | feedbackCount, shareCount | POST /api/feedback, /api/share | feedback, share_count |
|
||||||
|
| TimelinePanel | recentUpdates | situation.recentUpdates | situation_update |
|
||||||
|
| WarMap | keyLocations | usForces/iranForces.keyLocations | key_location |
|
||||||
|
| BaseStatusPanel | 基地统计 | keyLocations (status, damage_level) | key_location |
|
||||||
|
| CombatLossesPanel | 人员/平民伤亡 | combatLosses, civilianCasualtiesTotal | combat_losses |
|
||||||
|
| CombatLossesOtherPanel | 装备毁伤 | combatLosses (bases, aircraft, drones, …) | combat_losses |
|
||||||
|
| PowerChart | 雷达图 | powerIndex | power_index |
|
||||||
|
| WallStreetTrend | 美股趋势 | wallStreetInvestmentTrend | wall_street_trend |
|
||||||
|
| RetaliationGauge | 报复指数 | retaliationSentiment | retaliation_current/history |
|
||||||
|
|
||||||
|
**轮询**: `fetchSituation()` 加载,WebSocket `/ws` 每 3 秒广播。`GET /api/situation` → `getSituation()`。
|
||||||
|
|
||||||
|
## 2. 爬虫 → DB 字段映射
|
||||||
|
|
||||||
|
| 提取器输出 | DB 表 | 逻辑 |
|
||||||
|
|------------|-------|------|
|
||||||
|
| situation_update | situation_update | INSERT |
|
||||||
|
| combat_losses_delta | combat_losses | 增量叠加 (ADD) |
|
||||||
|
| retaliation | retaliation_current, retaliation_history | REPLACE / APPEND |
|
||||||
|
| wall_street | wall_street_trend | INSERT |
|
||||||
|
| key_location_updates | key_location | UPDATE status, damage_level WHERE name LIKE |
|
||||||
|
|
||||||
|
### combat_losses 字段对应
|
||||||
|
|
||||||
|
| 提取器 (us/iran) | DB 列 |
|
||||||
|
|------------------|-------|
|
||||||
|
| personnel_killed | personnel_killed |
|
||||||
|
| personnel_wounded | personnel_wounded |
|
||||||
|
| civilian_killed | civilian_killed |
|
||||||
|
| civilian_wounded | civilian_wounded |
|
||||||
|
| bases_destroyed | bases_destroyed |
|
||||||
|
| bases_damaged | bases_damaged |
|
||||||
|
| aircraft, warships, armor, vehicles | 同名 |
|
||||||
|
| drones, missiles, helicopters, submarines | 同名 |
|
||||||
|
|
||||||
|
## 3. 测试用例
|
||||||
|
|
||||||
|
运行: `npm run crawler:test:extraction`
|
||||||
|
|
||||||
|
| 用例 | 输入 | 预期 |
|
||||||
|
|------|------|------|
|
||||||
|
| 特朗普 1000 军事目标 | "特朗普说伊朗有1000个军事目标遭到袭击" | 不提取 bases_destroyed/bases_damaged |
|
||||||
|
| 阿萨德基地遭袭 | "阿萨德空军基地遭袭,损失严重" | 输出 key_location_updates |
|
||||||
|
| 美军伤亡 | "3名美军阵亡,另有5人受伤" | personnel_killed=3, personnel_wounded=5 |
|
||||||
|
| 伊朗平民 | "伊朗空袭造成伊拉克平民50人伤亡" | iran.civilian_killed=50 |
|
||||||
|
| 伊朗无人机 | "美军击落伊朗10架无人机" | iran.drones=10 |
|
||||||
|
| db_merge 增量 | 两次 merge 3+2 | personnel_killed=5 |
|
||||||
|
|
||||||
|
## 4. 注意事项
|
||||||
|
|
||||||
|
- **bases_***: 仅指已确认损毁/受损的基地;"军事目标"/targets 不填 bases_*。
|
||||||
|
- **正则 [\s\w]***: 会匹配数字,导致 (\d+) 只捕获末位;数字前用 `[^\d]*`。
|
||||||
|
- **伊朗平民**: 规则已支持 "伊朗空袭造成…平民" 归入 loss_ir。
|
||||||
|
- **key_location**: 需 name LIKE '%keyword%' 匹配,关键词见 extractor_rules.bases_all。
|
||||||
269
docs/DEBUG_PANELS.md
Normal file
269
docs/DEBUG_PANELS.md
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
# 看板板块逐项调试指南
|
||||||
|
|
||||||
|
本文档按前端每个板块列出:**数据来源表**、**谁写入**、**如何验证**、**常见问题**,便于逐项排查。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 数据流总览
|
||||||
|
|
||||||
|
```
|
||||||
|
前端 Dashboard
|
||||||
|
→ useReplaySituation() → situation (来自 WebSocket / GET /api/situation)
|
||||||
|
→ getSituation() 读 server/situationData.js
|
||||||
|
→ 从 SQLite (server/data.db) 多表 SELECT 后拼成 JSON
|
||||||
|
```
|
||||||
|
|
||||||
|
- **写入方**:`server/seed.js`(初始化)、爬虫流水线(`crawler/pipeline.py` + `db_merge.py`)、GDELT 服务(`gdelt_events` / `conflict_stats`)。
|
||||||
|
- **读入方**:仅 `server/situationData.js` 的 `getSituation()`,被 `/api/situation` 与 WebSocket 广播使用。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 顶栏 (HeaderPanel)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 最后更新时间 | `situation.lastUpdated` | `situation.updated_at`(表 `situation` id=1) | 爬虫 notify 时更新 |
|
||||||
|
| 在看/看过 | `stats.viewers` / `stats.cumulative` | `visits` / `visitor_count`,见 `POST /api/visit` | 与爬虫无关 |
|
||||||
|
| 美/伊战力条 | `usForces.powerIndex.overall` / `iranForces.powerIndex.overall` | `power_index` 表 | **仅 seed** |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.lastUpdated, .usForces.powerIndex.overall, .iranForces.powerIndex.overall'`
|
||||||
|
- 看板顶栏是否显示时间、双战力数值。
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- `lastUpdated` 不变:爬虫未调 `POST /api/crawler/notify` 或 Node 未执行 `reloadFromFile()`。
|
||||||
|
- 战力条为 0:未跑 seed 或 `power_index` 无数据。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 事件脉络 / 时间线 (TimelinePanel → EventTimelinePanel + RecentUpdatesPanel)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 近期更新列表 | `situation.recentUpdates` | `situation_update` 表(ORDER BY timestamp DESC LIMIT 50) | 爬虫 `write_updates(new_items)` + seed 若干条 |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.recentUpdates | length'`
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.recentUpdates[0]'`
|
||||||
|
- 或用调试接口:`curl -s -H "x-api-key: $API_ADMIN_KEY" http://localhost:3001/api/db/dashboard | jq '.situation_update | length'`
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 条数为 0:未 seed 且爬虫未写入;或爬虫只跑 main.py(入口 A)未跑 gdelt(入口 B)时,仍会写 `situation_update`,但若 RSS 抓取 0 条则无新数据。
|
||||||
|
- 不更新:爬虫未启动;或未调 notify;或 Node 与爬虫用的不是同一个 `data.db`(路径/环境变量不一致)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 地图 (WarMap)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 美军据点 | `usForces.keyLocations` | `key_location` WHERE side='us' | seed 全量;爬虫通过 `key_location_updates` 只更新 status/damage_level |
|
||||||
|
| 伊朗据点 | `iranForces.keyLocations` | `key_location` WHERE side='iran' | 同上 |
|
||||||
|
| 冲突点(绿/橙/红) | `situation.conflictEvents` | `gdelt_events` 表(ORDER BY event_time DESC LIMIT 30) | GDELT API 写入;或 GDELT 关闭时 RSS 回填 |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.usForces.keyLocations | length, .conflictEvents | length'`
|
||||||
|
- 地图上是否有基地/舰船点位、是否有冲突点图层。
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 无冲突点:`gdelt_events` 为空;未跑 gdelt 或 GDELT 被墙且未用 RSS 回填(`_rss_to_gdelt_fallback`)。
|
||||||
|
- 基地状态不更新:爬虫提取的 `key_location_updates` 的 `name_keywords` 与 `key_location.name` 无法 LIKE 匹配(名称不一致)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. 美国基地状态 (BaseStatusPanel)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 基地列表 | `usForces.keyLocations` 中 `type === 'Base'` | `key_location` side='us' | 同 WarMap |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '[.usForces.keyLocations[] | select(.type == "Base")] | length'`
|
||||||
|
- 看板左侧「美国基地」是否展示且状态/损伤与预期一致。
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 与「地图」一致;若 seed 的 key_location 有 type/region,而爬虫只更新 status/damage_level,名称必须能与 extractor 的 name_keywords 匹配。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 战损 (CombatLossesPanel + CombatLossesOtherPanel)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 美军/伊朗阵亡/受伤/装备等 | `usForces.combatLosses` / `iranForces.combatLosses` | `combat_losses` 表(side=us/iran) | seed 初始值;爬虫 AI 提取 `combat_losses_delta` 后 db_merge **增量**叠加 |
|
||||||
|
| 冲突统计(估计伤亡等) | `situation.conflictStats` | `conflict_stats` 表 id=1 | GDELT 或 RSS 回填时写入 |
|
||||||
|
| 平民伤亡合计 | `situation.civilianCasualtiesTotal` | 由 combat_losses 双方平民字段 + conflict_stats.estimated_casualties 计算 | 见 situationData.js |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.usForces.combatLosses, .iranForces.combatLosses, .conflictStats'`
|
||||||
|
- 看板战损数字是否与 API 一致。
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 战损一直不变:新闻中无明确伤亡/装备数字;或未跑入口 B(gdelt);或 AI 提取器未启用/报错(Ollama/通义/规则);或 merge 时单次增量被上限截断。
|
||||||
|
- 数字异常大:提取器误把「累计总数」当成本条增量;已用 `MAX_DELTA_PER_MERGE` 做上限。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 伊朗基地状态 (IranBaseStatusPanel)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 基地/港/核/导弹等 | `iranForces.keyLocations` 中 type 为 Base/Port/Nuclear/Missile | `key_location` side='iran' | 同 WarMap |
|
||||||
|
|
||||||
|
**验证与常见问题**
|
||||||
|
|
||||||
|
- 同「美国基地」;确保 seed 中伊朗 key_location 的 name 与爬虫 extractor 的 name_keywords 能匹配(如德黑兰、伊斯法罕、布什尔等)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. 战力对比图 (PowerChart)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 美/伊战力指数 | `usForces.powerIndex` / `iranForces.powerIndex` | `power_index` 表 | **仅 seed**,爬虫不写 |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.usForces.powerIndex, .iranForces.powerIndex'`
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 为 0 或缺失:未执行 seed;或 `power_index` 表空。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. 华尔街/投资趋势 (InvestmentTrendChart)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 时间序列 | `usForces.wallStreetInvestmentTrend` | `wall_street_trend` 表(time, value) | seed 写入初始曲线;爬虫仅在提取出 `wall_street` 时 **INSERT 新点** |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.usForces.wallStreetInvestmentTrend | length'`
|
||||||
|
- 看板右侧美国下方趋势图是否有数据。
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 无曲线:未 seed 或表空。
|
||||||
|
- 不随新闻更新:提取器未输出 `wall_street` 或新闻中无相关表述。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. 美国力量摘要 (ForcePanel side=us)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 摘要数字 | `usForces.summary` | `force_summary` side='us' | **仅 seed** |
|
||||||
|
| 战力指数 | `usForces.powerIndex` | `power_index` | **仅 seed** |
|
||||||
|
| 资产列表 | `usForces.assets` | `force_asset` side='us' | **仅 seed** |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.usForces.summary, .usForces.assets | length'`
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 全为 0 或空:未 seed;爬虫不更新这些表。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 报复情绪 (RetaliationGauge)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 当前值 | `iranForces.retaliationSentiment` | `retaliation_current` id=1 | seed 初始;爬虫提取 `retaliation` 时 **替换** 当前值并 **追加** history |
|
||||||
|
| 历史曲线 | `iranForces.retaliationSentimentHistory` | `retaliation_history` 表 | 同上 |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s http://localhost:3001/api/situation | jq '.iranForces.retaliationSentiment, .iranForces.retaliationSentimentHistory | length'`
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- 不更新:新闻中无报复相关表述;或提取器未输出 `retaliation`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. 伊朗力量摘要 (ForcePanel side=iran)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 同美国侧 | `iranForces.summary` / `powerIndex` / `assets` | `force_summary` / `power_index` / `force_asset` side='iran' | **仅 seed** |
|
||||||
|
|
||||||
|
**验证与常见问题**
|
||||||
|
|
||||||
|
- 同「美国力量摘要」。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 12. 资讯列表 (GET /api/news,若有单独页面消费)
|
||||||
|
|
||||||
|
| 项目 | 数据来源 | 写入方 | 说明 |
|
||||||
|
|------|----------|--------|------|
|
||||||
|
| 资讯行 | `news_content` 表 | 爬虫 `save_and_dedup` 后写入 | 仅入口 B 流水线;事件脉络来自 situation_update,资讯表独立 |
|
||||||
|
|
||||||
|
**验证**
|
||||||
|
|
||||||
|
- `curl -s -H "x-api-key: $API_ADMIN_KEY" http://localhost:3001/api/news?limit=5 | jq '.items | length'`
|
||||||
|
- 若未配置 API_ADMIN_KEY,该接口是否需要鉴权取决于 routes 的配置,部分环境可能无需密钥即可访问。
|
||||||
|
|
||||||
|
**常见问题**
|
||||||
|
|
||||||
|
- `items` 为 0:未跑入口 B;或去重后无新增;或 RSS 抓取 0 条。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 快速检查命令汇总
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. API 与态势整体
|
||||||
|
curl -s http://localhost:3001/api/health
|
||||||
|
curl -s http://localhost:3001/api/situation | jq '{
|
||||||
|
lastUpdated,
|
||||||
|
recentUpdates: (.recentUpdates | length),
|
||||||
|
conflictEvents: (.conflictEvents | length),
|
||||||
|
usPower: .usForces.powerIndex.overall,
|
||||||
|
iranPower: .iranForces.powerIndex.overall,
|
||||||
|
usLosses: .usForces.combatLosses.personnelCasualties,
|
||||||
|
iranLosses: .iranForces.combatLosses.personnelCasualties,
|
||||||
|
usBases: (.usForces.keyLocations | length),
|
||||||
|
iranBases: (.iranForces.keyLocations | length),
|
||||||
|
wallStreetLen: (.usForces.wallStreetInvestmentTrend | length),
|
||||||
|
retaliationCur: .iranForces.retaliationSentiment
|
||||||
|
}'
|
||||||
|
|
||||||
|
# 2. 各表行数(需 sqlite3)
|
||||||
|
DB="${DB_PATH:-server/data.db}"
|
||||||
|
for t in force_summary power_index force_asset key_location combat_losses wall_street_trend retaliation_current retaliation_history situation_update gdelt_events conflict_stats news_content; do
|
||||||
|
echo -n "$t: "; sqlite3 "$DB" "SELECT COUNT(*) FROM $t" 2>/dev/null || echo "?"
|
||||||
|
done
|
||||||
|
|
||||||
|
# 3. 爬虫状态与通知
|
||||||
|
curl -s http://localhost:8000/crawler/status | jq .
|
||||||
|
curl -s -X POST http://localhost:3001/api/crawler/notify
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 建议调试顺序
|
||||||
|
|
||||||
|
1. **先确认 API 与 DB 一致**:`npm run api` 已起、`GET /api/situation` 返回 200,且 `lastUpdated`、`recentUpdates` 等存在。
|
||||||
|
2. **确认 seed**:若从未 seed,先跑 `node server/seed.js`(或项目提供的 seed 命令),再刷新看板,检查战力/摘要/基地/战损等是否有初始值。
|
||||||
|
3. **事件脉络**:确认爬虫已起(`npm run gdelt`)、RSS 能抓到条数、`situation_update` 条数增加、notify 后前端/API 的 `recentUpdates` 增加。
|
||||||
|
4. **战损/基地/报复/美股**:确认跑的是入口 B、提取器可用(Ollama 或 DASHSCOPE_API_KEY 或规则)、新闻内容包含可解析的伤亡/基地/报复表述;必要时用 crawler 的提取单测或 backfill 接口验证。
|
||||||
|
5. **地图冲突点**:确认 `gdelt_events` 有数据(GDELT 或 RSS 回填);冲突统计看 `conflict_stats`。
|
||||||
|
|
||||||
|
按上述顺序逐板块对照「数据来源 → 写入方 → 验证命令 → 常见问题」,即可定位每个板块不更新或显示异常的原因。
|
||||||
|
|
||||||
|
**若只关心战损、基地、地图战区**:见 **docs/DEBUG_战损_基地_地图.md**,并运行 `./scripts/debug-panels-focus.sh` 做专项检查。
|
||||||
135
docs/DEBUG_战损_基地_地图.md
Normal file
135
docs/DEBUG_战损_基地_地图.md
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
# 战损、基地、地图战区 — 专项调试
|
||||||
|
|
||||||
|
只关心这三块时,按下面数据源 + 排查顺序即可。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、战损 (combat_losses)
|
||||||
|
|
||||||
|
### 数据流
|
||||||
|
|
||||||
|
```
|
||||||
|
RSS 新闻(标题+摘要/正文) → 爬虫流水线 run_full_pipeline
|
||||||
|
→ extract_from_news(text) → combat_losses_delta { us: { personnel_killed, ... }, iran: { ... } }
|
||||||
|
→ db_merge.merge() → 按「增量」叠加到 combat_losses 表
|
||||||
|
→ POST /api/crawler/notify → Node 重载 DB
|
||||||
|
→ getSituation() 读 combat_losses → 前端 CombatLossesPanel / CombatLossesOtherPanel
|
||||||
|
```
|
||||||
|
|
||||||
|
- **表**:`combat_losses`(side=us / iran),字段含 personnel_killed、personnel_wounded、bases_destroyed、bases_damaged、aircraft、drones、missiles 等。
|
||||||
|
- **初始值**:`node server/seed.js` 会写入美/伊两行。
|
||||||
|
- **更新条件**:只有新闻里**明确出现可解析的伤亡/装备数字**(如「2 名美军死亡」「14 人受伤」「1 架战机受损」)时,提取器才会输出 `combat_losses_delta`,merge 才会叠加。
|
||||||
|
|
||||||
|
### 提取器选择(三选一)
|
||||||
|
|
||||||
|
| 环境变量 | 使用模块 | 说明 |
|
||||||
|
|----------|----------|------|
|
||||||
|
| `DASHSCOPE_API_KEY` 已设 | `extractor_dashscope` | 通义抽取,精度较好 |
|
||||||
|
| 未设通义 且 `CLEANER_AI_DISABLED≠1` | `extractor_ai` | 需本机 Ollama(如 llama3.1) |
|
||||||
|
| 未设通义 且 `CLEANER_AI_DISABLED=1` | `extractor_rules` | 规则正则,无需模型 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# API 返回的战损
|
||||||
|
curl -s http://localhost:3001/api/situation | jq '{
|
||||||
|
us: .usForces.combatLosses.personnelCasualties,
|
||||||
|
iran: .iranForces.combatLosses.personnelCasualties,
|
||||||
|
conflictStats: .conflictStats
|
||||||
|
}'
|
||||||
|
|
||||||
|
# 表内原始值
|
||||||
|
sqlite3 server/data.db "SELECT side, personnel_killed, personnel_wounded, bases_destroyed, bases_damaged, aircraft FROM combat_losses"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 常见问题
|
||||||
|
|
||||||
|
| 现象 | 可能原因 | 处理 |
|
||||||
|
|------|----------|------|
|
||||||
|
| 战损数字从不变化 | 1) 只跑了 main.py 未跑 gdelt<br>2) 新闻里没有明确伤亡/装备数字<br>3) 提取器未启用或报错(Ollama 未起、通义未配) | 跑 `npm run gdelt`;用带数字的新闻测;看爬虫日志是否有提取/merge 报错 |
|
||||||
|
| 数字暴增一次 | 提取器把「累计总数」当成本条增量 | 已用 MAX_DELTA_PER_MERGE 做单次上限;可查 db_merge.py |
|
||||||
|
| 想用已有事件脉络重算战损 | 历史新闻当时未做提取 | `curl -X POST http://localhost:8000/crawler/backfill` 用 situation_update 最近 50 条重新提取并 merge |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、基地 (key_location)
|
||||||
|
|
||||||
|
### 数据流
|
||||||
|
|
||||||
|
```
|
||||||
|
RSS 新闻 → extract_from_news → key_location_updates: [ { name_keywords, side, status, damage_level } ]
|
||||||
|
→ db_merge.merge() → UPDATE key_location SET status=?, damage_level=? WHERE side=? AND (name LIKE ? OR ...)
|
||||||
|
→ getSituation() 读 key_location → 前端 BaseStatusPanel(美) / IranBaseStatusPanel(伊) / WarMap 据点层
|
||||||
|
```
|
||||||
|
|
||||||
|
- **表**:`key_location`(side=us / iran),字段含 name、lat、lng、type、region、**status**、**damage_level**。
|
||||||
|
- **初始数据**:seed 写入大量美/伊据点和基地(含 name);**爬虫只更新已有行的 status、damage_level**,不新增行。
|
||||||
|
- **匹配规则**:提取器的 `name_keywords`(如 `阿萨德|asad`)会按 **LIKE '%关键词%'** 与 `key_location.name` 匹配。例如 name 为「阿萨德空军基地」时,关键词「阿萨德」能匹配。
|
||||||
|
|
||||||
|
### 规则提取器支持的基地关键词(与 seed name 对应关系)
|
||||||
|
|
||||||
|
- **美军**:阿萨德|阿因|asad → 匹配 seed「阿萨德空军基地」「阿因·阿萨德」;巴格达 → 巴格达外交支援中心;乌代德|卡塔尔 → 乌代德空军基地;埃尔比勒 → 埃尔比勒空军基地;因吉尔利克|土耳其 → 因吉尔利克空军基地;苏尔坦|沙特 → 苏尔坦亲王空军基地;坦夫|叙利亚 → 坦夫驻军;达夫拉|阿联酋 → 达夫拉空军基地;内瓦提姆|拉蒙|以色列 → 内瓦提姆/拉蒙等;赛利耶、巴林、科威特 等。
|
||||||
|
- **伊朗**:阿巴斯港、德黑兰、布什尔、伊斯法罕、纳坦兹、米纳布、霍尔木兹 等(seed 中需有对应 name 的伊朗据点)。
|
||||||
|
|
||||||
|
若 seed 里没有某据点,或 name 与关键词完全对不上(例如英文报道只写 "Al-Asad" 而 seed 只有「阿萨德空军基地」),规则里已含 asad/阿萨德,一般能匹配;若仍不匹配,可查 `key_location.name` 与 extractor_rules.py / extractor_dashscope 的 name_keywords 是否有一致子串。
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 被标为遭袭的据点
|
||||||
|
curl -s http://localhost:3001/api/situation | jq '[.usForces.keyLocations[], .iranForces.keyLocations[]] | map(select(.status == "attacked")) | length'
|
||||||
|
|
||||||
|
# 表内 status / damage_level
|
||||||
|
sqlite3 server/data.db "SELECT side, name, status, damage_level FROM key_location WHERE status != 'operational' OR damage_level IS NOT NULL LIMIT 20"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 常见问题
|
||||||
|
|
||||||
|
| 现象 | 可能原因 | 处理 |
|
||||||
|
|------|----------|------|
|
||||||
|
| 基地状态从不更新 | 1) 新闻未提及「某基地遭袭」类表述<br>2) 提取的 name_keywords 与 key_location.name 无法 LIKE 匹配 | 确认 seed 的 name 含中文/英文与提取器关键词一致;或扩展 extractor 的 name_keywords |
|
||||||
|
| 地图/基地面板无据点 | key_location 表空 | 先执行 `node server/seed.js` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、地图战区 / 冲突点 (gdelt_events + conflict_stats)
|
||||||
|
|
||||||
|
### 数据流
|
||||||
|
|
||||||
|
- **正常模式**:`fetch_gdelt_events()` 请求 GDELT API → 解析为事件列表 → `_write_to_db(events)` 写入 `gdelt_events` 和 `conflict_stats`(总事件数、高影响事件数、估计伤亡、打击次数等)。
|
||||||
|
- **GDELT 不可用**:设 `GDELT_DISABLED=1` 时,`fetch_news()` 里在流水线结束后调 `_rss_to_gdelt_fallback()`,用 **situation_update 最近 50 条** 按 summary 推断经纬度(`_infer_coords`)和 impact_score(由 severity 映射),写入 `gdelt_events`,这样地图仍有冲突点。
|
||||||
|
|
||||||
|
前端 WarMap 根据 `conflictEvents`(= gdelt_events)的 impact_score 分绿/橙/红三层显示;战损区「冲突统计」来自 `conflict_stats`。
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 冲突点条数 + 冲突统计
|
||||||
|
curl -s http://localhost:3001/api/situation | jq '{ conflictEvents: (.conflictEvents | length), conflictStats: .conflictStats }'
|
||||||
|
|
||||||
|
# 表内
|
||||||
|
sqlite3 server/data.db "SELECT COUNT(*) FROM gdelt_events"
|
||||||
|
sqlite3 server/data.db "SELECT * FROM conflict_stats WHERE id = 1"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 常见问题
|
||||||
|
|
||||||
|
| 现象 | 可能原因 | 处理 |
|
||||||
|
|------|----------|------|
|
||||||
|
| 地图没有冲突点 | 1) gdelt_events 表空<br>2) 未跑 gdelt 或 GDELT 被墙且未开 RSS 回填 | 跑 `npm run gdelt`;国内可设 `GDELT_DISABLED=1`,靠 situation_update 回填 |
|
||||||
|
| 冲突点不更新 | 爬虫未调 notify,或 Node/爬虫用的不是同一个 data.db | 确认 API_BASE、DB_PATH 一致;看 Node 终端是否有 `[crawler/notify] DB 已重载` |
|
||||||
|
| conflict_stats 全 0 | 从未成功写入过 gdelt_events(GDELT 与 RSS 回填都未执行) | 先让 gdelt_events 有数据(见上) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、一键检查(仅战损 / 基地 / 地图)
|
||||||
|
|
||||||
|
在项目根执行:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/debug-panels-focus.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
会检查:API 是否通、`combat_losses` / `key_location` / `gdelt_events` / `conflict_stats` 行数及关键字段、并给出简短结论。需已启动 API(`npm run api`);可选 `jq`、`sqlite3` 以输出更全。
|
||||||
|
|
||||||
|
详细逐板块说明见 `docs/DEBUG_PANELS.md`。
|
||||||
30
docs/DOCKER_MIRROR.md
Normal file
30
docs/DOCKER_MIRROR.md
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Docker 拉取超时 / 配置镜像加速
|
||||||
|
|
||||||
|
国内环境从 Docker Hub 拉取镜像常超时,需在 Docker 中配置镜像加速。
|
||||||
|
|
||||||
|
## Docker Desktop(macOS / Windows)
|
||||||
|
|
||||||
|
1. 打开 **Docker Desktop**
|
||||||
|
2. **Settings** → **Docker Engine**
|
||||||
|
3. 在 JSON 中增加 `registry-mirrors`(若已有其他配置,只需合并进该字段):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"registry-mirrors": [
|
||||||
|
"https://docker.m.daocloud.io",
|
||||||
|
"https://docker.1ms.run"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
4. 点击 **Apply & Restart**
|
||||||
|
5. 重新执行:`docker compose up -d --build`
|
||||||
|
|
||||||
|
## 备选镜像源
|
||||||
|
|
||||||
|
可替换或补充到 `registry-mirrors` 中:
|
||||||
|
|
||||||
|
- `https://docker.m.daocloud.io`(DaoCloud)
|
||||||
|
- `https://docker.1ms.run`
|
||||||
|
- `https://docker.rainbond.cc`(好雨科技)
|
||||||
|
- 阿里云 / 腾讯云等:在对应云控制台的「容器镜像服务」中获取个人专属加速地址
|
||||||
22
g.sh
Executable file
22
g.sh
Executable file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env bash
# 快速 git add + commit + push
# 用法: g "fix: 提交说明"
set -e

# 第一个参数为提交说明;未传参时保持为空串
commit_msg="${1:-}"

# 未提供提交说明:打印用法并以非零码退出
if [[ -z "$commit_msg" ]]; then
  echo "用法: g \"commit message\""
  echo "示例: g \"fix: 修复登录问题\""
  exit 1
fi

# 工作区干净(git status --porcelain 无输出)则跳过提交
if [[ -z "$(git status --porcelain)" ]]; then
  echo "无文件改动,跳过提交"
  exit 0
fi

# 暂存全部改动 → 提交 → 推送
git add .
git commit -m "$commit_msg"
git push
echo "✓ 已推送"
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8" />
|
<meta charset="UTF-8" />
|
||||||
<link rel="icon" type="image/svg+xml" href="/usa_logo.png" />
|
<link rel="icon" type="image/svg+xml" href="/usa_logo.png" />
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
<meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover" />
|
||||||
<title>美伊军事态势显示</title>
|
<title>美伊军事态势显示</title>
|
||||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||||
|
|||||||
6
map.md
6
map.md
@@ -282,3 +282,9 @@ const IRAN_SOURCE = [51.3890, 35.6892] // Tehran
|
|||||||
所有动画走 WebGL 图层
|
所有动画走 WebGL 图层
|
||||||
|
|
||||||
禁止 DOM 动画
|
禁止 DOM 动画
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
git代码更新(注意:会丢弃本地所有未提交改动):git fetch origin && git reset --hard origin/master
|
||||||
|
前端发版:npm run build
|
||||||
|
后端发版:pm2 restart 3
|
||||||
477
package-lock.json
generated
477
package-lock.json
generated
@@ -8,16 +8,18 @@
|
|||||||
"name": "us-iran-military-dashboard",
|
"name": "us-iran-military-dashboard",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"better-sqlite3": "^11.6.0",
|
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"echarts": "^5.5.0",
|
"echarts": "^5.5.0",
|
||||||
"echarts-for-react": "^3.0.2",
|
"echarts-for-react": "^3.0.2",
|
||||||
"express": "^4.21.1",
|
"express": "^4.21.1",
|
||||||
"lucide-react": "^0.460.0",
|
"lucide-react": "^0.576.0",
|
||||||
"mapbox-gl": "^3.6.0",
|
"mapbox-gl": "^3.6.0",
|
||||||
"react": "^18.3.1",
|
"react": "^18.3.1",
|
||||||
"react-dom": "^18.3.1",
|
"react-dom": "^18.3.1",
|
||||||
"react-map-gl": "^7.1.7",
|
"react-map-gl": "^7.1.7",
|
||||||
|
"react-router-dom": "^7.13.1",
|
||||||
|
"sql.js": "^1.11.0",
|
||||||
|
"swagger-ui-express": "^5.0.1",
|
||||||
"ws": "^8.19.0",
|
"ws": "^8.19.0",
|
||||||
"zustand": "^5.0.0"
|
"zustand": "^5.0.0"
|
||||||
},
|
},
|
||||||
@@ -1342,6 +1344,12 @@
|
|||||||
"win32"
|
"win32"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"node_modules/@scarf/scarf": {
|
||||||
|
"version": "1.4.0",
|
||||||
|
"resolved": "https://registry.npmmirror.com/@scarf/scarf/-/scarf-1.4.0.tgz",
|
||||||
|
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
|
||||||
|
"hasInstallScript": true
|
||||||
|
},
|
||||||
"node_modules/@types/babel__core": {
|
"node_modules/@types/babel__core": {
|
||||||
"version": "7.20.5",
|
"version": "7.20.5",
|
||||||
"resolved": "https://registry.npmmirror.com/@types/babel__core/-/babel__core-7.20.5.tgz",
|
"resolved": "https://registry.npmmirror.com/@types/babel__core/-/babel__core-7.20.5.tgz",
|
||||||
@@ -1921,25 +1929,6 @@
|
|||||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/base64-js": {
|
|
||||||
"version": "1.5.1",
|
|
||||||
"resolved": "https://registry.npmmirror.com/base64-js/-/base64-js-1.5.1.tgz",
|
|
||||||
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
|
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"type": "github",
|
|
||||||
"url": "https://github.com/sponsors/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "patreon",
|
|
||||||
"url": "https://www.patreon.com/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "consulting",
|
|
||||||
"url": "https://feross.org/support"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/baseline-browser-mapping": {
|
"node_modules/baseline-browser-mapping": {
|
||||||
"version": "2.10.0",
|
"version": "2.10.0",
|
||||||
"resolved": "https://registry.npmmirror.com/baseline-browser-mapping/-/baseline-browser-mapping-2.10.0.tgz",
|
"resolved": "https://registry.npmmirror.com/baseline-browser-mapping/-/baseline-browser-mapping-2.10.0.tgz",
|
||||||
@@ -1952,16 +1941,6 @@
|
|||||||
"node": ">=6.0.0"
|
"node": ">=6.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/better-sqlite3": {
|
|
||||||
"version": "11.10.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/better-sqlite3/-/better-sqlite3-11.10.0.tgz",
|
|
||||||
"integrity": "sha512-EwhOpyXiOEL/lKzHz9AW1msWFNzGc/z+LzeB3/jnFJpxu+th2yqvzsSWas1v9jgs9+xiXJcD5A8CJxAG2TaghQ==",
|
|
||||||
"hasInstallScript": true,
|
|
||||||
"dependencies": {
|
|
||||||
"bindings": "^1.5.0",
|
|
||||||
"prebuild-install": "^7.1.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/binary-extensions": {
|
"node_modules/binary-extensions": {
|
||||||
"version": "2.3.0",
|
"version": "2.3.0",
|
||||||
"resolved": "https://registry.npmmirror.com/binary-extensions/-/binary-extensions-2.3.0.tgz",
|
"resolved": "https://registry.npmmirror.com/binary-extensions/-/binary-extensions-2.3.0.tgz",
|
||||||
@@ -1974,24 +1953,6 @@
|
|||||||
"url": "https://github.com/sponsors/sindresorhus"
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/bindings": {
|
|
||||||
"version": "1.5.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/bindings/-/bindings-1.5.0.tgz",
|
|
||||||
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"file-uri-to-path": "1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bl": {
|
|
||||||
"version": "4.1.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/bl/-/bl-4.1.0.tgz",
|
|
||||||
"integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
|
|
||||||
"dependencies": {
|
|
||||||
"buffer": "^5.5.0",
|
|
||||||
"inherits": "^2.0.4",
|
|
||||||
"readable-stream": "^3.4.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/body-parser": {
|
"node_modules/body-parser": {
|
||||||
"version": "1.20.4",
|
"version": "1.20.4",
|
||||||
"resolved": "https://registry.npmmirror.com/body-parser/-/body-parser-1.20.4.tgz",
|
"resolved": "https://registry.npmmirror.com/body-parser/-/body-parser-1.20.4.tgz",
|
||||||
@@ -2083,29 +2044,6 @@
|
|||||||
"node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
|
"node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/buffer": {
|
|
||||||
"version": "5.7.1",
|
|
||||||
"resolved": "https://registry.npmmirror.com/buffer/-/buffer-5.7.1.tgz",
|
|
||||||
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
|
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"type": "github",
|
|
||||||
"url": "https://github.com/sponsors/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "patreon",
|
|
||||||
"url": "https://www.patreon.com/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "consulting",
|
|
||||||
"url": "https://feross.org/support"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"dependencies": {
|
|
||||||
"base64-js": "^1.3.1",
|
|
||||||
"ieee754": "^1.1.13"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/bytes": {
|
"node_modules/bytes": {
|
||||||
"version": "3.1.2",
|
"version": "3.1.2",
|
||||||
"resolved": "https://registry.npmmirror.com/bytes/-/bytes-3.1.2.tgz",
|
"resolved": "https://registry.npmmirror.com/bytes/-/bytes-3.1.2.tgz",
|
||||||
@@ -2253,11 +2191,6 @@
|
|||||||
"node": ">= 6"
|
"node": ">= 6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/chownr": {
|
|
||||||
"version": "1.1.4",
|
|
||||||
"resolved": "https://registry.npmmirror.com/chownr/-/chownr-1.1.4.tgz",
|
|
||||||
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
|
|
||||||
},
|
|
||||||
"node_modules/color-convert": {
|
"node_modules/color-convert": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
"resolved": "https://registry.npmmirror.com/color-convert/-/color-convert-2.0.1.tgz",
|
"resolved": "https://registry.npmmirror.com/color-convert/-/color-convert-2.0.1.tgz",
|
||||||
@@ -2399,28 +2332,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/decompress-response": {
|
|
||||||
"version": "6.0.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/decompress-response/-/decompress-response-6.0.0.tgz",
|
|
||||||
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"mimic-response": "^3.1.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/sindresorhus"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/deep-extend": {
|
|
||||||
"version": "0.6.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/deep-extend/-/deep-extend-0.6.0.tgz",
|
|
||||||
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=4.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/deep-is": {
|
"node_modules/deep-is": {
|
||||||
"version": "0.1.4",
|
"version": "0.1.4",
|
||||||
"resolved": "https://registry.npmmirror.com/deep-is/-/deep-is-0.1.4.tgz",
|
"resolved": "https://registry.npmmirror.com/deep-is/-/deep-is-0.1.4.tgz",
|
||||||
@@ -2444,14 +2355,6 @@
|
|||||||
"npm": "1.2.8000 || >= 1.4.16"
|
"npm": "1.2.8000 || >= 1.4.16"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/detect-libc": {
|
|
||||||
"version": "2.1.2",
|
|
||||||
"resolved": "https://registry.npmmirror.com/detect-libc/-/detect-libc-2.1.2.tgz",
|
|
||||||
"integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=8"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/didyoumean": {
|
"node_modules/didyoumean": {
|
||||||
"version": "1.2.2",
|
"version": "1.2.2",
|
||||||
"resolved": "https://registry.npmmirror.com/didyoumean/-/didyoumean-1.2.2.tgz",
|
"resolved": "https://registry.npmmirror.com/didyoumean/-/didyoumean-1.2.2.tgz",
|
||||||
@@ -2523,14 +2426,6 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/end-of-stream": {
|
|
||||||
"version": "1.4.5",
|
|
||||||
"resolved": "https://registry.npmmirror.com/end-of-stream/-/end-of-stream-1.4.5.tgz",
|
|
||||||
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
|
|
||||||
"dependencies": {
|
|
||||||
"once": "^1.4.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/es-define-property": {
|
"node_modules/es-define-property": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
"resolved": "https://registry.npmmirror.com/es-define-property/-/es-define-property-1.0.1.tgz",
|
"resolved": "https://registry.npmmirror.com/es-define-property/-/es-define-property-1.0.1.tgz",
|
||||||
@@ -2797,14 +2692,6 @@
|
|||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/expand-template": {
|
|
||||||
"version": "2.0.3",
|
|
||||||
"resolved": "https://registry.npmmirror.com/expand-template/-/expand-template-2.0.3.tgz",
|
|
||||||
"integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/express": {
|
"node_modules/express": {
|
||||||
"version": "4.22.1",
|
"version": "4.22.1",
|
||||||
"resolved": "https://registry.npmmirror.com/express/-/express-4.22.1.tgz",
|
"resolved": "https://registry.npmmirror.com/express/-/express-4.22.1.tgz",
|
||||||
@@ -2940,11 +2827,6 @@
|
|||||||
"node": ">=16.0.0"
|
"node": ">=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/file-uri-to-path": {
|
|
||||||
"version": "1.0.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
|
|
||||||
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="
|
|
||||||
},
|
|
||||||
"node_modules/fill-range": {
|
"node_modules/fill-range": {
|
||||||
"version": "7.1.1",
|
"version": "7.1.1",
|
||||||
"resolved": "https://registry.npmmirror.com/fill-range/-/fill-range-7.1.1.tgz",
|
"resolved": "https://registry.npmmirror.com/fill-range/-/fill-range-7.1.1.tgz",
|
||||||
@@ -3051,11 +2933,6 @@
|
|||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/fs-constants": {
|
|
||||||
"version": "1.0.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/fs-constants/-/fs-constants-1.0.0.tgz",
|
|
||||||
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
|
|
||||||
},
|
|
||||||
"node_modules/fsevents": {
|
"node_modules/fsevents": {
|
||||||
"version": "2.3.3",
|
"version": "2.3.3",
|
||||||
"resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.3.tgz",
|
"resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.3.tgz",
|
||||||
@@ -3135,11 +3012,6 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/github-from-package": {
|
|
||||||
"version": "0.0.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/github-from-package/-/github-from-package-0.0.0.tgz",
|
|
||||||
"integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="
|
|
||||||
},
|
|
||||||
"node_modules/gl-matrix": {
|
"node_modules/gl-matrix": {
|
||||||
"version": "3.4.4",
|
"version": "3.4.4",
|
||||||
"resolved": "https://registry.npmmirror.com/gl-matrix/-/gl-matrix-3.4.4.tgz",
|
"resolved": "https://registry.npmmirror.com/gl-matrix/-/gl-matrix-3.4.4.tgz",
|
||||||
@@ -3246,25 +3118,6 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/ieee754": {
|
|
||||||
"version": "1.2.1",
|
|
||||||
"resolved": "https://registry.npmmirror.com/ieee754/-/ieee754-1.2.1.tgz",
|
|
||||||
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
|
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"type": "github",
|
|
||||||
"url": "https://github.com/sponsors/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "patreon",
|
|
||||||
"url": "https://www.patreon.com/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "consulting",
|
|
||||||
"url": "https://feross.org/support"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/ignore": {
|
"node_modules/ignore": {
|
||||||
"version": "5.3.2",
|
"version": "5.3.2",
|
||||||
"resolved": "https://registry.npmmirror.com/ignore/-/ignore-5.3.2.tgz",
|
"resolved": "https://registry.npmmirror.com/ignore/-/ignore-5.3.2.tgz",
|
||||||
@@ -3304,11 +3157,6 @@
|
|||||||
"resolved": "https://registry.npmmirror.com/inherits/-/inherits-2.0.4.tgz",
|
"resolved": "https://registry.npmmirror.com/inherits/-/inherits-2.0.4.tgz",
|
||||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
|
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
|
||||||
},
|
},
|
||||||
"node_modules/ini": {
|
|
||||||
"version": "1.3.8",
|
|
||||||
"resolved": "https://registry.npmmirror.com/ini/-/ini-1.3.8.tgz",
|
|
||||||
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="
|
|
||||||
},
|
|
||||||
"node_modules/ipaddr.js": {
|
"node_modules/ipaddr.js": {
|
||||||
"version": "1.9.1",
|
"version": "1.9.1",
|
||||||
"resolved": "https://registry.npmmirror.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
|
"resolved": "https://registry.npmmirror.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
|
||||||
@@ -3567,11 +3415,11 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/lucide-react": {
|
"node_modules/lucide-react": {
|
||||||
"version": "0.460.0",
|
"version": "0.576.0",
|
||||||
"resolved": "https://registry.npmmirror.com/lucide-react/-/lucide-react-0.460.0.tgz",
|
"resolved": "https://registry.npmmirror.com/lucide-react/-/lucide-react-0.576.0.tgz",
|
||||||
"integrity": "sha512-BVtq/DykVeIvRTJvRAgCsOwaGL8Un3Bxh8MbDxMhEWlZay3T4IpEKDEpwt5KZ0KJMHzgm6jrltxlT5eXOWXDHg==",
|
"integrity": "sha512-koNxU14BXrxUfZQ9cUaP0ES1uyPZKYDjk31FQZB6dQ/x+tXk979sVAn9ppZ/pVeJJyOxVM8j1E+8QEuSc02Vug==",
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0-rc"
|
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/mapbox-gl": {
|
"node_modules/mapbox-gl": {
|
||||||
@@ -3706,17 +3554,6 @@
|
|||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/mimic-response": {
|
|
||||||
"version": "3.1.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/mimic-response/-/mimic-response-3.1.0.tgz",
|
|
||||||
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/sindresorhus"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/minimatch": {
|
"node_modules/minimatch": {
|
||||||
"version": "3.1.5",
|
"version": "3.1.5",
|
||||||
"resolved": "https://registry.npmmirror.com/minimatch/-/minimatch-3.1.5.tgz",
|
"resolved": "https://registry.npmmirror.com/minimatch/-/minimatch-3.1.5.tgz",
|
||||||
@@ -3737,11 +3574,6 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/mkdirp-classic": {
|
|
||||||
"version": "0.5.3",
|
|
||||||
"resolved": "https://registry.npmmirror.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
|
|
||||||
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
|
|
||||||
},
|
|
||||||
"node_modules/ms": {
|
"node_modules/ms": {
|
||||||
"version": "2.1.3",
|
"version": "2.1.3",
|
||||||
"resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.3.tgz",
|
"resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.3.tgz",
|
||||||
@@ -3781,11 +3613,6 @@
|
|||||||
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
|
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/napi-build-utils": {
|
|
||||||
"version": "2.0.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/napi-build-utils/-/napi-build-utils-2.0.0.tgz",
|
|
||||||
"integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA=="
|
|
||||||
},
|
|
||||||
"node_modules/natural-compare": {
|
"node_modules/natural-compare": {
|
||||||
"version": "1.4.0",
|
"version": "1.4.0",
|
||||||
"resolved": "https://registry.npmmirror.com/natural-compare/-/natural-compare-1.4.0.tgz",
|
"resolved": "https://registry.npmmirror.com/natural-compare/-/natural-compare-1.4.0.tgz",
|
||||||
@@ -3800,28 +3627,6 @@
|
|||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/node-abi": {
|
|
||||||
"version": "3.87.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/node-abi/-/node-abi-3.87.0.tgz",
|
|
||||||
"integrity": "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"semver": "^7.3.5"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/node-abi/node_modules/semver": {
|
|
||||||
"version": "7.7.4",
|
|
||||||
"resolved": "https://registry.npmmirror.com/semver/-/semver-7.7.4.tgz",
|
|
||||||
"integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
|
|
||||||
"bin": {
|
|
||||||
"semver": "bin/semver.js"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/node-releases": {
|
"node_modules/node-releases": {
|
||||||
"version": "2.0.27",
|
"version": "2.0.27",
|
||||||
"resolved": "https://registry.npmmirror.com/node-releases/-/node-releases-2.0.27.tgz",
|
"resolved": "https://registry.npmmirror.com/node-releases/-/node-releases-2.0.27.tgz",
|
||||||
@@ -3876,14 +3681,6 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/once": {
|
|
||||||
"version": "1.4.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/once/-/once-1.4.0.tgz",
|
|
||||||
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
|
|
||||||
"dependencies": {
|
|
||||||
"wrappy": "1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/optionator": {
|
"node_modules/optionator": {
|
||||||
"version": "0.9.4",
|
"version": "0.9.4",
|
||||||
"resolved": "https://registry.npmmirror.com/optionator/-/optionator-0.9.4.tgz",
|
"resolved": "https://registry.npmmirror.com/optionator/-/optionator-0.9.4.tgz",
|
||||||
@@ -4188,32 +3985,6 @@
|
|||||||
"resolved": "https://registry.npmmirror.com/potpack/-/potpack-2.1.0.tgz",
|
"resolved": "https://registry.npmmirror.com/potpack/-/potpack-2.1.0.tgz",
|
||||||
"integrity": "sha512-pcaShQc1Shq0y+E7GqJqvZj8DTthWV1KeHGdi0Z6IAin2Oi3JnLCOfwnCo84qc+HAp52wT9nK9H7FAJp5a44GQ=="
|
"integrity": "sha512-pcaShQc1Shq0y+E7GqJqvZj8DTthWV1KeHGdi0Z6IAin2Oi3JnLCOfwnCo84qc+HAp52wT9nK9H7FAJp5a44GQ=="
|
||||||
},
|
},
|
||||||
"node_modules/prebuild-install": {
|
|
||||||
"version": "7.1.3",
|
|
||||||
"resolved": "https://registry.npmmirror.com/prebuild-install/-/prebuild-install-7.1.3.tgz",
|
|
||||||
"integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==",
|
|
||||||
"deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.",
|
|
||||||
"dependencies": {
|
|
||||||
"detect-libc": "^2.0.0",
|
|
||||||
"expand-template": "^2.0.3",
|
|
||||||
"github-from-package": "0.0.0",
|
|
||||||
"minimist": "^1.2.3",
|
|
||||||
"mkdirp-classic": "^0.5.3",
|
|
||||||
"napi-build-utils": "^2.0.0",
|
|
||||||
"node-abi": "^3.3.0",
|
|
||||||
"pump": "^3.0.0",
|
|
||||||
"rc": "^1.2.7",
|
|
||||||
"simple-get": "^4.0.0",
|
|
||||||
"tar-fs": "^2.0.0",
|
|
||||||
"tunnel-agent": "^0.6.0"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"prebuild-install": "bin.js"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=10"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/prelude-ls": {
|
"node_modules/prelude-ls": {
|
||||||
"version": "1.2.1",
|
"version": "1.2.1",
|
||||||
"resolved": "https://registry.npmmirror.com/prelude-ls/-/prelude-ls-1.2.1.tgz",
|
"resolved": "https://registry.npmmirror.com/prelude-ls/-/prelude-ls-1.2.1.tgz",
|
||||||
@@ -4240,15 +4011,6 @@
|
|||||||
"node": ">= 0.10"
|
"node": ">= 0.10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/pump": {
|
|
||||||
"version": "3.0.4",
|
|
||||||
"resolved": "https://registry.npmmirror.com/pump/-/pump-3.0.4.tgz",
|
|
||||||
"integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==",
|
|
||||||
"dependencies": {
|
|
||||||
"end-of-stream": "^1.1.0",
|
|
||||||
"once": "^1.3.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/punycode": {
|
"node_modules/punycode": {
|
||||||
"version": "2.3.1",
|
"version": "2.3.1",
|
||||||
"resolved": "https://registry.npmmirror.com/punycode/-/punycode-2.3.1.tgz",
|
"resolved": "https://registry.npmmirror.com/punycode/-/punycode-2.3.1.tgz",
|
||||||
@@ -4319,28 +4081,6 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/rc": {
|
|
||||||
"version": "1.2.8",
|
|
||||||
"resolved": "https://registry.npmmirror.com/rc/-/rc-1.2.8.tgz",
|
|
||||||
"integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
|
|
||||||
"dependencies": {
|
|
||||||
"deep-extend": "^0.6.0",
|
|
||||||
"ini": "~1.3.0",
|
|
||||||
"minimist": "^1.2.0",
|
|
||||||
"strip-json-comments": "~2.0.1"
|
|
||||||
},
|
|
||||||
"bin": {
|
|
||||||
"rc": "cli.js"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/rc/node_modules/strip-json-comments": {
|
|
||||||
"version": "2.0.1",
|
|
||||||
"resolved": "https://registry.npmmirror.com/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
|
|
||||||
"integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=0.10.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/react": {
|
"node_modules/react": {
|
||||||
"version": "18.3.1",
|
"version": "18.3.1",
|
||||||
"resolved": "https://registry.npmmirror.com/react/-/react-18.3.1.tgz",
|
"resolved": "https://registry.npmmirror.com/react/-/react-18.3.1.tgz",
|
||||||
@@ -4396,6 +4136,54 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/react-router": {
|
||||||
|
"version": "7.13.1",
|
||||||
|
"resolved": "https://registry.npmmirror.com/react-router/-/react-router-7.13.1.tgz",
|
||||||
|
"integrity": "sha512-td+xP4X2/6BJvZoX6xw++A2DdEi++YypA69bJUV5oVvqf6/9/9nNlD70YO1e9d3MyamJEBQFEzk6mbfDYbqrSA==",
|
||||||
|
"dependencies": {
|
||||||
|
"cookie": "^1.0.1",
|
||||||
|
"set-cookie-parser": "^2.6.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"react": ">=18",
|
||||||
|
"react-dom": ">=18"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/react-router-dom": {
|
||||||
|
"version": "7.13.1",
|
||||||
|
"resolved": "https://registry.npmmirror.com/react-router-dom/-/react-router-dom-7.13.1.tgz",
|
||||||
|
"integrity": "sha512-UJnV3Rxc5TgUPJt2KJpo1Jpy0OKQr0AjgbZzBFjaPJcFOb2Y8jA5H3LT8HUJAiRLlWrEXWHbF1Z4SCZaQjWDHw==",
|
||||||
|
"dependencies": {
|
||||||
|
"react-router": "7.13.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"react": ">=18",
|
||||||
|
"react-dom": ">=18"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/react-router/node_modules/cookie": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmmirror.com/cookie/-/cookie-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"type": "opencollective",
|
||||||
|
"url": "https://opencollective.com/express"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/read-cache": {
|
"node_modules/read-cache": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmmirror.com/read-cache/-/read-cache-1.0.0.tgz",
|
"resolved": "https://registry.npmmirror.com/read-cache/-/read-cache-1.0.0.tgz",
|
||||||
@@ -4405,19 +4193,6 @@
|
|||||||
"pify": "^2.3.0"
|
"pify": "^2.3.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/readable-stream": {
|
|
||||||
"version": "3.6.2",
|
|
||||||
"resolved": "https://registry.npmmirror.com/readable-stream/-/readable-stream-3.6.2.tgz",
|
|
||||||
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
|
|
||||||
"dependencies": {
|
|
||||||
"inherits": "^2.0.3",
|
|
||||||
"string_decoder": "^1.1.1",
|
|
||||||
"util-deprecate": "^1.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/readdirp": {
|
"node_modules/readdirp": {
|
||||||
"version": "3.6.0",
|
"version": "3.6.0",
|
||||||
"resolved": "https://registry.npmmirror.com/readdirp/-/readdirp-3.6.0.tgz",
|
"resolved": "https://registry.npmmirror.com/readdirp/-/readdirp-3.6.0.tgz",
|
||||||
@@ -4645,6 +4420,11 @@
|
|||||||
"node": ">= 0.8.0"
|
"node": ">= 0.8.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/set-cookie-parser": {
|
||||||
|
"version": "2.7.2",
|
||||||
|
"resolved": "https://registry.npmmirror.com/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz",
|
||||||
|
"integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw=="
|
||||||
|
},
|
||||||
"node_modules/set-value": {
|
"node_modules/set-value": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
"resolved": "https://registry.npmmirror.com/set-value/-/set-value-2.0.1.tgz",
|
"resolved": "https://registry.npmmirror.com/set-value/-/set-value-2.0.1.tgz",
|
||||||
@@ -4753,49 +4533,6 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/simple-concat": {
|
|
||||||
"version": "1.0.1",
|
|
||||||
"resolved": "https://registry.npmmirror.com/simple-concat/-/simple-concat-1.0.1.tgz",
|
|
||||||
"integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
|
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"type": "github",
|
|
||||||
"url": "https://github.com/sponsors/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "patreon",
|
|
||||||
"url": "https://www.patreon.com/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "consulting",
|
|
||||||
"url": "https://feross.org/support"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"node_modules/simple-get": {
|
|
||||||
"version": "4.0.1",
|
|
||||||
"resolved": "https://registry.npmmirror.com/simple-get/-/simple-get-4.0.1.tgz",
|
|
||||||
"integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
|
|
||||||
"funding": [
|
|
||||||
{
|
|
||||||
"type": "github",
|
|
||||||
"url": "https://github.com/sponsors/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "patreon",
|
|
||||||
"url": "https://www.patreon.com/feross"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "consulting",
|
|
||||||
"url": "https://feross.org/support"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"dependencies": {
|
|
||||||
"decompress-response": "^6.0.0",
|
|
||||||
"once": "^1.3.1",
|
|
||||||
"simple-concat": "^1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/size-sensor": {
|
"node_modules/size-sensor": {
|
||||||
"version": "1.0.3",
|
"version": "1.0.3",
|
||||||
"resolved": "https://registry.npmmirror.com/size-sensor/-/size-sensor-1.0.3.tgz",
|
"resolved": "https://registry.npmmirror.com/size-sensor/-/size-sensor-1.0.3.tgz",
|
||||||
@@ -4881,6 +4618,11 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/sql.js": {
|
||||||
|
"version": "1.14.0",
|
||||||
|
"resolved": "https://registry.npmmirror.com/sql.js/-/sql.js-1.14.0.tgz",
|
||||||
|
"integrity": "sha512-NXYh+kFqLiYRCNAaHD0PcbjFgXyjuolEKLMk5vRt2DgPENtF1kkNzzMlg42dUk5wIsH8MhUzsRhaUxIisoSlZQ=="
|
||||||
|
},
|
||||||
"node_modules/statuses": {
|
"node_modules/statuses": {
|
||||||
"version": "2.0.2",
|
"version": "2.0.2",
|
||||||
"resolved": "https://registry.npmmirror.com/statuses/-/statuses-2.0.2.tgz",
|
"resolved": "https://registry.npmmirror.com/statuses/-/statuses-2.0.2.tgz",
|
||||||
@@ -4889,14 +4631,6 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/string_decoder": {
|
|
||||||
"version": "1.3.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/string_decoder/-/string_decoder-1.3.0.tgz",
|
|
||||||
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
|
|
||||||
"dependencies": {
|
|
||||||
"safe-buffer": "~5.2.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/strip-json-comments": {
|
"node_modules/strip-json-comments": {
|
||||||
"version": "3.1.1",
|
"version": "3.1.1",
|
||||||
"resolved": "https://registry.npmmirror.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz",
|
"resolved": "https://registry.npmmirror.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz",
|
||||||
@@ -4963,6 +4697,28 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/swagger-ui-dist": {
|
||||||
|
"version": "5.32.0",
|
||||||
|
"resolved": "https://registry.npmmirror.com/swagger-ui-dist/-/swagger-ui-dist-5.32.0.tgz",
|
||||||
|
"integrity": "sha512-nKZB0OuDvacB0s/lC2gbge+RigYvGRGpLLMWMFxaTUwfM+CfndVk9Th2IaTinqXiz6Mn26GK2zriCpv6/+5m3Q==",
|
||||||
|
"dependencies": {
|
||||||
|
"@scarf/scarf": "=1.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/swagger-ui-express": {
|
||||||
|
"version": "5.0.1",
|
||||||
|
"resolved": "https://registry.npmmirror.com/swagger-ui-express/-/swagger-ui-express-5.0.1.tgz",
|
||||||
|
"integrity": "sha512-SrNU3RiBGTLLmFU8GIJdOdanJTl4TOmT27tt3bWWHppqYmAZ6IDuEuBvMU6nZq0zLEe6b/1rACXCgLZqO6ZfrA==",
|
||||||
|
"dependencies": {
|
||||||
|
"swagger-ui-dist": ">=5.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= v0.10.32"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"express": ">=4.0.0 || >=5.0.0-beta"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/tailwindcss": {
|
"node_modules/tailwindcss": {
|
||||||
"version": "3.4.19",
|
"version": "3.4.19",
|
||||||
"resolved": "https://registry.npmmirror.com/tailwindcss/-/tailwindcss-3.4.19.tgz",
|
"resolved": "https://registry.npmmirror.com/tailwindcss/-/tailwindcss-3.4.19.tgz",
|
||||||
@@ -5000,32 +4756,6 @@
|
|||||||
"node": ">=14.0.0"
|
"node": ">=14.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/tar-fs": {
|
|
||||||
"version": "2.1.4",
|
|
||||||
"resolved": "https://registry.npmmirror.com/tar-fs/-/tar-fs-2.1.4.tgz",
|
|
||||||
"integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"chownr": "^1.1.1",
|
|
||||||
"mkdirp-classic": "^0.5.2",
|
|
||||||
"pump": "^3.0.0",
|
|
||||||
"tar-stream": "^2.1.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/tar-stream": {
|
|
||||||
"version": "2.2.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/tar-stream/-/tar-stream-2.2.0.tgz",
|
|
||||||
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
|
|
||||||
"dependencies": {
|
|
||||||
"bl": "^4.0.3",
|
|
||||||
"end-of-stream": "^1.4.1",
|
|
||||||
"fs-constants": "^1.0.0",
|
|
||||||
"inherits": "^2.0.3",
|
|
||||||
"readable-stream": "^3.1.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">=6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/thenify": {
|
"node_modules/thenify": {
|
||||||
"version": "3.3.1",
|
"version": "3.3.1",
|
||||||
"resolved": "https://registry.npmmirror.com/thenify/-/thenify-3.3.1.tgz",
|
"resolved": "https://registry.npmmirror.com/thenify/-/thenify-3.3.1.tgz",
|
||||||
@@ -5140,17 +4870,6 @@
|
|||||||
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.0.tgz",
|
"resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.3.0.tgz",
|
||||||
"integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg=="
|
"integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg=="
|
||||||
},
|
},
|
||||||
"node_modules/tunnel-agent": {
|
|
||||||
"version": "0.6.0",
|
|
||||||
"resolved": "https://registry.npmmirror.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
|
|
||||||
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
|
|
||||||
"dependencies": {
|
|
||||||
"safe-buffer": "^5.0.1"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": "*"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/type-check": {
|
"node_modules/type-check": {
|
||||||
"version": "0.4.0",
|
"version": "0.4.0",
|
||||||
"resolved": "https://registry.npmmirror.com/type-check/-/type-check-0.4.0.tgz",
|
"resolved": "https://registry.npmmirror.com/type-check/-/type-check-0.4.0.tgz",
|
||||||
@@ -5288,7 +5007,8 @@
|
|||||||
"node_modules/util-deprecate": {
|
"node_modules/util-deprecate": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmmirror.com/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
"resolved": "https://registry.npmmirror.com/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
||||||
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
|
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
|
||||||
|
"dev": true
|
||||||
},
|
},
|
||||||
"node_modules/utils-merge": {
|
"node_modules/utils-merge": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
@@ -5389,11 +5109,6 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/wrappy": {
|
|
||||||
"version": "1.0.2",
|
|
||||||
"resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz",
|
|
||||||
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
|
|
||||||
},
|
|
||||||
"node_modules/ws": {
|
"node_modules/ws": {
|
||||||
"version": "8.19.0",
|
"version": "8.19.0",
|
||||||
"resolved": "https://registry.npmmirror.com/ws/-/ws-8.19.0.tgz",
|
"resolved": "https://registry.npmmirror.com/ws/-/ws-8.19.0.tgz",
|
||||||
|
|||||||
19
package.json
19
package.json
@@ -4,25 +4,38 @@
|
|||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
"start": "./start.sh",
|
||||||
"dev": "vite",
|
"dev": "vite",
|
||||||
"api": "node server/index.js",
|
"api": "node server/index.js",
|
||||||
"api:seed": "node server/seed.js",
|
"api:seed": "node server/seed.js",
|
||||||
|
"crawler": "cd crawler && python main.py",
|
||||||
|
"gdelt": "cd crawler && uvicorn realtime_conflict_service:app --host 0.0.0.0 --port 8000",
|
||||||
|
"crawler:once": "cd crawler && python run_once.py",
|
||||||
|
"crawler:once:range": "./scripts/run-crawler-range.sh",
|
||||||
|
"crawler:test": "cd crawler && python3 -c \"import sys; sys.path.insert(0,'.'); from scrapers.rss_scraper import fetch_all; n=len(fetch_all()); print('RSS 抓取:', n, '条' if n else '(0 条,检查网络或关键词过滤)')\"",
|
||||||
|
"crawler:test:extraction": "cd crawler && python3 -m pytest tests/test_extraction.py -v",
|
||||||
"build": "vite build",
|
"build": "vite build",
|
||||||
"typecheck": "tsc --noEmit",
|
"typecheck": "tsc --noEmit",
|
||||||
"lint": "eslint .",
|
"lint": "eslint .",
|
||||||
"preview": "vite preview"
|
"preview": "vite preview",
|
||||||
|
"verify": "./scripts/verify-pipeline.sh",
|
||||||
|
"verify:full": "./scripts/verify-pipeline.sh --start-crawler",
|
||||||
|
"verify-panels": "node scripts/verify-panels.cjs",
|
||||||
|
"check-crawler-data": "node scripts/check-crawler-data.cjs"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"better-sqlite3": "^11.6.0",
|
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"echarts": "^5.5.0",
|
"echarts": "^5.5.0",
|
||||||
"echarts-for-react": "^3.0.2",
|
"echarts-for-react": "^3.0.2",
|
||||||
"express": "^4.21.1",
|
"express": "^4.21.1",
|
||||||
"lucide-react": "^0.460.0",
|
"lucide-react": "^0.576.0",
|
||||||
"mapbox-gl": "^3.6.0",
|
"mapbox-gl": "^3.6.0",
|
||||||
"react": "^18.3.1",
|
"react": "^18.3.1",
|
||||||
"react-dom": "^18.3.1",
|
"react-dom": "^18.3.1",
|
||||||
"react-map-gl": "^7.1.7",
|
"react-map-gl": "^7.1.7",
|
||||||
|
"react-router-dom": "^7.13.1",
|
||||||
|
"sql.js": "^1.11.0",
|
||||||
|
"swagger-ui-express": "^5.0.1",
|
||||||
"ws": "^8.19.0",
|
"ws": "^8.19.0",
|
||||||
"zustand": "^5.0.0"
|
"zustand": "^5.0.0"
|
||||||
},
|
},
|
||||||
|
|||||||
140
scripts/check-crawler-data.cjs
Normal file
140
scripts/check-crawler-data.cjs
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
/**
|
||||||
|
* 检查爬虫写入的数据:条数 + 最近内容(situation_update、news_content、gdelt_events)
|
||||||
|
* 用法(项目根目录): node scripts/check-crawler-data.cjs
|
||||||
|
* 可选:先启动爬虫 npm run gdelt,再启动 API 或直接运行本脚本读 DB
|
||||||
|
*/
|
||||||
|
const path = require('path')
|
||||||
|
const http = require('http')
|
||||||
|
|
||||||
|
const projectRoot = path.resolve(__dirname, '..')
|
||||||
|
process.chdir(projectRoot)
|
||||||
|
|
||||||
|
const db = require('../server/db')
|
||||||
|
|
||||||
|
const CRAWLER_URL = process.env.CRAWLER_URL || 'http://localhost:8000'
|
||||||
|
const SHOW_ROWS = 10
|
||||||
|
|
||||||
|
function fetchCrawlerStatus() {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const url = new URL(`${CRAWLER_URL}/crawler/status`)
|
||||||
|
const req = http.request(
|
||||||
|
{ hostname: url.hostname, port: url.port || 80, path: url.pathname, method: 'GET', timeout: 3000 },
|
||||||
|
(res) => {
|
||||||
|
let body = ''
|
||||||
|
res.on('data', (c) => (body += c))
|
||||||
|
res.on('end', () => {
|
||||||
|
try {
|
||||||
|
resolve(JSON.parse(body))
|
||||||
|
} catch {
|
||||||
|
resolve(null)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
)
|
||||||
|
req.on('error', () => resolve(null))
|
||||||
|
req.end()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async function run() {
|
||||||
|
console.log('========================================')
|
||||||
|
console.log('爬虫数据检查(条数 + 最近内容)')
|
||||||
|
console.log('========================================\n')
|
||||||
|
|
||||||
|
// ---------- 爬虫服务状态(可选)----------
|
||||||
|
const status = await fetchCrawlerStatus()
|
||||||
|
if (status) {
|
||||||
|
console.log('--- 爬虫服务状态 GET /crawler/status ---')
|
||||||
|
console.log(' db_path:', status.db_path)
|
||||||
|
console.log(' db_exists:', status.db_exists)
|
||||||
|
console.log(' situation_update_count:', status.situation_update_count)
|
||||||
|
console.log(' last_fetch_items:', status.last_fetch_items, '(本轮抓取条数)')
|
||||||
|
console.log(' last_fetch_inserted:', status.last_fetch_inserted, '(去重后新增)')
|
||||||
|
if (status.last_fetch_error) console.log(' last_fetch_error:', status.last_fetch_error)
|
||||||
|
console.log('')
|
||||||
|
} else {
|
||||||
|
console.log('--- 爬虫服务 ---')
|
||||||
|
console.log(' 未启动或不可达:', CRAWLER_URL)
|
||||||
|
console.log('')
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- situation_update(事件脉络,看板「近期更新」)----------
|
||||||
|
let situationUpdateRows = []
|
||||||
|
let situationUpdateCount = 0
|
||||||
|
try {
|
||||||
|
situationUpdateCount = db.prepare('SELECT COUNT(*) as c FROM situation_update').get().c
|
||||||
|
situationUpdateRows = db
|
||||||
|
.prepare(
|
||||||
|
'SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT ?'
|
||||||
|
)
|
||||||
|
.all(SHOW_ROWS)
|
||||||
|
} catch (e) {
|
||||||
|
console.log('situation_update 表读取失败:', e.message)
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('--- situation_update(事件脉络)---')
|
||||||
|
console.log(' 总条数:', situationUpdateCount)
|
||||||
|
if (situationUpdateRows.length > 0) {
|
||||||
|
console.log(' 最近', situationUpdateRows.length, '条:')
|
||||||
|
situationUpdateRows.forEach((r, i) => {
|
||||||
|
const summary = (r.summary || '').slice(0, 50)
|
||||||
|
console.log(` ${i + 1}. [${r.timestamp}] ${r.category}/${r.severity} ${summary}${summary.length >= 50 ? '…' : ''}`)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
console.log('')
|
||||||
|
|
||||||
|
// ---------- news_content(资讯表,爬虫去重后写入)----------
|
||||||
|
let newsCount = 0
|
||||||
|
let newsRows = []
|
||||||
|
try {
|
||||||
|
newsCount = db.prepare('SELECT COUNT(*) as c FROM news_content').get().c
|
||||||
|
newsRows = db
|
||||||
|
.prepare(
|
||||||
|
'SELECT title, summary, source, published_at, category, severity FROM news_content ORDER BY published_at DESC LIMIT ?'
|
||||||
|
)
|
||||||
|
.all(SHOW_ROWS)
|
||||||
|
} catch (e) {
|
||||||
|
console.log('news_content 表读取失败:', e.message)
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('--- news_content(资讯表)---')
|
||||||
|
console.log(' 总条数:', newsCount)
|
||||||
|
if (newsRows.length > 0) {
|
||||||
|
console.log(' 最近', newsRows.length, '条:')
|
||||||
|
newsRows.forEach((r, i) => {
|
||||||
|
const title = (r.title || '').slice(0, 45)
|
||||||
|
console.log(` ${i + 1}. [${r.published_at || ''}] ${r.source || ''} ${title}${title.length >= 45 ? '…' : ''}`)
|
||||||
|
if (r.summary) console.log(` summary: ${(r.summary || '').slice(0, 60)}…`)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
console.log('')
|
||||||
|
|
||||||
|
// ---------- gdelt_events(地图冲突点)----------
|
||||||
|
let gdeltCount = 0
|
||||||
|
let gdeltRows = []
|
||||||
|
try {
|
||||||
|
gdeltCount = db.prepare('SELECT COUNT(*) as c FROM gdelt_events').get().c
|
||||||
|
gdeltRows = db
|
||||||
|
.prepare('SELECT event_id, event_time, title, impact_score FROM gdelt_events ORDER BY event_time DESC LIMIT 5')
|
||||||
|
.all()
|
||||||
|
} catch (e) {
|
||||||
|
console.log('gdelt_events 表读取失败:', e.message)
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('--- gdelt_events(地图冲突点)---')
|
||||||
|
console.log(' 总条数:', gdeltCount)
|
||||||
|
if (gdeltRows.length > 0) {
|
||||||
|
console.log(' 最近 5 条:')
|
||||||
|
gdeltRows.forEach((r, i) => {
|
||||||
|
const title = (r.title || '').slice(0, 50)
|
||||||
|
console.log(` ${i + 1}. [${r.event_time}] impact=${r.impact_score} ${title}${title.length >= 50 ? '…' : ''}`)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
console.log('========================================')
|
||||||
|
}
|
||||||
|
|
||||||
|
db.initDb().then(() => run()).catch((err) => {
|
||||||
|
console.error('失败:', err.message)
|
||||||
|
process.exit(1)
|
||||||
|
})
|
||||||
61
scripts/check-crawler-panel-connectivity.sh
Executable file
61
scripts/check-crawler-panel-connectivity.sh
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# 检查爬虫数据与面板数据是否联通
|
||||||
|
# 用法: ./scripts/check-crawler-panel-connectivity.sh
|
||||||
|
# 需先启动: npm run api;可选: npm run gdelt
|
||||||
|
set -e
|
||||||
|
|
||||||
|
API_URL="${API_URL:-http://localhost:3001}"
|
||||||
|
CRAWLER_URL="${CRAWLER_URL:-http://localhost:8000}"
|
||||||
|
|
||||||
|
echo "=========================================="
|
||||||
|
echo "爬虫 ↔ 面板 联通检查"
|
||||||
|
echo "API: $API_URL | Crawler: $CRAWLER_URL"
|
||||||
|
echo "=========================================="
|
||||||
|
|
||||||
|
# 1. 爬虫侧:situation_update 条数
|
||||||
|
CRAWLER_SU_COUNT=""
|
||||||
|
if curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
CRAWLER_SU_COUNT=$(curl -sf "$CRAWLER_URL/crawler/status" | jq -r '.situation_update_count // "?"')
|
||||||
|
else
|
||||||
|
CRAWLER_SU_COUNT="(需 jq 查看)"
|
||||||
|
fi
|
||||||
|
echo "[爬虫] situation_update 条数: $CRAWLER_SU_COUNT"
|
||||||
|
else
|
||||||
|
echo "[爬虫] 未启动或不可达 (curl $CRAWLER_URL/crawler/status 失败)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 2. 面板侧:API 返回的 recentUpdates 条数、lastUpdated
|
||||||
|
if ! curl -sf "$API_URL/api/health" >/dev/null 2>&1; then
|
||||||
|
echo "[API] 未启动,请先运行: npm run api"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
RU_LEN=$(echo "$SIT" | jq '.recentUpdates | length')
|
||||||
|
LAST=$(echo "$SIT" | jq -r '.lastUpdated // "?"')
|
||||||
|
echo "[面板] recentUpdates 条数: $RU_LEN | lastUpdated: $LAST"
|
||||||
|
else
|
||||||
|
echo "[面板] 态势数据已获取 (安装 jq 可显示条数)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 3. 一致性:爬虫写的是 server/data.db,Node 通过 notify 重载后应一致
|
||||||
|
echo ""
|
||||||
|
echo "--- 联动说明 ---"
|
||||||
|
echo " • 事件脉络 (recentUpdates) ← situation_update 表,由爬虫 write_updates() 写入"
|
||||||
|
echo " • 爬虫每次抓取后会 POST $API_URL/api/crawler/notify,Node 会 reloadFromFile() 后广播"
|
||||||
|
echo " • 若爬虫有数据但面板 recentUpdates 很少/为空:检查 Node 终端是否出现 [crawler/notify] DB 已重载"
|
||||||
|
echo " • 若从未出现:检查 API_BASE 是否指向当前 API(默认 http://localhost:3001)"
|
||||||
|
echo " • 战损/基地/力量指数:仅当 AI/规则从新闻中提取到数字时才会更新,多数新闻不会触发"
|
||||||
|
echo "=========================================="
|
||||||
|
|
||||||
|
# 4. 可选:触发一次 notify 看 Node 是否重载(不启动爬虫时可用于测试)
|
||||||
|
# 非交互时跳过;交互时可用: echo y | ./scripts/check-crawler-panel-connectivity.sh
|
||||||
|
if [[ -t 0 ]]; then
|
||||||
|
echo ""
|
||||||
|
read -r -p "是否发送一次 POST /api/crawler/notify 测试 Node 重载? [y/N] " ans
|
||||||
|
if [[ "${ans,,}" = "y" ]]; then
|
||||||
|
curl -sf -X POST "$API_URL/api/crawler/notify" && echo " 已发送 notify,请看 Node 终端是否打印 [crawler/notify] DB 已重载"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
78
scripts/debug-panels-focus.sh
Executable file
78
scripts/debug-panels-focus.sh
Executable file
@@ -0,0 +1,78 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# 仅检查:战损、基地、地图战区 三块数据
|
||||||
|
# 用法: ./scripts/debug-panels-focus.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
API_URL="${API_URL:-http://localhost:3001}"
|
||||||
|
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
|
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"
|
||||||
|
|
||||||
|
echo "=========================================="
|
||||||
|
echo "战损 / 基地 / 地图战区 — 数据检查"
|
||||||
|
echo "API: $API_URL | DB: $DB_PATH"
|
||||||
|
echo "=========================================="
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ---------- API 连通 ----------
|
||||||
|
if ! curl -sf "$API_URL/api/health" >/dev/null 2>&1; then
|
||||||
|
echo "✗ API 无响应,请先运行: npm run api"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "✓ API 正常"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
|
||||||
|
|
||||||
|
# ---------- 1. 战损 ----------
|
||||||
|
echo "[1] 战损 (combat_losses)"
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
us_k=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.killed // "?"')
|
||||||
|
us_w=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.wounded // "?"')
|
||||||
|
ir_k=$(echo "$SIT" | jq -r '.iranForces.combatLosses.personnelCasualties.killed // "?"')
|
||||||
|
ir_w=$(echo "$SIT" | jq -r '.iranForces.combatLosses.personnelCasualties.wounded // "?"')
|
||||||
|
echo " 美军 阵亡=$us_k 受伤=$us_w | 伊朗 阵亡=$ir_k 受伤=$ir_w"
|
||||||
|
echo " conflictStats: $(echo "$SIT" | jq -c '.conflictStats')"
|
||||||
|
else
|
||||||
|
echo " (安装 jq 可显示详细数字)"
|
||||||
|
fi
|
||||||
|
if [[ -f "$DB_PATH" ]] && command -v sqlite3 &>/dev/null; then
|
||||||
|
echo " 表 combat_losses:"
|
||||||
|
sqlite3 "$DB_PATH" "SELECT side, personnel_killed, personnel_wounded, bases_destroyed, bases_damaged FROM combat_losses" 2>/dev/null | while read -r line; do echo " $line"; done
|
||||||
|
fi
|
||||||
|
echo " 数据来源: seed 初始;爬虫从新闻提取 combat_losses_delta 后 db_merge 增量叠加。不更新→检查是否跑 gdelt、提取器是否输出、新闻是否含伤亡数字。"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ---------- 2. 基地 ----------
|
||||||
|
echo "[2] 基地 (key_location)"
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
us_loc=$(echo "$SIT" | jq -r '.usForces.keyLocations | length')
|
||||||
|
ir_loc=$(echo "$SIT" | jq -r '.iranForces.keyLocations | length')
|
||||||
|
us_attacked=$(echo "$SIT" | jq -r '[.usForces.keyLocations[] | select(.status == "attacked")] | length')
|
||||||
|
ir_attacked=$(echo "$SIT" | jq -r '[.iranForces.keyLocations[] | select(.status == "attacked")] | length')
|
||||||
|
echo " 美军 据点=$us_loc 遭袭=$us_attacked | 伊朗 据点=$ir_loc 遭袭=$ir_attacked"
|
||||||
|
fi
|
||||||
|
if [[ -f "$DB_PATH" ]] && command -v sqlite3 &>/dev/null; then
|
||||||
|
echo " 表 key_location 遭袭/有损伤的:"
|
||||||
|
sqlite3 "$DB_PATH" "SELECT side, name, status, damage_level FROM key_location WHERE status != 'operational' OR damage_level IS NOT NULL LIMIT 10" 2>/dev/null | while read -r line; do echo " $line"; done
|
||||||
|
fi
|
||||||
|
echo " 数据来源: seed 写入全部据点;爬虫只更新 status/damage_level,需 name_keywords 与 name LIKE 匹配。不更新→检查新闻是否提基地遭袭、关键词与 seed name 是否一致。"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ---------- 3. 地图战区 ----------
|
||||||
|
echo "[3] 地图战区 (gdelt_events + conflict_stats)"
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
ev_cnt=$(echo "$SIT" | jq -r '.conflictEvents | length')
|
||||||
|
echo " conflictEvents 条数: $ev_cnt"
|
||||||
|
echo " conflictStats: $(echo "$SIT" | jq -c '.conflictStats')"
|
||||||
|
fi
|
||||||
|
if [[ -f "$DB_PATH" ]] && command -v sqlite3 &>/dev/null; then
|
||||||
|
n_ev=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM gdelt_events" 2>/dev/null || echo "0")
|
||||||
|
echo " 表 gdelt_events 行数: $n_ev"
|
||||||
|
sqlite3 "$DB_PATH" "SELECT total_events, high_impact_events, estimated_casualties, estimated_strike_count FROM conflict_stats WHERE id = 1" 2>/dev/null | while read -r line; do echo " conflict_stats: $line"; done
|
||||||
|
fi
|
||||||
|
echo " 数据来源: GDELT API 写入;或 GDELT_DISABLED=1 时由 situation_update 回填。无点→跑 gdelt 或开启 RSS 回填。"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "=========================================="
|
||||||
|
echo "详细说明与排查顺序见: docs/DEBUG_战损_基地_地图.md"
|
||||||
|
echo "=========================================="
|
||||||
83
scripts/debug-panels.sh
Executable file
83
scripts/debug-panels.sh
Executable file
@@ -0,0 +1,83 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# 看板板块数据快速检查:各表/API 与板块对应关系,便于逐项 debug
|
||||||
|
# 用法: ./scripts/debug-panels.sh
|
||||||
|
# 依赖: curl;可选 jq、sqlite3 以输出更清晰
|
||||||
|
|
||||||
|
set -e
|
||||||
|
API_URL="${API_URL:-http://localhost:3001}"
|
||||||
|
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
|
DB_PATH="${DB_PATH:-$PROJECT_ROOT/server/data.db}"
|
||||||
|
|
||||||
|
echo "=========================================="
|
||||||
|
echo "看板板块数据检查 (DEBUG_PANELS)"
|
||||||
|
echo "API: $API_URL | DB: $DB_PATH"
|
||||||
|
echo "=========================================="
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ---------- 1. API 健康与态势摘要 ----------
|
||||||
|
echo "[1] API 与态势摘要"
|
||||||
|
if ! curl -sf "$API_URL/api/health" >/dev/null 2>&1; then
|
||||||
|
echo " ✗ API 无响应,请先运行: npm run api"
|
||||||
|
echo " 后续表检查将跳过(依赖 API 或直接读 DB)"
|
||||||
|
else
|
||||||
|
echo " ✓ API 正常"
|
||||||
|
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
echo " lastUpdated: $(echo "$SIT" | jq -r '.lastUpdated // "?"')"
|
||||||
|
echo " recentUpdates: $(echo "$SIT" | jq -r '.recentUpdates | length') 条 → 事件脉络"
|
||||||
|
echo " conflictEvents: $(echo "$SIT" | jq -r '.conflictEvents | length') 条 → 地图冲突点"
|
||||||
|
echo " us powerIndex: $(echo "$SIT" | jq -r '.usForces.powerIndex.overall') → 顶栏/战力图"
|
||||||
|
echo " iran powerIndex: $(echo "$SIT" | jq -r '.iranForces.powerIndex.overall')"
|
||||||
|
echo " us keyLocations: $(echo "$SIT" | jq -r '.usForces.keyLocations | length') 条 → 美国基地/地图"
|
||||||
|
echo " iran keyLocations: $(echo "$SIT" | jq -r '.iranForces.keyLocations | length') 条 → 伊朗基地/地图"
|
||||||
|
echo " us combatLosses: killed=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.killed') wounded=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.wounded')"
|
||||||
|
echo " wallStreet points: $(echo "$SIT" | jq -r '.usForces.wallStreetInvestmentTrend | length') → 华尔街图"
|
||||||
|
echo " retaliation: $(echo "$SIT" | jq -r '.iranForces.retaliationSentiment') (history: $(echo "$SIT" | jq -r '.iranForces.retaliationSentimentHistory | length') 条)"
|
||||||
|
else
|
||||||
|
echo " (安装 jq 可显示详细字段) 态势已拉取,长度: ${#SIT}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ---------- 2. 各表行数(直接读 DB)----------
|
||||||
|
echo "[2] 数据库表行数(与板块对应)"
|
||||||
|
if ! [[ -f "$DB_PATH" ]]; then
|
||||||
|
echo " ✗ 数据库文件不存在: $DB_PATH"
|
||||||
|
echo " 请先 seed: node server/seed.js 或 启动 API 后由 initDb 创建"
|
||||||
|
elif ! command -v sqlite3 &>/dev/null; then
|
||||||
|
echo " (未安装 sqlite3,跳过表统计。可安装后重试)"
|
||||||
|
else
|
||||||
|
TABLES="force_summary power_index force_asset key_location combat_losses wall_street_trend retaliation_current retaliation_history situation_update situation gdelt_events conflict_stats news_content"
|
||||||
|
for t in $TABLES; do
|
||||||
|
n=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM $t" 2>/dev/null || echo "?")
|
||||||
|
case "$t" in
|
||||||
|
force_summary) desc="力量摘要(美/伊)" ;;
|
||||||
|
power_index) desc="战力指数 → 顶栏/战力图" ;;
|
||||||
|
force_asset) desc="资产列表 → 左右侧摘要" ;;
|
||||||
|
key_location) desc="据点 → 地图/美伊基地面板" ;;
|
||||||
|
combat_losses) desc="战损 → 战损面板" ;;
|
||||||
|
wall_street_trend) desc="华尔街趋势图" ;;
|
||||||
|
retaliation_current) desc="报复当前值" ;;
|
||||||
|
retaliation_history) desc="报复历史 → 仪表盘" ;;
|
||||||
|
situation_update) desc="事件脉络 → 时间线" ;;
|
||||||
|
situation) desc="updated_at → 顶栏时间" ;;
|
||||||
|
gdelt_events) desc="冲突点 → 地图图层" ;;
|
||||||
|
conflict_stats) desc="冲突统计 → 战损区" ;;
|
||||||
|
news_content) desc="资讯表 → /api/news" ;;
|
||||||
|
*) desc="" ;;
|
||||||
|
esac
|
||||||
|
printf " %-22s %6s %s\n" "$t" "$n" "$desc"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# ---------- 3. 板块健康简要判断 ----------
|
||||||
|
echo "[3] 板块数据来源与可能问题"
|
||||||
|
echo " • 仅 seed、爬虫不写: force_summary, power_index, force_asset"
|
||||||
|
echo " • 爬虫可更新: situation_update(事件脉络), key_location(基地状态), combat_losses(战损), retaliation_*, wall_street_trend, gdelt_events"
|
||||||
|
echo " • 事件脉络不更新 → 检查爬虫是否启动、是否调用 POST /api/crawler/notify"
|
||||||
|
echo " • 战损/基地不更新 → 检查是否跑 npm run gdelt、提取器是否输出、新闻是否含相关表述"
|
||||||
|
echo " • 地图无冲突点 → 检查 gdelt_events 是否有数据、GDELT 或 RSS 回填是否执行"
|
||||||
|
echo ""
|
||||||
|
echo "详细逐板块说明见: docs/DEBUG_PANELS.md"
|
||||||
|
echo "=========================================="
|
||||||
17
scripts/run-crawler-range.sh
Executable file
17
scripts/run-crawler-range.sh
Executable file
@@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# 按时间范围跑一轮爬虫(RSS:仅保留指定起始时间之后的条目)
|
||||||
|
# 用法:
|
||||||
|
# ./scripts/run-crawler-range.sh # 默认从 2026-02-28 0:00 到现在
|
||||||
|
# ./scripts/run-crawler-range.sh 2026-02-25T00:00:00
|
||||||
|
#
|
||||||
|
# GDELT 时间范围需在启动 gdelt 服务时设置,例如:
|
||||||
|
# GDELT_TIMESPAN=3d npm run gdelt
|
||||||
|
|
||||||
|
set -e
|
||||||
|
START="${1:-2026-02-28T00:00:00}"
|
||||||
|
cd "$(dirname "$0")/.."
|
||||||
|
echo "RSS 抓取时间范围: 仅保留 ${START} 之后"
|
||||||
|
echo "运行: cd crawler && CRAWL_START_DATE=${START} python run_once.py"
|
||||||
|
echo ""
|
||||||
|
export CRAWL_START_DATE="$START"
|
||||||
|
(cd crawler && python3 run_once.py)
|
||||||
81
scripts/verify-panels.cjs
Normal file
81
scripts/verify-panels.cjs
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
/**
|
||||||
|
* 代码层执行看板验证:直接调用 getSituation() 与 DB,输出战损 / 基地 / 地图战区 结果。
|
||||||
|
* 用法(项目根目录): node scripts/verify-panels.cjs
|
||||||
|
*/
|
||||||
|
const path = require('path')
|
||||||
|
|
||||||
|
const projectRoot = path.resolve(__dirname, '..')
|
||||||
|
process.chdir(projectRoot)
|
||||||
|
|
||||||
|
const db = require('../server/db')
|
||||||
|
const { getSituation } = require('../server/situationData')
|
||||||
|
|
||||||
|
function run() {
|
||||||
|
const s = getSituation()
|
||||||
|
|
||||||
|
console.log('========================================')
|
||||||
|
console.log('看板数据验证(与 API getSituation 一致)')
|
||||||
|
console.log('========================================\n')
|
||||||
|
|
||||||
|
console.log('lastUpdated:', s.lastUpdated)
|
||||||
|
console.log('')
|
||||||
|
|
||||||
|
// ---------- 1. 战损 ----------
|
||||||
|
console.log('--- [1] 战损 combat_losses ---')
|
||||||
|
const us = s.usForces.combatLosses
|
||||||
|
const ir = s.iranForces.combatLosses
|
||||||
|
console.log('美军 阵亡:', us.personnelCasualties.killed, '受伤:', us.personnelCasualties.wounded)
|
||||||
|
console.log('美军 基地毁/损:', us.bases.destroyed, '/', us.bases.damaged)
|
||||||
|
console.log('美军 战机/舰艇/装甲/车辆:', us.aircraft, us.warships, us.armor, us.vehicles)
|
||||||
|
console.log('伊朗 阵亡:', ir.personnelCasualties.killed, '受伤:', ir.personnelCasualties.wounded)
|
||||||
|
console.log('伊朗 基地毁/损:', ir.bases.destroyed, '/', ir.bases.damaged)
|
||||||
|
console.log('平民合计 killed/wounded:', s.civilianCasualtiesTotal.killed, s.civilianCasualtiesTotal.wounded)
|
||||||
|
console.log('conflictStats:', JSON.stringify(s.conflictStats))
|
||||||
|
console.log('')
|
||||||
|
|
||||||
|
// ---------- 2. 基地(与看板口径一致:美军仅 type===Base,伊朗为 Base/Port/Nuclear/Missile)----------
|
||||||
|
console.log('--- [2] 基地 key_location ---')
|
||||||
|
const usLoc = s.usForces.keyLocations || []
|
||||||
|
const irLoc = s.iranForces.keyLocations || []
|
||||||
|
const usBases = usLoc.filter((l) => l.type === 'Base')
|
||||||
|
const irBases = irLoc.filter((l) => ['Base', 'Port', 'Nuclear', 'Missile'].includes(l.type))
|
||||||
|
const usAttacked = usBases.filter((l) => l.status === 'attacked')
|
||||||
|
const irAttacked = irBases.filter((l) => l.status === 'attacked')
|
||||||
|
console.log('美军 总基地数(仅Base):', usBases.length, '| 遭袭:', usAttacked.length, '(与看板「美军基地态势」一致)')
|
||||||
|
console.log('伊朗 总基地数(Base/Port/Nuclear/Missile):', irBases.length, '| 遭袭:', irAttacked.length, '(与看板「伊朗基地态势」一致)')
|
||||||
|
if (usAttacked.length > 0) {
|
||||||
|
console.log('美军遭袭示例:', usAttacked.slice(0, 3).map((l) => `${l.name}(${l.status},damage=${l.damage_level})`).join(', '))
|
||||||
|
}
|
||||||
|
if (irAttacked.length > 0) {
|
||||||
|
console.log('伊朗遭袭示例:', irAttacked.slice(0, 3).map((l) => `${l.name}(${l.status},damage=${l.damage_level})`).join(', '))
|
||||||
|
}
|
||||||
|
console.log('')
|
||||||
|
|
||||||
|
// ---------- 3. 地图战区 ----------
|
||||||
|
console.log('--- [3] 地图战区 gdelt_events + conflict_stats ---')
|
||||||
|
const events = s.conflictEvents || []
|
||||||
|
console.log('conflictEvents 条数:', events.length)
|
||||||
|
console.log('conflictStats:', JSON.stringify(s.conflictStats))
|
||||||
|
if (events.length > 0) {
|
||||||
|
console.log('最近 3 条:', events.slice(0, 3).map((e) => `${e.event_time} ${(e.title || '').slice(0, 40)} impact=${e.impact_score}`))
|
||||||
|
}
|
||||||
|
console.log('')
|
||||||
|
|
||||||
|
// ---------- 附加:事件脉络 ----------
|
||||||
|
const updates = s.recentUpdates || []
|
||||||
|
console.log('--- [附] 事件脉络 situation_update ---')
|
||||||
|
console.log('recentUpdates 条数:', updates.length)
|
||||||
|
if (updates.length > 0) {
|
||||||
|
console.log('最新 1 条:', updates[0].timestamp, (updates[0].summary || '').slice(0, 50))
|
||||||
|
}
|
||||||
|
console.log('========================================')
|
||||||
|
}
|
||||||
|
|
||||||
|
db
|
||||||
|
.initDb()
|
||||||
|
.then(() => run())
|
||||||
|
.catch((err) => {
|
||||||
|
console.error('验证失败:', err.message)
|
||||||
|
process.exit(1)
|
||||||
|
})
|
||||||
124
scripts/verify-pipeline.sh
Executable file
124
scripts/verify-pipeline.sh
Executable file
@@ -0,0 +1,124 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# 验证爬虫 → 数据库 → API → 前端 全链路
|
||||||
|
# 用法: ./scripts/verify-pipeline.sh [--start-crawler]
|
||||||
|
set -e
|
||||||
|
|
||||||
|
API_URL="${API_URL:-http://localhost:3001}"
|
||||||
|
CRAWLER_URL="${CRAWLER_URL:-http://localhost:8000}"
|
||||||
|
START_CRAWLER=false
|
||||||
|
[[ "${1:-}" = "--start-crawler" ]] && START_CRAWLER=true
|
||||||
|
|
||||||
|
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
|
|
||||||
|
echo "=========================================="
|
||||||
|
echo "US-Iran 态势面板 链路验证"
|
||||||
|
echo "API: $API_URL | Crawler: $CRAWLER_URL"
|
||||||
|
echo "=========================================="
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# 可选:启动爬虫
|
||||||
|
if $START_CRAWLER; then
|
||||||
|
echo "[0/6] 启动爬虫..."
|
||||||
|
if curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then
|
||||||
|
echo " ✓ 爬虫已在运行"
|
||||||
|
else
|
||||||
|
cd "$PROJECT_ROOT/crawler"
|
||||||
|
python3 -c "import uvicorn" 2>/dev/null || { echo " 需安装: pip install uvicorn"; exit 1; }
|
||||||
|
uvicorn realtime_conflict_service:app --host 127.0.0.1 --port 8000 &
|
||||||
|
echo " 等待爬虫就绪..."
|
||||||
|
for i in $(seq 1 15); do
|
||||||
|
sleep 2
|
||||||
|
if curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then
|
||||||
|
echo " ✓ 爬虫已启动"
|
||||||
|
echo " 等待首次 RSS 抓取(约 70 秒)..."
|
||||||
|
sleep 70
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if ! curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then
|
||||||
|
echo " ✗ 爬虫启动超时"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 1. API 健康检查
|
||||||
|
echo "[1/6] API 健康检查..."
|
||||||
|
if curl -sf "$API_URL/api/health" > /dev/null; then
|
||||||
|
echo " ✓ API 正常"
|
||||||
|
else
|
||||||
|
echo " ✗ API 无响应,请先运行: npm run api"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 2. 态势数据
|
||||||
|
echo "[2/6] 态势数据..."
|
||||||
|
SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}")
|
||||||
|
if echo "$SIT" | grep -q "lastUpdated"; then
|
||||||
|
echo " ✓ 态势数据可读"
|
||||||
|
LAST=$(echo "$SIT" | grep -o '"lastUpdated":"[^"]*"' | head -1)
|
||||||
|
echo " $LAST"
|
||||||
|
else
|
||||||
|
echo " ✗ 态势数据异常"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 3. 爬虫状态
|
||||||
|
echo "[3/6] 爬虫状态..."
|
||||||
|
CRAWLER=$(curl -sf "$CRAWLER_URL/crawler/status" 2>/dev/null || echo "{}")
|
||||||
|
if echo "$CRAWLER" | grep -q "db_path\|db_exists"; then
|
||||||
|
echo " ✓ 爬虫服务可访问"
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
CNT=$(echo "$CRAWLER" | jq -r '.situation_update_count // "?"')
|
||||||
|
echo " situation_update 条数: $CNT"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo " ⚠ 爬虫未启动或不可达(可选,需单独运行爬虫)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 4. 资讯表
|
||||||
|
echo "[4/6] 资讯表 news_content..."
|
||||||
|
NEWS=$(curl -sf "$API_URL/api/news?limit=3" 2>/dev/null || echo '{"items":[]}')
|
||||||
|
if echo "$NEWS" | grep -q '"items"'; then
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
N=$(echo "$NEWS" | jq '.items | length')
|
||||||
|
echo " ✓ 最近 $N 条资讯"
|
||||||
|
else
|
||||||
|
echo " ✓ 资讯接口可读"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo " ⚠ news_content 可能为空(爬虫未跑或刚启动)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 5. 战损数据
|
||||||
|
echo "[5/6] 战损数据 combat_losses..."
|
||||||
|
if echo "$SIT" | grep -q "personnelCasualties"; then
|
||||||
|
echo " ✓ 战损字段存在"
|
||||||
|
if command -v jq &>/dev/null; then
|
||||||
|
US_K=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.killed // "?"')
|
||||||
|
IR_K=$(echo "$SIT" | jq -r '.iranForces.combatLosses.personnelCasualties.killed // "?"')
|
||||||
|
echo " 美军阵亡: $US_K | 伊朗阵亡: $IR_K"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo " ✗ 战损结构异常"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 6. 通知接口(仅验证可调用)
|
||||||
|
echo "[6/6] 通知接口 POST /api/crawler/notify..."
|
||||||
|
NOTIFY=$(curl -sf -X POST "$API_URL/api/crawler/notify" 2>/dev/null || echo "{}")
|
||||||
|
if echo "$NOTIFY" | grep -q '"ok"'; then
|
||||||
|
echo " ✓ 通知接口正常"
|
||||||
|
else
|
||||||
|
echo " ⚠ 通知接口可能异常"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=========================================="
|
||||||
|
echo "验证完成。"
|
||||||
|
echo ""
|
||||||
|
echo "建议:"
|
||||||
|
echo " - 访问 $API_URL/db 查看各表数据"
|
||||||
|
echo " - 爬虫未启动时: ./scripts/verify-pipeline.sh --start-crawler"
|
||||||
|
echo " - 或手动启动: cd crawler && uvicorn realtime_conflict_service:app --port 8000"
|
||||||
|
echo "=========================================="
|
||||||
170
server/README.md
Normal file
170
server/README.md
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
# 后端运行逻辑
|
||||||
|
|
||||||
|
后端是 **Node.js Express + SQLite + WebSocket**,与 Python 爬虫共用同一数据库文件,负责提供「态势数据」API、实时推送和简单统计。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、启动方式
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run api # 启动 server/index.js,默认端口 3001
|
||||||
|
```
|
||||||
|
|
||||||
|
- 端口:`process.env.API_PORT || 3001`
|
||||||
|
- 数据库:`process.env.DB_PATH` 或 `server/data.db`(与爬虫共用)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、整体架构
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────┐
|
||||||
|
│ server/index.js │
|
||||||
|
│ (HTTP Server + WebSocket Server) │
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
┌───────────────────────────────┼───────────────────────────────┐
|
||||||
|
│ │ │
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||||
|
│ /api/* │ │ /ws │ │ 静态 dist │
|
||||||
|
│ routes.js │ │ WebSocket │ │ (生产) │
|
||||||
|
└──────┬──────┘ └──────┬──────┘ └─────────────┘
|
||||||
|
│ │
|
||||||
|
│ 读/写 │ 广播 situation + stats
|
||||||
|
▼ │
|
||||||
|
┌─────────────┐ │
|
||||||
|
│ db.js │◄─────────────────────┘
|
||||||
|
│ (SQLite) │ getSituation() / getStats()
|
||||||
|
└──────┬──────┘
|
||||||
|
│
|
||||||
|
│ 同文件 data.db
|
||||||
|
▼
|
||||||
|
┌─────────────┐
|
||||||
|
│ Python 爬虫 │ 抓取 → 去重 → AI 清洗 → 映射到库字段 → 写表 → POST /api/crawler/notify
|
||||||
|
│ situation_ │ (main.py 或 gdelt 服务;写 situation_update / news_content / combat_losses 等)
|
||||||
|
│ update 等 │
|
||||||
|
└─────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、核心模块
|
||||||
|
|
||||||
|
| 文件 | 作用 |
|
||||||
|
|------|------|
|
||||||
|
| **index.js** | 创建 HTTP + WebSocket 服务,挂载路由、静态资源、定时广播、爬虫通知回调 |
|
||||||
|
| **routes.js** | 所有 `/api/*` 接口:situation、db/dashboard、visit、feedback、share、stats、events、news 等 |
|
||||||
|
| **situationData.js** | `getSituation()`:从多张表聚合为前端所需的「态势」JSON(军力、基地、战损、事件脉络、GDELT 等) |
|
||||||
|
| **db.js** | SQLite 连接、建表、迁移(better-sqlite3,WAL 模式) |
|
||||||
|
| **stats.js** | `getStats()`:在看人数、累计访问、留言数、分享数 |
|
||||||
|
| **openapi.js** | Swagger/OpenAPI 文档定义 |
|
||||||
|
| **seed.js** | 初始化/重置种子数据(可单独运行 `npm run api:seed`) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、数据流(读)
|
||||||
|
|
||||||
|
1. **前端要「整页态势」**
|
||||||
|
- 请求 `GET /api/situation` → `routes.js` 调用 `getSituation()`
|
||||||
|
- `situationData.js` 从 db 读:`force_summary`、`power_index`、`force_asset`、`key_location`、`combat_losses`、`wall_street_trend`、`retaliation_*`、`situation_update`(最近 50 条)、`gdelt_events`、`conflict_stats` 等
|
||||||
|
- 组装成 `{ lastUpdated, usForces, iranForces, recentUpdates, conflictEvents, conflictStats, civilianCasualtiesTotal }` 返回。
|
||||||
|
|
||||||
|
2. **前端要「事件列表」**
|
||||||
|
- `GET /api/events` 返回 `conflictEvents` + `conflict_stats` + `updated_at`(同样来自 getSituation 的数据)。
|
||||||
|
|
||||||
|
3. **前端要「原始表数据」**
|
||||||
|
- `GET /api/db/dashboard` 返回多张表的 `SELECT *` 结果(含 `situation_update`),供 `/db` 调试页使用。
|
||||||
|
|
||||||
|
4. **WebSocket**
|
||||||
|
- 连接 `ws://host/ws` 时立即收到一条 `{ type: 'situation', data: getSituation(), stats: getStats() }`。
|
||||||
|
- 之后每 3 秒服务端主动广播同结构数据,前端可据此做实时刷新。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、数据流(写)
|
||||||
|
|
||||||
|
### 5.1 爬虫侧写库链路(推荐理解顺序)
|
||||||
|
|
||||||
|
爬虫写入前端库的完整链路如下,**不是**「抓完直接写表」,而是经过去重、AI 清洗、字段映射后再落库:
|
||||||
|
|
||||||
|
1. **爬虫抓取实时数据**
|
||||||
|
- RSS 等源抓取(`scrapers/rss_scraper.fetch_all`),得到原始条目列表。
|
||||||
|
|
||||||
|
2. **数据去重**
|
||||||
|
- 抓取阶段:RSS 内按 (title, url) 去重。
|
||||||
|
- 落库前:按 `content_hash(title, summary, url)` 在 `news_content` 表中去重,仅**未出现过**的条目进入后续流程(`news_storage.save_and_dedup`)。
|
||||||
|
|
||||||
|
3. **去重后按批次推送给 AI 清洗**
|
||||||
|
- 对通过去重的每条/每批数据:
|
||||||
|
- **展示用清洗**:标题/摘要翻译、`clean_news_for_panel` 提炼为符合面板的纯文本与长度(如 summary ≤120 字),`ensure_category` / `ensure_severity` 规范为前端枚举(`cleaner_ai`)。
|
||||||
|
- **结构化提取**(可选):`extractor_ai` / `extractor_dashscope` / `extractor_rules` 从新闻文本中抽取战损、基地状态等,输出符合 `panel_schema` 的结构。
|
||||||
|
- 得到「有效数据」:既有人读的 summary/category/severity,也有可落库的 combat_losses_delta、key_location 等。
|
||||||
|
|
||||||
|
4. **有效数据映射回前端数据库字段**
|
||||||
|
- 事件脉络:清洗后的条目写入 `situation_update`(`db_writer.write_updates`)。
|
||||||
|
- 资讯存档:去重后的新数据写入 `news_content`(已在步骤 2 完成)。
|
||||||
|
- 结构化数据:AI 提取结果通过 `db_merge.merge` 映射到前端表结构,更新 `combat_losses`、`key_location`、`retaliation_*`、`wall_street_trend` 等(与 `situationData.getSituation` 所用字段一致)。
|
||||||
|
|
||||||
|
5. **更新数据库表并通知后端**
|
||||||
|
- 上述表更新完成后,爬虫请求 **POST /api/crawler/notify**。
|
||||||
|
- 后端(index.js)更新 `situation.updated_at` 并调用 `broadcastSituation()`,前端通过 WebSocket 拿到最新态势。
|
||||||
|
|
||||||
|
实现上,**gdelt 服务**(`realtime_conflict_service`)里:先对抓取结果做翻译与清洗,再 `save_and_dedup` 去重落库 `news_content`,用去重后的新项写 `situation_update`,再按批次对这批新项做 AI 提取并 `db_merge.merge` 写战损/基地等表。
|
||||||
|
|
||||||
|
### 5.2 用户行为写入
|
||||||
|
|
||||||
|
- **POST /api/visit**:记 IP 到 `visits`,`visitor_count.total` +1,并触发一次广播。
|
||||||
|
- **POST /api/feedback**:插入 `feedback`。
|
||||||
|
- **POST /api/share**:`share_count.total` +1。
|
||||||
|
|
||||||
|
这些写操作在 `routes.js` 中通过 `db.prepare().run()` 完成。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、API 一览
|
||||||
|
|
||||||
|
| 方法 | 路径 | 说明 |
|
||||||
|
|------|------|------|
|
||||||
|
| GET | /api/health | 健康检查 |
|
||||||
|
| GET | /api/situation | 完整态势(供主面板) |
|
||||||
|
| GET | /api/events | 冲突事件 + 统计 |
|
||||||
|
| GET | /api/db/dashboard | 各表原始数据(供 /db 页) |
|
||||||
|
| GET | /api/news | 资讯列表(news_content 表) |
|
||||||
|
| GET | /api/stats | 在看/累计/留言/分享数 |
|
||||||
|
| POST | /api/visit | 记录访问并返回 stats |
|
||||||
|
| POST | /api/feedback | 提交留言 |
|
||||||
|
| POST | /api/share | 分享计数 +1 |
|
||||||
|
| POST | /api/crawler/notify | 爬虫通知:更新 updated_at 并广播(内部用) |
|
||||||
|
|
||||||
|
- **Swagger**:`http://localhost:3001/api-docs`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、WebSocket 行为
|
||||||
|
|
||||||
|
- **路径**:`/ws`(与 HTTP 同端口)。
|
||||||
|
- **连接时**:服务端发送一条 `{ type: 'situation', data, stats }`。
|
||||||
|
- **定时广播**:`setInterval(broadcastSituation, 3000)` 每 3 秒向所有已连接客户端推送最新 `getSituation()` + `getStats()`。
|
||||||
|
- **爬虫通知**:POST `/api/crawler/notify` 会立即执行一次 `broadcastSituation()`,不必等 3 秒。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、与爬虫的协作
|
||||||
|
|
||||||
|
- **共享 DB**:后端与爬虫都使用同一 `DB_PATH`(默认 `server/data.db`)。
|
||||||
|
- **爬虫写库链路**:爬虫抓取 → 去重 → AI 清洗出有效数据 → 映射到前端库字段 → 更新 `situation_update`、`news_content`、`combat_losses`、`key_location`、`gdelt_events` 等表 → 调用 POST `/api/crawler/notify` 通知后端。
|
||||||
|
- **后端角色**:只读这些表(`getSituation()` 等)并推送;不参与抓取、去重或 AI 清洗,不调度爬虫。
|
||||||
|
|
||||||
|
整体上,后端是「读库 + 聚合 + 推送」的服务;写库来自**爬虫(经过去重与 AI 清洗、字段映射后)**以及**用户行为**(访问/留言/分享)。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、本地验证链路
|
||||||
|
|
||||||
|
1. **启动后端**:`npm run api`(默认 3001)。
|
||||||
|
2. **检查读库**:`curl -s http://localhost:3001/api/situation` 应返回含 `lastUpdated`、`recentUpdates` 的 JSON。
|
||||||
|
3. **检查写库与通知**:爬虫跑完流水线后会 POST `/api/crawler/notify`,后端会更新 `situation.updated_at` 并广播;可再请求 `/api/situation` 看 `lastUpdated` 是否更新。
|
||||||
|
4. **查原始表**:浏览器打开 `http://localhost:3001/api/db/dashboard` 或前端 `/db` 页,查看 `situation_update`、`news_content` 等表。
|
||||||
|
|
||||||
|
爬虫侧完整验证步骤见 **crawler/README.md** 的「本地验证链路」;项目根目录可执行 `./scripts/verify-pipeline.sh` 做一键检查。
|
||||||
Binary file not shown.
275
server/db.js
275
server/db.js
@@ -1,20 +1,69 @@
|
|||||||
const Database = require('better-sqlite3')
|
/**
|
||||||
|
* SQLite 封装:使用 sql.js(纯 JS/WebAssembly,无需 node-gyp)
|
||||||
|
* 对外接口与 better-sqlite3 兼容:db.prepare().get/all/run、db.exec
|
||||||
|
*/
|
||||||
const path = require('path')
|
const path = require('path')
|
||||||
|
const fs = require('fs')
|
||||||
|
|
||||||
const dbPath = path.join(__dirname, 'data.db')
|
const dbPath = process.env.DB_PATH || path.join(__dirname, 'data.db')
|
||||||
const db = new Database(dbPath)
|
let _db = null
|
||||||
|
/** sql.js 构造函数,initDb 时注入,供 reloadFromFile 使用 */
|
||||||
|
let _sqlJs = null
|
||||||
|
|
||||||
// 启用外键
|
function getDb() {
|
||||||
db.pragma('journal_mode = WAL')
|
if (!_db) throw new Error('DB not initialized. Call initDb() first.')
|
||||||
|
return _db
|
||||||
|
}
|
||||||
|
|
||||||
// 建表
|
function wrapDatabase(nativeDb, persist) {
|
||||||
db.exec(`
|
return {
|
||||||
|
prepare(sql) {
|
||||||
|
return {
|
||||||
|
get(...args) {
|
||||||
|
const stmt = nativeDb.prepare(sql)
|
||||||
|
stmt.bind(args.length ? args : null)
|
||||||
|
const row = stmt.step() ? stmt.getAsObject() : undefined
|
||||||
|
stmt.free()
|
||||||
|
return row
|
||||||
|
},
|
||||||
|
all(...args) {
|
||||||
|
const stmt = nativeDb.prepare(sql)
|
||||||
|
stmt.bind(args.length ? args : null)
|
||||||
|
const rows = []
|
||||||
|
while (stmt.step()) rows.push(stmt.getAsObject())
|
||||||
|
stmt.free()
|
||||||
|
return rows
|
||||||
|
},
|
||||||
|
run(...args) {
|
||||||
|
const stmt = nativeDb.prepare(sql)
|
||||||
|
stmt.bind(args.length ? args : null)
|
||||||
|
while (stmt.step());
|
||||||
|
stmt.free()
|
||||||
|
persist()
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
exec(sql) {
|
||||||
|
const statements = sql.split(';').map((s) => s.trim()).filter(Boolean)
|
||||||
|
statements.forEach((s) => nativeDb.run(s))
|
||||||
|
persist()
|
||||||
|
},
|
||||||
|
pragma(str) {
|
||||||
|
nativeDb.run('PRAGMA ' + str)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function runMigrations(db) {
|
||||||
|
const exec = (sql) => db.exec(sql)
|
||||||
|
const prepare = (sql) => db.prepare(sql)
|
||||||
|
|
||||||
|
exec(`
|
||||||
CREATE TABLE IF NOT EXISTS situation (
|
CREATE TABLE IF NOT EXISTS situation (
|
||||||
id INTEGER PRIMARY KEY CHECK (id = 1),
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
data TEXT NOT NULL,
|
data TEXT NOT NULL,
|
||||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS force_summary (
|
CREATE TABLE IF NOT EXISTS force_summary (
|
||||||
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
|
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
|
||||||
total_assets INTEGER NOT NULL,
|
total_assets INTEGER NOT NULL,
|
||||||
@@ -26,7 +75,6 @@ db.exec(`
|
|||||||
missile_consumed INTEGER NOT NULL,
|
missile_consumed INTEGER NOT NULL,
|
||||||
missile_stock INTEGER NOT NULL
|
missile_stock INTEGER NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS power_index (
|
CREATE TABLE IF NOT EXISTS power_index (
|
||||||
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
|
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
|
||||||
overall INTEGER NOT NULL,
|
overall INTEGER NOT NULL,
|
||||||
@@ -34,7 +82,6 @@ db.exec(`
|
|||||||
economic_power INTEGER NOT NULL,
|
economic_power INTEGER NOT NULL,
|
||||||
geopolitical_influence INTEGER NOT NULL
|
geopolitical_influence INTEGER NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS force_asset (
|
CREATE TABLE IF NOT EXISTS force_asset (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
side TEXT NOT NULL CHECK (side IN ('us', 'iran')),
|
side TEXT NOT NULL CHECK (side IN ('us', 'iran')),
|
||||||
@@ -45,7 +92,6 @@ db.exec(`
|
|||||||
lat REAL,
|
lat REAL,
|
||||||
lng REAL
|
lng REAL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS key_location (
|
CREATE TABLE IF NOT EXISTS key_location (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
side TEXT NOT NULL CHECK (side IN ('us', 'iran')),
|
side TEXT NOT NULL CHECK (side IN ('us', 'iran')),
|
||||||
@@ -55,7 +101,6 @@ db.exec(`
|
|||||||
type TEXT,
|
type TEXT,
|
||||||
region TEXT
|
region TEXT
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS combat_losses (
|
CREATE TABLE IF NOT EXISTS combat_losses (
|
||||||
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
|
side TEXT PRIMARY KEY CHECK (side IN ('us', 'iran')),
|
||||||
bases_destroyed INTEGER NOT NULL,
|
bases_destroyed INTEGER NOT NULL,
|
||||||
@@ -67,24 +112,20 @@ db.exec(`
|
|||||||
armor INTEGER NOT NULL,
|
armor INTEGER NOT NULL,
|
||||||
vehicles INTEGER NOT NULL
|
vehicles INTEGER NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS wall_street_trend (
|
CREATE TABLE IF NOT EXISTS wall_street_trend (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
time TEXT NOT NULL,
|
time TEXT NOT NULL,
|
||||||
value INTEGER NOT NULL
|
value INTEGER NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS retaliation_current (
|
CREATE TABLE IF NOT EXISTS retaliation_current (
|
||||||
id INTEGER PRIMARY KEY CHECK (id = 1),
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
value INTEGER NOT NULL
|
value INTEGER NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS retaliation_history (
|
CREATE TABLE IF NOT EXISTS retaliation_history (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
time TEXT NOT NULL,
|
time TEXT NOT NULL,
|
||||||
value INTEGER NOT NULL
|
value INTEGER NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS situation_update (
|
CREATE TABLE IF NOT EXISTS situation_update (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
timestamp TEXT NOT NULL,
|
timestamp TEXT NOT NULL,
|
||||||
@@ -92,16 +133,196 @@ db.exec(`
|
|||||||
summary TEXT NOT NULL,
|
summary TEXT NOT NULL,
|
||||||
severity TEXT NOT NULL
|
severity TEXT NOT NULL
|
||||||
);
|
);
|
||||||
`)
|
CREATE TABLE IF NOT EXISTS gdelt_events (
|
||||||
|
event_id TEXT PRIMARY KEY,
|
||||||
|
event_time TEXT NOT NULL,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
lat REAL NOT NULL,
|
||||||
|
lng REAL NOT NULL,
|
||||||
|
impact_score INTEGER NOT NULL,
|
||||||
|
url TEXT,
|
||||||
|
created_at TEXT DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
CREATE TABLE IF NOT EXISTS conflict_stats (
|
||||||
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
|
total_events INTEGER NOT NULL DEFAULT 0,
|
||||||
|
high_impact_events INTEGER NOT NULL DEFAULT 0,
|
||||||
|
estimated_casualties INTEGER NOT NULL DEFAULT 0,
|
||||||
|
estimated_strike_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE TABLE IF NOT EXISTS news_content (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
content_hash TEXT NOT NULL UNIQUE,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
summary TEXT NOT NULL,
|
||||||
|
url TEXT NOT NULL DEFAULT '',
|
||||||
|
source TEXT NOT NULL DEFAULT '',
|
||||||
|
published_at TEXT NOT NULL,
|
||||||
|
category TEXT NOT NULL DEFAULT 'other',
|
||||||
|
severity TEXT NOT NULL DEFAULT 'medium',
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
`)
|
||||||
|
try { exec('CREATE INDEX IF NOT EXISTS idx_news_content_hash ON news_content(content_hash)') } catch (_) {}
|
||||||
|
try { exec('CREATE INDEX IF NOT EXISTS idx_news_content_published ON news_content(published_at DESC)') } catch (_) {}
|
||||||
|
|
||||||
// 迁移:为已有 key_location 表添加 type、region、status、damage_level 列
|
try {
|
||||||
try {
|
const cols = prepare('PRAGMA table_info(key_location)').all()
|
||||||
const cols = db.prepare('PRAGMA table_info(key_location)').all()
|
const names = cols.map((c) => c.name)
|
||||||
const names = cols.map((c) => c.name)
|
if (!names.includes('type')) exec('ALTER TABLE key_location ADD COLUMN type TEXT')
|
||||||
if (!names.includes('type')) db.exec('ALTER TABLE key_location ADD COLUMN type TEXT')
|
if (!names.includes('region')) exec('ALTER TABLE key_location ADD COLUMN region TEXT')
|
||||||
if (!names.includes('region')) db.exec('ALTER TABLE key_location ADD COLUMN region TEXT')
|
if (!names.includes('status')) exec('ALTER TABLE key_location ADD COLUMN status TEXT DEFAULT "operational"')
|
||||||
if (!names.includes('status')) db.exec('ALTER TABLE key_location ADD COLUMN status TEXT DEFAULT "operational"')
|
if (!names.includes('damage_level')) exec('ALTER TABLE key_location ADD COLUMN damage_level INTEGER')
|
||||||
if (!names.includes('damage_level')) db.exec('ALTER TABLE key_location ADD COLUMN damage_level INTEGER')
|
} catch (_) {}
|
||||||
} catch (_) {}
|
try {
|
||||||
|
const lossCols = prepare('PRAGMA table_info(combat_losses)').all()
|
||||||
|
const lossNames = lossCols.map((c) => c.name)
|
||||||
|
if (!lossNames.includes('civilian_killed')) exec('ALTER TABLE combat_losses ADD COLUMN civilian_killed INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('civilian_wounded')) exec('ALTER TABLE combat_losses ADD COLUMN civilian_wounded INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('updated_at')) exec('ALTER TABLE combat_losses ADD COLUMN updated_at TEXT DEFAULT (datetime("now"))')
|
||||||
|
if (!lossNames.includes('drones')) exec('ALTER TABLE combat_losses ADD COLUMN drones INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('missiles')) exec('ALTER TABLE combat_losses ADD COLUMN missiles INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('helicopters')) exec('ALTER TABLE combat_losses ADD COLUMN helicopters INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('submarines')) exec('ALTER TABLE combat_losses ADD COLUMN submarines INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('tanks')) exec('ALTER TABLE combat_losses ADD COLUMN tanks INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('carriers')) {
|
||||||
|
exec('ALTER TABLE combat_losses ADD COLUMN carriers INTEGER NOT NULL DEFAULT 0')
|
||||||
|
exec('UPDATE combat_losses SET carriers = tanks')
|
||||||
|
}
|
||||||
|
if (!lossNames.includes('civilian_ships')) exec('ALTER TABLE combat_losses ADD COLUMN civilian_ships INTEGER NOT NULL DEFAULT 0')
|
||||||
|
if (!lossNames.includes('airport_port')) exec('ALTER TABLE combat_losses ADD COLUMN airport_port INTEGER NOT NULL DEFAULT 0')
|
||||||
|
} catch (_) {}
|
||||||
|
|
||||||
module.exports = db
|
const addUpdatedAt = (table) => {
|
||||||
|
try {
|
||||||
|
const cols = prepare(`PRAGMA table_info(${table})`).all()
|
||||||
|
if (!cols.some((c) => c.name === 'updated_at')) {
|
||||||
|
exec(`ALTER TABLE ${table} ADD COLUMN updated_at TEXT DEFAULT (datetime("now"))`)
|
||||||
|
}
|
||||||
|
} catch (_) {}
|
||||||
|
}
|
||||||
|
;['force_summary', 'power_index', 'force_asset', 'key_location', 'retaliation_current'].forEach(addUpdatedAt)
|
||||||
|
|
||||||
|
try {
|
||||||
|
exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS visits (
|
||||||
|
ip TEXT PRIMARY KEY,
|
||||||
|
last_seen TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
CREATE TABLE IF NOT EXISTS visitor_count (
|
||||||
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
|
total INTEGER NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
INSERT OR IGNORE INTO visitor_count (id, total) VALUES (1, 0);
|
||||||
|
`)
|
||||||
|
} catch (_) {}
|
||||||
|
try {
|
||||||
|
exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS feedback (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
content TEXT NOT NULL,
|
||||||
|
ip TEXT,
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)
|
||||||
|
`)
|
||||||
|
} catch (_) {}
|
||||||
|
try {
|
||||||
|
exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS share_count (
|
||||||
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
|
total INTEGER NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
INSERT OR IGNORE INTO share_count (id, total) VALUES (1, 0);
|
||||||
|
`)
|
||||||
|
} catch (_) {}
|
||||||
|
try {
|
||||||
|
exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS like_count (
|
||||||
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
|
total INTEGER NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
INSERT OR IGNORE INTO like_count (id, total) VALUES (1, 0);
|
||||||
|
`)
|
||||||
|
} catch (_) {}
|
||||||
|
try {
|
||||||
|
exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS display_stats (
|
||||||
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
||||||
|
viewers INTEGER NULL,
|
||||||
|
cumulative INTEGER NULL,
|
||||||
|
share_count INTEGER NULL,
|
||||||
|
like_count INTEGER NULL,
|
||||||
|
feedback_count INTEGER NULL
|
||||||
|
);
|
||||||
|
INSERT OR IGNORE INTO display_stats (id) VALUES (1);
|
||||||
|
`)
|
||||||
|
} catch (_) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function initDb() {
|
||||||
|
const initSqlJs = require('sql.js')
|
||||||
|
const SQL = await initSqlJs()
|
||||||
|
_sqlJs = SQL
|
||||||
|
let data = new Uint8Array(0)
|
||||||
|
if (fs.existsSync(dbPath)) {
|
||||||
|
data = new Uint8Array(fs.readFileSync(dbPath))
|
||||||
|
}
|
||||||
|
const nativeDb = new SQL.Database(data)
|
||||||
|
|
||||||
|
function persist() {
|
||||||
|
try {
|
||||||
|
const buf = nativeDb.export()
|
||||||
|
fs.writeFileSync(dbPath, Buffer.from(buf))
|
||||||
|
} catch (e) {
|
||||||
|
console.error('[db] persist error:', e.message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nativeDb.run('PRAGMA journal_mode = WAL')
|
||||||
|
const wrapped = wrapDatabase(nativeDb, persist)
|
||||||
|
runMigrations(wrapped)
|
||||||
|
_db = wrapped
|
||||||
|
return _db
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 从磁盘重新加载 DB(爬虫写入同一文件后调用,使 Node 内存中的库与文件一致)
|
||||||
|
*/
|
||||||
|
function reloadFromFile() {
|
||||||
|
if (!_sqlJs || !_db) throw new Error('DB not initialized. Call initDb() first.')
|
||||||
|
let data = new Uint8Array(0)
|
||||||
|
if (fs.existsSync(dbPath)) {
|
||||||
|
data = new Uint8Array(fs.readFileSync(dbPath))
|
||||||
|
}
|
||||||
|
const nativeDb = new _sqlJs.Database(data)
|
||||||
|
function persist() {
|
||||||
|
try {
|
||||||
|
const buf = nativeDb.export()
|
||||||
|
fs.writeFileSync(dbPath, Buffer.from(buf))
|
||||||
|
} catch (e) {
|
||||||
|
console.error('[db] persist error:', e.message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nativeDb.run('PRAGMA journal_mode = WAL')
|
||||||
|
const wrapped = wrapDatabase(nativeDb, persist)
|
||||||
|
runMigrations(wrapped)
|
||||||
|
_db = wrapped
|
||||||
|
}
|
||||||
|
|
||||||
|
const proxy = {
|
||||||
|
prepare(sql) {
|
||||||
|
return getDb().prepare(sql)
|
||||||
|
},
|
||||||
|
exec(sql) {
|
||||||
|
return getDb().exec(sql)
|
||||||
|
},
|
||||||
|
pragma(str) {
|
||||||
|
getDb().pragma(str)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = proxy
|
||||||
|
module.exports.initDb = initDb
|
||||||
|
module.exports.getDb = getDb
|
||||||
|
module.exports.reloadFromFile = reloadFromFile
|
||||||
|
|||||||
@@ -1,34 +1,91 @@
|
|||||||
const http = require('http')
|
const http = require('http')
|
||||||
|
const path = require('path')
|
||||||
|
const fs = require('fs')
|
||||||
const express = require('express')
|
const express = require('express')
|
||||||
const cors = require('cors')
|
const cors = require('cors')
|
||||||
const { WebSocketServer } = require('ws')
|
const { WebSocketServer } = require('ws')
|
||||||
|
const db = require('./db')
|
||||||
const routes = require('./routes')
|
const routes = require('./routes')
|
||||||
const { getSituation } = require('./situationData')
|
const { getSituation } = require('./situationData')
|
||||||
|
|
||||||
const app = express()
|
const app = express()
|
||||||
const PORT = process.env.API_PORT || 3001
|
const PORT = process.env.API_PORT || 3001
|
||||||
|
|
||||||
|
// 爬虫通知用的共享密钥:API_CRAWLER_TOKEN(仅在服务端与爬虫进程间传递)
|
||||||
|
const CRAWLER_TOKEN = process.env.API_CRAWLER_TOKEN || ''
|
||||||
|
|
||||||
|
app.set('trust proxy', 1)
|
||||||
app.use(cors())
|
app.use(cors())
|
||||||
app.use(express.json())
|
app.use(express.json())
|
||||||
|
|
||||||
app.use('/api', routes)
|
app.use('/api', routes)
|
||||||
app.get('/api/health', (_, res) => res.json({ ok: true }))
|
app.get('/api/health', (_, res) => res.json({ ok: true }))
|
||||||
|
app.post('/api/crawler/notify', (req, res) => {
|
||||||
|
// 若配置了 API_CRAWLER_TOKEN,则要求爬虫携带 X-Crawler-Token 头
|
||||||
|
if (CRAWLER_TOKEN) {
|
||||||
|
const token = req.headers['x-crawler-token']
|
||||||
|
if (typeof token !== 'string' || token !== CRAWLER_TOKEN) {
|
||||||
|
return res.status(401).json({ error: 'unauthorized' })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
notifyCrawlerUpdate()
|
||||||
|
res.json({ ok: true })
|
||||||
|
})
|
||||||
|
|
||||||
|
// 生产环境:提供前端静态文件(含修订页 /edit,依赖 SPA fallback)
|
||||||
|
const distPath = path.join(__dirname, '..', 'dist')
|
||||||
|
if (fs.existsSync(distPath)) {
|
||||||
|
app.use(express.static(distPath))
|
||||||
|
// 非 API/WS 的请求一律返回 index.html,由前端路由处理 /、/edit、/db 等
|
||||||
|
app.get('*', (req, res, next) => {
|
||||||
|
if (!req.path.startsWith('/api') && req.path !== '/ws') {
|
||||||
|
res.sendFile(path.join(distPath, 'index.html'))
|
||||||
|
} else next()
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
console.warn('[server] dist 目录不存在,前端页面(含 /edit 修订页)不可用。请在项目根目录执行 npm run build 后再启动。')
|
||||||
|
}
|
||||||
|
|
||||||
const server = http.createServer(app)
|
const server = http.createServer(app)
|
||||||
|
|
||||||
|
const { getStats } = require('./stats')
|
||||||
|
|
||||||
const wss = new WebSocketServer({ server, path: '/ws' })
|
const wss = new WebSocketServer({ server, path: '/ws' })
|
||||||
wss.on('connection', (ws) => {
|
wss.on('connection', (ws) => {
|
||||||
ws.send(JSON.stringify({ type: 'situation', data: getSituation() }))
|
ws.send(JSON.stringify({ type: 'situation', data: getSituation(), stats: getStats() }))
|
||||||
})
|
})
|
||||||
|
|
||||||
function broadcastSituation() {
|
function broadcastSituation() {
|
||||||
try {
|
try {
|
||||||
const data = JSON.stringify({ type: 'situation', data: getSituation() })
|
const data = JSON.stringify({ type: 'situation', data: getSituation(), stats: getStats() })
|
||||||
wss.clients.forEach((c) => {
|
wss.clients.forEach((c) => {
|
||||||
if (c.readyState === 1) c.send(data)
|
if (c.readyState === 1) c.send(data)
|
||||||
})
|
})
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
}
|
}
|
||||||
setInterval(broadcastSituation, 5000)
|
app.set('broadcastSituation', broadcastSituation)
|
||||||
|
setInterval(broadcastSituation, 3000)
|
||||||
|
|
||||||
server.listen(PORT, () => {
|
// 供爬虫调用:先从磁盘重载 DB(纳入爬虫写入),再更新 updated_at 并立即广播
|
||||||
console.log(`API + WebSocket running at http://localhost:${PORT}`)
|
function notifyCrawlerUpdate() {
|
||||||
|
try {
|
||||||
|
const db = require('./db')
|
||||||
|
db.reloadFromFile()
|
||||||
|
db.prepare("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)").run(new Date().toISOString())
|
||||||
|
broadcastSituation()
|
||||||
|
const n = db.prepare('SELECT COUNT(*) as c FROM situation_update').get().c
|
||||||
|
console.log('[crawler/notify] DB 已重载并广播,situation_update 条数:', n)
|
||||||
|
} catch (e) {
|
||||||
|
console.error('[crawler/notify]', e?.message || e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
db.initDb().then(() => {
|
||||||
|
server.listen(PORT, () => {
|
||||||
|
console.log(`API + WebSocket running at http://localhost:${PORT}`)
|
||||||
|
console.log(`Swagger docs at http://localhost:${PORT}/api-docs`)
|
||||||
|
})
|
||||||
|
}).catch((err) => {
|
||||||
|
console.error('DB init failed:', err)
|
||||||
|
process.exit(1)
|
||||||
})
|
})
|
||||||
|
|||||||
143
server/openapi.js
Normal file
143
server/openapi.js
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
/**
|
||||||
|
* OpenAPI 3.0 规范,供 Swagger UI 展示
|
||||||
|
*/
|
||||||
|
const PORT = process.env.API_PORT || 3001
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
openapi: '3.0.3',
|
||||||
|
info: {
|
||||||
|
title: 'US-Iran Military Dashboard API',
|
||||||
|
version: '1.0.0',
|
||||||
|
description: '美伊军事态势面板后端接口',
|
||||||
|
},
|
||||||
|
servers: [{ url: `http://localhost:${PORT}`, description: '本地' }],
|
||||||
|
paths: {
|
||||||
|
'/api/situation': {
|
||||||
|
get: {
|
||||||
|
summary: '获取态势数据',
|
||||||
|
description: '战损、基地、新闻、冲突事件等完整态势',
|
||||||
|
tags: ['态势'],
|
||||||
|
responses: {
|
||||||
|
200: {
|
||||||
|
description: '态势 JSON',
|
||||||
|
content: { 'application/json': { schema: { type: 'object' } } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/news': {
|
||||||
|
get: {
|
||||||
|
summary: '资讯内容',
|
||||||
|
description: '从 news_content 表读取,支持 ?limit=50 分页',
|
||||||
|
tags: ['资讯'],
|
||||||
|
parameters: [{ in: 'query', name: 'limit', schema: { type: 'integer', default: 50 } }],
|
||||||
|
responses: { 200: { description: 'items 数组' } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/db/dashboard': {
|
||||||
|
get: {
|
||||||
|
summary: '数据库面板',
|
||||||
|
description: '各表原始数据,供 /db 调试页',
|
||||||
|
tags: ['调试'],
|
||||||
|
responses: {
|
||||||
|
200: {
|
||||||
|
description: '各表数据',
|
||||||
|
content: { 'application/json': { schema: { type: 'object' } } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/visit': {
|
||||||
|
post: {
|
||||||
|
summary: '来访统计',
|
||||||
|
description: '记录 IP,返回当前在看人数和看过人数',
|
||||||
|
tags: ['统计'],
|
||||||
|
responses: {
|
||||||
|
200: {
|
||||||
|
description: 'OK',
|
||||||
|
content: {
|
||||||
|
'application/json': {
|
||||||
|
schema: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
viewers: { type: 'number' },
|
||||||
|
cumulative: { type: 'number' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/feedback': {
|
||||||
|
post: {
|
||||||
|
summary: '提交反馈',
|
||||||
|
description: '留言 1–2000 字',
|
||||||
|
tags: ['反馈'],
|
||||||
|
requestBody: {
|
||||||
|
required: true,
|
||||||
|
content: {
|
||||||
|
'application/json': {
|
||||||
|
schema: {
|
||||||
|
type: 'object',
|
||||||
|
required: ['content'],
|
||||||
|
properties: { content: { type: 'string', maxLength: 2000 } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
responses: {
|
||||||
|
200: { description: 'ok: true' },
|
||||||
|
400: { description: '内容超长或为空' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/health': {
|
||||||
|
get: {
|
||||||
|
summary: '健康检查',
|
||||||
|
tags: ['系统'],
|
||||||
|
responses: { 200: { description: 'ok: true' } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/crawler/notify': {
|
||||||
|
post: {
|
||||||
|
summary: '爬虫通知',
|
||||||
|
description: '爬虫更新后调用,触发前端推送',
|
||||||
|
tags: ['系统'],
|
||||||
|
responses: { 200: { description: 'ok' } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/stats': {
|
||||||
|
get: {
|
||||||
|
summary: '统计快照',
|
||||||
|
description: 'viewers / cumulative,不写入',
|
||||||
|
tags: ['统计'],
|
||||||
|
responses: {
|
||||||
|
200: {
|
||||||
|
content: {
|
||||||
|
'application/json': {
|
||||||
|
schema: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
viewers: { type: 'number' },
|
||||||
|
cumulative: { type: 'number' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'/api/events': {
|
||||||
|
get: {
|
||||||
|
summary: '冲突事件',
|
||||||
|
description: '冲突事件列表及统计',
|
||||||
|
tags: ['态势'],
|
||||||
|
responses: { 200: { description: 'events + conflict_stats' } },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
tags: [{ name: '态势' }, { name: '资讯' }, { name: '统计' }, { name: '反馈' }, { name: '调试' }, { name: '系统' }],
|
||||||
|
}
|
||||||
399
server/routes.js
399
server/routes.js
@@ -1,8 +1,94 @@
|
|||||||
const express = require('express')
|
const express = require('express')
|
||||||
const { getSituation } = require('./situationData')
|
const { getSituation } = require('./situationData')
|
||||||
|
const { getStats } = require('./stats')
|
||||||
|
const db = require('./db')
|
||||||
|
|
||||||
const router = express.Router()
|
const router = express.Router()
|
||||||
|
|
||||||
|
// 简单鉴权:通过环境变量配置的 API_ADMIN_KEY 保护敏感接口(不返回真实密钥)
|
||||||
|
const ADMIN_API_KEY = process.env.API_ADMIN_KEY || ''
|
||||||
|
|
||||||
|
function requireAdmin(req, res, next) {
|
||||||
|
if (!ADMIN_API_KEY) {
|
||||||
|
return res.status(500).json({ error: 'admin key not configured' })
|
||||||
|
}
|
||||||
|
const token = req.headers['x-api-key']
|
||||||
|
if (typeof token !== 'string' || token !== ADMIN_API_KEY) {
|
||||||
|
return res.status(401).json({ error: 'unauthorized' })
|
||||||
|
}
|
||||||
|
return next()
|
||||||
|
}
|
||||||
|
|
||||||
|
// 数据库 Dashboard:返回各表原始数据(需 admin 鉴权)
|
||||||
|
router.get('/db/dashboard', requireAdmin, (req, res) => {
|
||||||
|
try {
|
||||||
|
const tables = [
|
||||||
|
'feedback',
|
||||||
|
'situation',
|
||||||
|
'force_summary',
|
||||||
|
'power_index',
|
||||||
|
'force_asset',
|
||||||
|
'key_location',
|
||||||
|
'combat_losses',
|
||||||
|
'wall_street_trend',
|
||||||
|
'retaliation_current',
|
||||||
|
'retaliation_history',
|
||||||
|
'situation_update',
|
||||||
|
'news_content',
|
||||||
|
'gdelt_events',
|
||||||
|
'conflict_stats',
|
||||||
|
]
|
||||||
|
const data = {}
|
||||||
|
const timeSort = {
|
||||||
|
feedback: 'created_at DESC',
|
||||||
|
situation: 'updated_at DESC',
|
||||||
|
situation_update: 'timestamp DESC',
|
||||||
|
news_content: 'published_at DESC',
|
||||||
|
gdelt_events: 'event_time DESC',
|
||||||
|
wall_street_trend: 'time DESC',
|
||||||
|
retaliation_history: 'time DESC',
|
||||||
|
conflict_stats: 'updated_at DESC',
|
||||||
|
}
|
||||||
|
for (const name of tables) {
|
||||||
|
try {
|
||||||
|
const order = timeSort[name]
|
||||||
|
let rows
|
||||||
|
try {
|
||||||
|
rows = order
|
||||||
|
? db.prepare(`SELECT * FROM ${name} ORDER BY ${order}`).all()
|
||||||
|
: db.prepare(`SELECT * FROM ${name}`).all()
|
||||||
|
} catch (qerr) {
|
||||||
|
rows = db.prepare(`SELECT * FROM ${name}`).all()
|
||||||
|
}
|
||||||
|
data[name] = rows
|
||||||
|
} catch (e) {
|
||||||
|
data[name] = { error: e.message }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res.json(data)
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// 资讯内容(独立表,供后续消费,可选 admin key;若配置了 ADMIN_API_KEY 则也要求鉴权)
|
||||||
|
router.get('/news', (req, res) => {
|
||||||
|
if (ADMIN_API_KEY) {
|
||||||
|
const token = req.headers['x-api-key']
|
||||||
|
if (typeof token !== 'string' || token !== ADMIN_API_KEY) {
|
||||||
|
return res.status(401).json({ error: 'unauthorized' })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const limit = Math.min(parseInt(req.query.limit, 10) || 50, 200)
|
||||||
|
const rows = db.prepare('SELECT id, title, summary, url, source, published_at, category, severity, created_at FROM news_content ORDER BY published_at DESC LIMIT ?').all(limit)
|
||||||
|
res.json({ items: rows })
|
||||||
|
} catch (err) {
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
router.get('/situation', (req, res) => {
|
router.get('/situation', (req, res) => {
|
||||||
try {
|
try {
|
||||||
res.json(getSituation())
|
res.json(getSituation())
|
||||||
@@ -12,4 +98,317 @@ router.get('/situation', (req, res) => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// 来访统计:记录 IP(或开发环境下每标签 viewer-id),返回在看/看过
|
||||||
|
function getClientIp(req) {
|
||||||
|
const forwarded = req.headers['x-forwarded-for']
|
||||||
|
if (forwarded) return forwarded.split(',')[0].trim()
|
||||||
|
return req.ip || req.socket?.remoteAddress || 'unknown'
|
||||||
|
}
|
||||||
|
|
||||||
|
// 优先用前端传来的 viewer-id 去重(每设备一个),否则用 IP,这样多设备同 WiFi 也能正确统计「在看」
|
||||||
|
function getVisitKey(req) {
|
||||||
|
const vid = req.headers['x-viewer-id']
|
||||||
|
const ip = getClientIp(req)
|
||||||
|
if (typeof vid === 'string' && vid.trim().length > 0) {
|
||||||
|
return 'vid:' + vid.trim().slice(0, 64)
|
||||||
|
}
|
||||||
|
return ip
|
||||||
|
}
|
||||||
|
|
||||||
|
router.post('/visit', (req, res) => {
|
||||||
|
try {
|
||||||
|
const visitKey = getVisitKey(req)
|
||||||
|
db.prepare(
|
||||||
|
"INSERT OR REPLACE INTO visits (ip, last_seen) VALUES (?, datetime('now'))"
|
||||||
|
).run(visitKey)
|
||||||
|
db.prepare(
|
||||||
|
'INSERT INTO visitor_count (id, total) VALUES (1, 1) ON CONFLICT(id) DO UPDATE SET total = total + 1'
|
||||||
|
).run()
|
||||||
|
const broadcast = req.app?.get?.('broadcastSituation')
|
||||||
|
if (typeof broadcast === 'function') broadcast()
|
||||||
|
res.json(getStats())
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ viewers: 0, cumulative: 0, feedbackCount: 0, shareCount: 0, likeCount: 0 })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
router.post('/feedback', (req, res) => {
|
||||||
|
try {
|
||||||
|
const content = (req.body?.content ?? '').toString().trim()
|
||||||
|
if (!content || content.length > 2000) {
|
||||||
|
return res.status(400).json({ ok: false, error: '留言内容 1–2000 字' })
|
||||||
|
}
|
||||||
|
const ip = getClientIp(req)
|
||||||
|
db.prepare(
|
||||||
|
'INSERT INTO feedback (content, ip) VALUES (?, ?)'
|
||||||
|
).run(content.slice(0, 2000), ip)
|
||||||
|
res.json({ ok: true })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ ok: false, error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
router.post('/share', (req, res) => {
|
||||||
|
try {
|
||||||
|
db.prepare(
|
||||||
|
'INSERT INTO share_count (id, total) VALUES (1, 1) ON CONFLICT(id) DO UPDATE SET total = total + 1'
|
||||||
|
).run()
|
||||||
|
const shareCount = db.prepare('SELECT total FROM share_count WHERE id = 1').get()?.total ?? 0
|
||||||
|
res.json({ ok: true, shareCount })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ ok: false, shareCount: 0 })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
router.post('/like', (req, res) => {
|
||||||
|
try {
|
||||||
|
db.prepare(
|
||||||
|
'INSERT INTO like_count (id, total) VALUES (1, 1) ON CONFLICT(id) DO UPDATE SET total = total + 1'
|
||||||
|
).run()
|
||||||
|
res.json(getStats())
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ viewers: 0, cumulative: 0, feedbackCount: 0, shareCount: 0, likeCount: 0 })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
router.get('/stats', (req, res) => {
|
||||||
|
try {
|
||||||
|
res.json(getStats())
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ viewers: 0, cumulative: 0, feedbackCount: 0, shareCount: 0, likeCount: 0 })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
router.get('/events', (req, res) => {
|
||||||
|
try {
|
||||||
|
const s = getSituation()
|
||||||
|
res.json({
|
||||||
|
updated_at: s.lastUpdated,
|
||||||
|
count: (s.conflictEvents || []).length,
|
||||||
|
events: s.conflictEvents || [],
|
||||||
|
conflict_stats: s.conflictStats || {},
|
||||||
|
})
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// ---------- 手动修正看板数据(编辑页用) ----------
|
||||||
|
function broadcastAfterEdit(req) {
|
||||||
|
try {
|
||||||
|
const broadcast = req.app?.get?.('broadcastSituation')
|
||||||
|
if (typeof broadcast === 'function') broadcast()
|
||||||
|
} catch (_) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** GET 原始可编辑数据:战损、据点、事件脉络、军力概要 */
|
||||||
|
router.get('/edit/raw', (req, res) => {
|
||||||
|
try {
|
||||||
|
const lossesUs = db.prepare('SELECT * FROM combat_losses WHERE side = ?').get('us')
|
||||||
|
const lossesIr = db.prepare('SELECT * FROM combat_losses WHERE side = ?').get('iran')
|
||||||
|
const locUs = db.prepare('SELECT id, side, name, lat, lng, type, region, status, damage_level FROM key_location WHERE side = ?').all('us')
|
||||||
|
const locIr = db.prepare('SELECT id, side, name, lat, lng, type, region, status, damage_level FROM key_location WHERE side = ?').all('iran')
|
||||||
|
const updates = db.prepare('SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT 80').all()
|
||||||
|
const summaryUs = db.prepare('SELECT * FROM force_summary WHERE side = ?').get('us')
|
||||||
|
const summaryIr = db.prepare('SELECT * FROM force_summary WHERE side = ?').get('iran')
|
||||||
|
let displayStats = null
|
||||||
|
try {
|
||||||
|
displayStats = db.prepare('SELECT viewers, cumulative, share_count, like_count, feedback_count FROM display_stats WHERE id = 1').get()
|
||||||
|
} catch (_) {}
|
||||||
|
const realCumulative = db.prepare('SELECT total FROM visitor_count WHERE id = 1').get()?.total ?? 0
|
||||||
|
const realShare = db.prepare('SELECT total FROM share_count WHERE id = 1').get()?.total ?? 0
|
||||||
|
const liveViewers = db.prepare(
|
||||||
|
"SELECT COUNT(*) as n FROM visits WHERE last_seen > datetime('now', '-2 minutes')"
|
||||||
|
).get()?.n ?? 0
|
||||||
|
const realFeedback = db.prepare('SELECT COUNT(*) as n FROM feedback').get()?.n ?? 0
|
||||||
|
let realLikeCount = 0
|
||||||
|
try {
|
||||||
|
realLikeCount = db.prepare('SELECT total FROM like_count WHERE id = 1').get()?.total ?? 0
|
||||||
|
} catch (_) {}
|
||||||
|
res.json({
|
||||||
|
combatLosses: { us: lossesUs || null, iran: lossesIr || null },
|
||||||
|
keyLocations: { us: locUs || [], iran: locIr || [] },
|
||||||
|
situationUpdates: updates || [],
|
||||||
|
forceSummary: { us: summaryUs || null, iran: summaryIr || null },
|
||||||
|
displayStats: {
|
||||||
|
viewers: displayStats?.viewers ?? liveViewers,
|
||||||
|
cumulative: displayStats?.cumulative ?? realCumulative,
|
||||||
|
shareCount: displayStats?.share_count ?? realShare,
|
||||||
|
likeCount: displayStats?.like_count ?? realLikeCount,
|
||||||
|
feedbackCount: displayStats?.feedback_count ?? realFeedback,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
/** PUT 更新战损(美/伊) */
|
||||||
|
router.put('/edit/combat-losses', (req, res) => {
|
||||||
|
try {
|
||||||
|
const side = req.body?.side
|
||||||
|
if (side !== 'us' && side !== 'iran') {
|
||||||
|
return res.status(400).json({ error: 'side must be us or iran' })
|
||||||
|
}
|
||||||
|
const row = db.prepare('SELECT * FROM combat_losses WHERE side = ?').get(side)
|
||||||
|
if (!row) return res.status(404).json({ error: 'combat_losses row not found' })
|
||||||
|
const cols = ['bases_destroyed', 'bases_damaged', 'personnel_killed', 'personnel_wounded',
|
||||||
|
'civilian_killed', 'civilian_wounded', 'aircraft', 'warships', 'armor', 'vehicles',
|
||||||
|
'drones', 'missiles', 'helicopters', 'submarines', 'tanks', 'carriers', 'civilian_ships', 'airport_port']
|
||||||
|
const updates = []
|
||||||
|
const values = []
|
||||||
|
for (const c of cols) {
|
||||||
|
if (req.body[c] !== undefined) {
|
||||||
|
updates.push(`${c} = ?`)
|
||||||
|
values.push(Number(req.body[c]) || 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (updates.length === 0) return res.status(400).json({ error: 'no fields to update' })
|
||||||
|
values.push(side)
|
||||||
|
db.prepare(`UPDATE combat_losses SET ${updates.join(', ')} WHERE side = ?`).run(...values)
|
||||||
|
db.prepare("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)").run(new Date().toISOString())
|
||||||
|
broadcastAfterEdit(req)
|
||||||
|
res.json({ ok: true })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
/** PATCH 更新单个据点 */
|
||||||
|
router.patch('/edit/key-location/:id', (req, res) => {
|
||||||
|
try {
|
||||||
|
const id = parseInt(req.params.id, 10)
|
||||||
|
if (!Number.isFinite(id)) return res.status(400).json({ error: 'invalid id' })
|
||||||
|
const row = db.prepare('SELECT id FROM key_location WHERE id = ?').get(id)
|
||||||
|
if (!row) return res.status(404).json({ error: 'key_location not found' })
|
||||||
|
const allowed = ['name', 'lat', 'lng', 'type', 'region', 'status', 'damage_level']
|
||||||
|
const updates = []
|
||||||
|
const values = []
|
||||||
|
for (const k of allowed) {
|
||||||
|
if (req.body[k] !== undefined) {
|
||||||
|
if (k === 'status' && !['operational', 'damaged', 'attacked'].includes(req.body[k])) continue
|
||||||
|
updates.push(`${k} = ?`)
|
||||||
|
values.push(k === 'lat' || k === 'lng' ? Number(req.body[k]) : req.body[k])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (updates.length === 0) return res.status(400).json({ error: 'no fields to update' })
|
||||||
|
values.push(id)
|
||||||
|
db.prepare(`UPDATE key_location SET ${updates.join(', ')} WHERE id = ?`).run(...values)
|
||||||
|
db.prepare("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)").run(new Date().toISOString())
|
||||||
|
broadcastAfterEdit(req)
|
||||||
|
res.json({ ok: true })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
/** POST 新增事件脉络 */
|
||||||
|
router.post('/edit/situation-update', (req, res) => {
|
||||||
|
try {
|
||||||
|
const id = (req.body?.id || '').toString().trim() || `man_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`
|
||||||
|
const timestamp = (req.body?.timestamp || new Date().toISOString()).toString().trim()
|
||||||
|
const category = (req.body?.category || 'other').toString().toLowerCase()
|
||||||
|
const summary = (req.body?.summary || '').toString().trim().slice(0, 500)
|
||||||
|
const severity = (req.body?.severity || 'medium').toString().toLowerCase()
|
||||||
|
if (!summary) return res.status(400).json({ error: 'summary required' })
|
||||||
|
const validCat = ['deployment', 'alert', 'intel', 'diplomatic', 'other'].includes(category) ? category : 'other'
|
||||||
|
const validSev = ['low', 'medium', 'high', 'critical'].includes(severity) ? severity : 'medium'
|
||||||
|
db.prepare('INSERT OR REPLACE INTO situation_update (id, timestamp, category, summary, severity) VALUES (?, ?, ?, ?, ?)').run(id, timestamp, validCat, summary, validSev)
|
||||||
|
db.prepare("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)").run(new Date().toISOString())
|
||||||
|
broadcastAfterEdit(req)
|
||||||
|
res.json({ ok: true, id })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
/** DELETE 删除一条事件脉络 */
|
||||||
|
router.delete('/edit/situation-update/:id', (req, res) => {
|
||||||
|
try {
|
||||||
|
const id = (req.params.id || '').toString().trim()
|
||||||
|
if (!id) return res.status(400).json({ error: 'id required' })
|
||||||
|
const r = db.prepare('DELETE FROM situation_update WHERE id = ?').run(id)
|
||||||
|
if (r.changes === 0) return res.status(404).json({ error: 'not found' })
|
||||||
|
db.prepare("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)").run(new Date().toISOString())
|
||||||
|
broadcastAfterEdit(req)
|
||||||
|
res.json({ ok: true })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
/** PUT 更新军力概要(美/伊) */
|
||||||
|
router.put('/edit/force-summary', (req, res) => {
|
||||||
|
try {
|
||||||
|
const side = req.body?.side
|
||||||
|
if (side !== 'us' && side !== 'iran') {
|
||||||
|
return res.status(400).json({ error: 'side must be us or iran' })
|
||||||
|
}
|
||||||
|
const cols = ['total_assets', 'personnel', 'naval_ships', 'aircraft', 'ground_units', 'uav', 'missile_consumed', 'missile_stock']
|
||||||
|
const updates = []
|
||||||
|
const values = []
|
||||||
|
for (const c of cols) {
|
||||||
|
if (req.body[c] !== undefined) {
|
||||||
|
updates.push(`${c} = ?`)
|
||||||
|
values.push(Number(req.body[c]) || 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (updates.length === 0) return res.status(400).json({ error: 'no fields to update' })
|
||||||
|
values.push(side)
|
||||||
|
db.prepare(`UPDATE force_summary SET ${updates.join(', ')} WHERE side = ?`).run(...values)
|
||||||
|
db.prepare("INSERT OR REPLACE INTO situation (id, data, updated_at) VALUES (1, '{}', ?)").run(new Date().toISOString())
|
||||||
|
broadcastAfterEdit(req)
|
||||||
|
res.json({ ok: true })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(500).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
/** PUT 更新展示统计(看过、在看、分享、点赞、留言)。传 null 表示清除覆盖、改用实时统计 */
|
||||||
|
router.put('/edit/display-stats', (req, res) => {
|
||||||
|
try {
|
||||||
|
db.prepare('INSERT OR IGNORE INTO display_stats (id) VALUES (1)').run()
|
||||||
|
const updates = []
|
||||||
|
const values = []
|
||||||
|
const setField = (key, bodyKey) => {
|
||||||
|
const v = req.body?.[bodyKey ?? key]
|
||||||
|
if (v === undefined) return
|
||||||
|
if (v === null) {
|
||||||
|
updates.push(`${key} = ?`)
|
||||||
|
values.push(null)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
const n = Math.max(0, parseInt(v, 10))
|
||||||
|
if (!Number.isFinite(n)) throw new Error(`${bodyKey ?? key} must be number`)
|
||||||
|
updates.push(`${key} = ?`)
|
||||||
|
values.push(n)
|
||||||
|
}
|
||||||
|
setField('viewers')
|
||||||
|
setField('cumulative')
|
||||||
|
setField('share_count', 'shareCount')
|
||||||
|
setField('like_count', 'likeCount')
|
||||||
|
setField('feedback_count', 'feedbackCount')
|
||||||
|
if (updates.length === 0) return res.status(400).json({ error: 'no fields to update' })
|
||||||
|
values.push(1)
|
||||||
|
db.prepare(`UPDATE display_stats SET ${updates.join(', ')} WHERE id = ?`).run(...values)
|
||||||
|
broadcastAfterEdit(req)
|
||||||
|
res.json({ ok: true })
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err)
|
||||||
|
res.status(400).json({ error: err.message })
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
module.exports = router
|
module.exports = router
|
||||||
|
|||||||
@@ -134,17 +134,32 @@ function seed() {
|
|||||||
insertLoc.run('us', loc.name, loc.lat, loc.lng, loc.type, loc.region, loc.status, loc.damage_level)
|
insertLoc.run('us', loc.name, loc.lat, loc.lng, loc.type, loc.region, loc.status, loc.damage_level)
|
||||||
}
|
}
|
||||||
const iranLocs = [
|
const iranLocs = [
|
||||||
['iran', '阿巴斯港', 27.1832, 56.2666, 'Port', '伊朗', null, null],
|
['iran', '阿巴斯港海军司令部', 27.18, 56.27, 'Port', '伊朗', 'attacked', 3],
|
||||||
['iran', '德黑兰', 35.6892, 51.389, 'Capital', '伊朗', null, null],
|
['iran', '德黑兰', 35.6892, 51.389, 'Capital', '伊朗', 'attacked', 3],
|
||||||
['iran', '布什尔', 28.9681, 50.838, 'Base', '伊朗', null, null],
|
['iran', '布什尔核电站', 28.9681, 50.838, 'Nuclear', '伊朗', 'attacked', 2],
|
||||||
|
['iran', '伊斯法罕核设施', 32.654, 51.667, 'Nuclear', '伊朗', 'attacked', 2],
|
||||||
|
['iran', '纳坦兹铀浓缩', 33.666, 51.916, 'Nuclear', '伊朗', 'attacked', 2],
|
||||||
|
['iran', '米纳布岸防', 27.13, 57.08, 'Base', '伊朗', 'damaged', 2],
|
||||||
|
['iran', '卡拉季无人机厂', 35.808, 51.002, 'Base', '伊朗', 'attacked', 2],
|
||||||
|
['iran', '克尔曼沙赫导弹阵地', 34.314, 47.076, 'Missile', '伊朗', 'attacked', 2],
|
||||||
|
['iran', '大不里士空军基地', 38.08, 46.29, 'Base', '伊朗', 'damaged', 1],
|
||||||
|
['iran', '霍尔木兹岸防阵地', 27.0, 56.5, 'Base', '伊朗', 'operational', null],
|
||||||
]
|
]
|
||||||
iranLocs.forEach((r) => insertLoc.run(...r))
|
iranLocs.forEach((r) => insertLoc.run(...r))
|
||||||
|
|
||||||
db.exec(`
|
try {
|
||||||
INSERT OR REPLACE INTO combat_losses (side, bases_destroyed, bases_damaged, personnel_killed, personnel_wounded, aircraft, warships, armor, vehicles) VALUES
|
db.exec(`
|
||||||
('us', 0, 27, 127, 384, 2, 0, 0, 8),
|
INSERT OR REPLACE INTO combat_losses (side, bases_destroyed, bases_damaged, personnel_killed, personnel_wounded, civilian_killed, civilian_wounded, aircraft, warships, armor, vehicles, drones, missiles, helicopters, submarines, tanks, carriers, civilian_ships, airport_port) VALUES
|
||||||
('iran', 3, 8, 2847, 5620, 24, 12, 18, 42);
|
('us', 0, 27, 127, 384, 18, 52, 2, 0, 0, 8, 4, 12, 1, 0, 0, 0, 0, 0),
|
||||||
`)
|
('iran', 3, 8, 2847, 5620, 412, 1203, 24, 12, 18, 42, 28, 156, 8, 2, 0, 0, 0, 0);
|
||||||
|
`)
|
||||||
|
} catch (_) {
|
||||||
|
db.exec(`
|
||||||
|
INSERT OR REPLACE INTO combat_losses (side, bases_destroyed, bases_damaged, personnel_killed, personnel_wounded, aircraft, warships, armor, vehicles) VALUES
|
||||||
|
('us', 0, 27, 127, 384, 2, 0, 0, 8),
|
||||||
|
('iran', 3, 8, 2847, 5620, 24, 12, 18, 42);
|
||||||
|
`)
|
||||||
|
}
|
||||||
|
|
||||||
db.exec('DELETE FROM wall_street_trend')
|
db.exec('DELETE FROM wall_street_trend')
|
||||||
const trendRows = [['2025-03-01T00:00:00', 82], ['2025-03-01T03:00:00', 85], ['2025-03-01T06:00:00', 88], ['2025-03-01T09:00:00', 90], ['2025-03-01T12:00:00', 92], ['2025-03-01T15:00:00', 94], ['2025-03-01T18:00:00', 95], ['2025-03-01T21:00:00', 96], ['2025-03-01T23:00:00', 98]]
|
const trendRows = [['2025-03-01T00:00:00', 82], ['2025-03-01T03:00:00', 85], ['2025-03-01T06:00:00', 88], ['2025-03-01T09:00:00', 90], ['2025-03-01T12:00:00', 92], ['2025-03-01T15:00:00', 94], ['2025-03-01T18:00:00', 95], ['2025-03-01T21:00:00', 96], ['2025-03-01T23:00:00', 98]]
|
||||||
@@ -171,4 +186,7 @@ function seed() {
|
|||||||
console.log('Seed completed.')
|
console.log('Seed completed.')
|
||||||
}
|
}
|
||||||
|
|
||||||
seed()
|
require('./db').initDb().then(() => seed()).catch((err) => {
|
||||||
|
console.error('Seed failed:', err)
|
||||||
|
process.exit(1)
|
||||||
|
})
|
||||||
|
|||||||
@@ -15,20 +15,36 @@ function toLosses(row) {
|
|||||||
return {
|
return {
|
||||||
bases: { destroyed: row.bases_destroyed, damaged: row.bases_damaged },
|
bases: { destroyed: row.bases_destroyed, damaged: row.bases_damaged },
|
||||||
personnelCasualties: { killed: row.personnel_killed, wounded: row.personnel_wounded },
|
personnelCasualties: { killed: row.personnel_killed, wounded: row.personnel_wounded },
|
||||||
|
civilianCasualties: { killed: row.civilian_killed ?? 0, wounded: row.civilian_wounded ?? 0 },
|
||||||
aircraft: row.aircraft,
|
aircraft: row.aircraft,
|
||||||
warships: row.warships,
|
warships: row.warships,
|
||||||
armor: row.armor,
|
armor: row.armor,
|
||||||
vehicles: row.vehicles,
|
vehicles: row.vehicles,
|
||||||
|
drones: row.drones ?? 0,
|
||||||
|
missiles: row.missiles ?? 0,
|
||||||
|
helicopters: row.helicopters ?? 0,
|
||||||
|
submarines: row.submarines ?? 0,
|
||||||
|
carriers: row.carriers ?? row.tanks ?? 0,
|
||||||
|
civilianShips: row.civilian_ships ?? 0,
|
||||||
|
airportPort: row.airport_port ?? 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const defaultLosses = {
|
const defaultLosses = {
|
||||||
bases: { destroyed: 0, damaged: 0 },
|
bases: { destroyed: 0, damaged: 0 },
|
||||||
personnelCasualties: { killed: 0, wounded: 0 },
|
personnelCasualties: { killed: 0, wounded: 0 },
|
||||||
|
civilianCasualties: { killed: 0, wounded: 0 },
|
||||||
aircraft: 0,
|
aircraft: 0,
|
||||||
warships: 0,
|
warships: 0,
|
||||||
armor: 0,
|
armor: 0,
|
||||||
vehicles: 0,
|
vehicles: 0,
|
||||||
|
drones: 0,
|
||||||
|
missiles: 0,
|
||||||
|
helicopters: 0,
|
||||||
|
submarines: 0,
|
||||||
|
carriers: 0,
|
||||||
|
civilianShips: 0,
|
||||||
|
airportPort: 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
function getSituation() {
|
function getSituation() {
|
||||||
@@ -39,15 +55,41 @@ function getSituation() {
|
|||||||
const assetsUs = db.prepare('SELECT * FROM force_asset WHERE side = ? ORDER BY id').all('us')
|
const assetsUs = db.prepare('SELECT * FROM force_asset WHERE side = ? ORDER BY id').all('us')
|
||||||
const assetsIr = db.prepare('SELECT * FROM force_asset WHERE side = ? ORDER BY id').all('iran')
|
const assetsIr = db.prepare('SELECT * FROM force_asset WHERE side = ? ORDER BY id').all('iran')
|
||||||
const locUs = db.prepare('SELECT id, name, lat, lng, type, region, status, damage_level FROM key_location WHERE side = ?').all('us')
|
const locUs = db.prepare('SELECT id, name, lat, lng, type, region, status, damage_level FROM key_location WHERE side = ?').all('us')
|
||||||
const locIr = db.prepare('SELECT id, name, lat, lng, type, region FROM key_location WHERE side = ?').all('iran')
|
const locIr = db.prepare('SELECT id, name, lat, lng, type, region, status, damage_level FROM key_location WHERE side = ?').all('iran')
|
||||||
const lossesUs = db.prepare('SELECT * FROM combat_losses WHERE side = ?').get('us')
|
const lossesUs = db.prepare('SELECT * FROM combat_losses WHERE side = ?').get('us')
|
||||||
const lossesIr = db.prepare('SELECT * FROM combat_losses WHERE side = ?').get('iran')
|
const lossesIr = db.prepare('SELECT * FROM combat_losses WHERE side = ?').get('iran')
|
||||||
const trend = db.prepare('SELECT time, value FROM wall_street_trend ORDER BY time').all()
|
const trend = db.prepare('SELECT time, value FROM wall_street_trend ORDER BY time').all()
|
||||||
const retaliationCur = db.prepare('SELECT value FROM retaliation_current WHERE id = 1').get()
|
const retaliationCur = db.prepare('SELECT value FROM retaliation_current WHERE id = 1').get()
|
||||||
const retaliationHist = db.prepare('SELECT time, value FROM retaliation_history ORDER BY time').all()
|
const retaliationHist = db.prepare('SELECT time, value FROM retaliation_history ORDER BY time').all()
|
||||||
const updates = db.prepare('SELECT * FROM situation_update ORDER BY timestamp DESC').all()
|
const updates = db.prepare('SELECT * FROM situation_update ORDER BY timestamp DESC LIMIT 50').all()
|
||||||
const meta = db.prepare('SELECT updated_at FROM situation WHERE id = 1').get()
|
const meta = db.prepare('SELECT updated_at FROM situation WHERE id = 1').get()
|
||||||
|
|
||||||
|
let conflictEvents = []
|
||||||
|
let conflictStats = { total_events: 0, high_impact_events: 0, estimated_casualties: 0, estimated_strike_count: 0 }
|
||||||
|
try {
|
||||||
|
conflictEvents = db.prepare('SELECT event_id, event_time, title, lat, lng, impact_score, url FROM gdelt_events ORDER BY event_time DESC LIMIT 30').all()
|
||||||
|
const statsRow = db.prepare('SELECT total_events, high_impact_events, estimated_casualties, estimated_strike_count FROM conflict_stats WHERE id = 1').get()
|
||||||
|
if (statsRow) conflictStats = statsRow
|
||||||
|
} catch (_) {}
|
||||||
|
|
||||||
|
// 平民伤亡:合计显示,不区分阵营
|
||||||
|
const civUsK = lossesUs?.civilian_killed ?? 0
|
||||||
|
const civUsW = lossesUs?.civilian_wounded ?? 0
|
||||||
|
const civIrK = lossesIr?.civilian_killed ?? 0
|
||||||
|
const civIrW = lossesIr?.civilian_wounded ?? 0
|
||||||
|
const dbKilled = civUsK + civIrK
|
||||||
|
const dbWounded = civUsW + civIrW
|
||||||
|
const est = conflictStats.estimated_casualties || 0
|
||||||
|
const civilianCasualtiesTotal = {
|
||||||
|
killed: est > 0 ? Math.max(dbKilled, est) : dbKilled,
|
||||||
|
wounded: dbWounded,
|
||||||
|
}
|
||||||
|
|
||||||
|
const usLossesBase = lossesUs ? toLosses(lossesUs) : defaultLosses
|
||||||
|
const irLossesBase = lossesIr ? toLosses(lossesIr) : defaultLosses
|
||||||
|
const usLosses = { ...usLossesBase, civilianCasualties: { killed: 0, wounded: 0 } }
|
||||||
|
const irLosses = { ...irLossesBase, civilianCasualties: { killed: 0, wounded: 0 } }
|
||||||
|
|
||||||
return {
|
return {
|
||||||
lastUpdated: meta?.updated_at || new Date().toISOString(),
|
lastUpdated: meta?.updated_at || new Date().toISOString(),
|
||||||
usForces: {
|
usForces: {
|
||||||
@@ -69,7 +111,7 @@ function getSituation() {
|
|||||||
},
|
},
|
||||||
assets: (assetsUs || []).map(toAsset),
|
assets: (assetsUs || []).map(toAsset),
|
||||||
keyLocations: locUs || [],
|
keyLocations: locUs || [],
|
||||||
combatLosses: lossesUs ? toLosses(lossesUs) : defaultLosses,
|
combatLosses: usLosses,
|
||||||
wallStreetInvestmentTrend: trend || [],
|
wallStreetInvestmentTrend: trend || [],
|
||||||
},
|
},
|
||||||
iranForces: {
|
iranForces: {
|
||||||
@@ -91,7 +133,7 @@ function getSituation() {
|
|||||||
},
|
},
|
||||||
assets: (assetsIr || []).map(toAsset),
|
assets: (assetsIr || []).map(toAsset),
|
||||||
keyLocations: locIr || [],
|
keyLocations: locIr || [],
|
||||||
combatLosses: lossesIr ? toLosses(lossesIr) : defaultLosses,
|
combatLosses: irLosses,
|
||||||
retaliationSentiment: retaliationCur?.value ?? 0,
|
retaliationSentiment: retaliationCur?.value ?? 0,
|
||||||
retaliationSentimentHistory: retaliationHist || [],
|
retaliationSentimentHistory: retaliationHist || [],
|
||||||
},
|
},
|
||||||
@@ -102,6 +144,19 @@ function getSituation() {
|
|||||||
summary: u.summary,
|
summary: u.summary,
|
||||||
severity: u.severity,
|
severity: u.severity,
|
||||||
})),
|
})),
|
||||||
|
conflictEvents: conflictEvents.map((e) => ({
|
||||||
|
event_id: e.event_id,
|
||||||
|
event_time: e.event_time,
|
||||||
|
title: e.title,
|
||||||
|
lat: e.lat,
|
||||||
|
lng: e.lng,
|
||||||
|
impact_score: e.impact_score,
|
||||||
|
url: e.url,
|
||||||
|
})),
|
||||||
|
conflictStats,
|
||||||
|
civilianCasualtiesTotal,
|
||||||
|
// 顶层聚合,便于 sit.combatLosses.us / sit.combatLosses.iran 与 usForces/iranForces 内保持一致
|
||||||
|
combatLosses: { us: usLosses, iran: irLosses },
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
40
server/stats.js
Normal file
40
server/stats.js
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
const db = require('./db')
|
||||||
|
|
||||||
|
function toNum(v) {
|
||||||
|
if (v == null || v === '') return 0
|
||||||
|
const n = Number(v)
|
||||||
|
return Number.isFinite(n) ? Math.max(0, Math.floor(n)) : 0
|
||||||
|
}
|
||||||
|
|
||||||
|
function getStats() {
|
||||||
|
const viewersRow = db.prepare(
|
||||||
|
"SELECT COUNT(*) as n FROM visits WHERE last_seen > datetime('now', '-2 minutes')"
|
||||||
|
).get()
|
||||||
|
const cumulativeRow = db.prepare('SELECT total FROM visitor_count WHERE id = 1').get()
|
||||||
|
const feedbackRow = db.prepare('SELECT COUNT(*) as n FROM feedback').get()
|
||||||
|
const shareRow = db.prepare('SELECT total FROM share_count WHERE id = 1').get()
|
||||||
|
let realLikeCount = 0
|
||||||
|
try {
|
||||||
|
realLikeCount = toNum(db.prepare('SELECT total FROM like_count WHERE id = 1').get()?.total)
|
||||||
|
} catch (_) {}
|
||||||
|
let viewers = toNum(viewersRow?.n)
|
||||||
|
let cumulative = toNum(cumulativeRow?.total)
|
||||||
|
let feedbackCount = toNum(feedbackRow?.n)
|
||||||
|
let shareCount = toNum(shareRow?.total)
|
||||||
|
let likeCount = realLikeCount
|
||||||
|
let display = null
|
||||||
|
try {
|
||||||
|
display = db.prepare('SELECT viewers, cumulative, share_count, like_count, feedback_count FROM display_stats WHERE id = 1').get()
|
||||||
|
} catch (_) {}
|
||||||
|
if (display) {
|
||||||
|
if (display.viewers != null) viewers = toNum(display.viewers)
|
||||||
|
if (display.cumulative != null) cumulative = toNum(display.cumulative)
|
||||||
|
if (display.share_count != null) shareCount = toNum(display.share_count)
|
||||||
|
if (display.like_count != null) likeCount = toNum(display.like_count)
|
||||||
|
else likeCount = realLikeCount
|
||||||
|
if (display.feedback_count != null) feedbackCount = toNum(display.feedback_count)
|
||||||
|
}
|
||||||
|
return { viewers, cumulative, feedbackCount, shareCount, likeCount }
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = { getStats }
|
||||||
11
src/App.tsx
11
src/App.tsx
@@ -1,12 +1,19 @@
|
|||||||
|
import { Routes, Route } from 'react-router-dom'
|
||||||
import { Dashboard } from '@/pages/Dashboard'
|
import { Dashboard } from '@/pages/Dashboard'
|
||||||
|
import { DbDashboard } from '@/pages/DbDashboard'
|
||||||
|
import { EditDashboard } from '@/pages/EditDashboard'
|
||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
return (
|
return (
|
||||||
<div
|
<div
|
||||||
className="min-h-screen w-full bg-military-dark overflow-hidden"
|
className="min-h-screen w-full max-w-full overflow-x-hidden bg-military-dark overflow-hidden"
|
||||||
style={{ background: '#0A0F1C' }}
|
style={{ background: '#0A0F1C' }}
|
||||||
>
|
>
|
||||||
<Dashboard />
|
<Routes>
|
||||||
|
<Route path="/" element={<Dashboard />} />
|
||||||
|
<Route path="/db" element={<DbDashboard />} />
|
||||||
|
<Route path="/edit" element={<EditDashboard />} />
|
||||||
|
</Routes>
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
153
src/api/edit.ts
Normal file
153
src/api/edit.ts
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
/** 手动修正看板数据 API */
|
||||||
|
|
||||||
|
export interface CombatLossesRow {
|
||||||
|
side: string
|
||||||
|
bases_destroyed: number
|
||||||
|
bases_damaged: number
|
||||||
|
personnel_killed: number
|
||||||
|
personnel_wounded: number
|
||||||
|
civilian_killed?: number
|
||||||
|
civilian_wounded?: number
|
||||||
|
aircraft: number
|
||||||
|
warships: number
|
||||||
|
armor: number
|
||||||
|
vehicles: number
|
||||||
|
drones?: number
|
||||||
|
missiles?: number
|
||||||
|
helicopters?: number
|
||||||
|
submarines?: number
|
||||||
|
tanks?: number
|
||||||
|
carriers?: number
|
||||||
|
civilian_ships?: number
|
||||||
|
airport_port?: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface KeyLocationRow {
|
||||||
|
id: number
|
||||||
|
side: string
|
||||||
|
name: string
|
||||||
|
lat: number
|
||||||
|
lng: number
|
||||||
|
type?: string | null
|
||||||
|
region?: string | null
|
||||||
|
status?: string | null
|
||||||
|
damage_level?: number | null
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SituationUpdateRow {
|
||||||
|
id: string
|
||||||
|
timestamp: string
|
||||||
|
category: string
|
||||||
|
summary: string
|
||||||
|
severity: string
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ForceSummaryRow {
|
||||||
|
side: string
|
||||||
|
total_assets: number
|
||||||
|
personnel: number
|
||||||
|
naval_ships: number
|
||||||
|
aircraft: number
|
||||||
|
ground_units: number
|
||||||
|
uav: number
|
||||||
|
missile_consumed: number
|
||||||
|
missile_stock: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface DisplayStatsRow {
|
||||||
|
viewers: number
|
||||||
|
cumulative: number
|
||||||
|
shareCount: number
|
||||||
|
likeCount: number
|
||||||
|
feedbackCount: number
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface EditRawData {
|
||||||
|
combatLosses: { us: CombatLossesRow | null; iran: CombatLossesRow | null }
|
||||||
|
keyLocations: { us: KeyLocationRow[]; iran: KeyLocationRow[] }
|
||||||
|
situationUpdates: SituationUpdateRow[]
|
||||||
|
forceSummary: { us: ForceSummaryRow | null; iran: ForceSummaryRow | null }
|
||||||
|
displayStats?: DisplayStatsRow
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function fetchEditRaw(): Promise<EditRawData> {
|
||||||
|
const res = await fetch('/api/edit/raw', { cache: 'no-store' })
|
||||||
|
if (!res.ok) throw new Error(`API error: ${res.status}`)
|
||||||
|
return res.json()
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function putCombatLosses(side: 'us' | 'iran', body: Partial<CombatLossesRow>): Promise<void> {
|
||||||
|
const res = await fetch('/api/edit/combat-losses', {
|
||||||
|
method: 'PUT',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ side, ...body }),
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const e = await res.json().catch(() => ({}))
|
||||||
|
throw new Error((e as { error?: string }).error || res.statusText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function patchKeyLocation(id: number, body: Partial<KeyLocationRow>): Promise<void> {
|
||||||
|
const res = await fetch(`/api/edit/key-location/${id}`, {
|
||||||
|
method: 'PATCH',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(body),
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const e = await res.json().catch(() => ({}))
|
||||||
|
throw new Error((e as { error?: string }).error || res.statusText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function postSituationUpdate(body: {
|
||||||
|
id?: string
|
||||||
|
timestamp?: string
|
||||||
|
category: string
|
||||||
|
summary: string
|
||||||
|
severity?: string
|
||||||
|
}): Promise<{ id: string }> {
|
||||||
|
const res = await fetch('/api/edit/situation-update', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(body),
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const e = await res.json().catch(() => ({}))
|
||||||
|
throw new Error((e as { error?: string }).error || res.statusText)
|
||||||
|
}
|
||||||
|
return res.json()
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function deleteSituationUpdate(id: string): Promise<void> {
|
||||||
|
const res = await fetch(`/api/edit/situation-update/${encodeURIComponent(id)}`, { method: 'DELETE' })
|
||||||
|
if (!res.ok) {
|
||||||
|
const e = await res.json().catch(() => ({}))
|
||||||
|
throw new Error((e as { error?: string }).error || res.statusText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function putForceSummary(side: 'us' | 'iran', body: Partial<ForceSummaryRow>): Promise<void> {
|
||||||
|
const res = await fetch('/api/edit/force-summary', {
|
||||||
|
method: 'PUT',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ side, ...body }),
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const e = await res.json().catch(() => ({}))
|
||||||
|
throw new Error((e as { error?: string }).error || res.statusText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** 传 null 的字段会清除覆盖,改回实时统计 */
|
||||||
|
export async function putDisplayStats(body: Partial<{ [K in keyof DisplayStatsRow]: number | null }>): Promise<void> {
|
||||||
|
const res = await fetch('/api/edit/display-stats', {
|
||||||
|
method: 'PUT',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(body),
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const e = await res.json().catch(() => ({}))
|
||||||
|
throw new Error((e as { error?: string }).error || res.statusText)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import type { MilitarySituation } from '@/data/mockData'
|
import type { MilitarySituation } from '@/data/mockData'
|
||||||
|
|
||||||
export async function fetchSituation(): Promise<MilitarySituation> {
|
export async function fetchSituation(): Promise<MilitarySituation> {
|
||||||
const res = await fetch('/api/situation')
|
const res = await fetch(`/api/situation?t=${Date.now()}`, { cache: 'no-store' })
|
||||||
if (!res.ok) throw new Error(`API error: ${res.status}`)
|
if (!res.ok) throw new Error(`API error: ${res.status}`)
|
||||||
return res.json()
|
return res.json()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ export function connectSituationWebSocket(onData: Handler): () => void {
|
|||||||
ws.onmessage = (e) => {
|
ws.onmessage = (e) => {
|
||||||
try {
|
try {
|
||||||
const msg = JSON.parse(e.data)
|
const msg = JSON.parse(e.data)
|
||||||
if (msg.type === 'situation' && msg.data) handler?.(msg.data)
|
if (msg.type === 'situation') handler?.({ situation: msg.data, stats: msg.stats })
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
}
|
}
|
||||||
ws.onclose = () => {
|
ws.onclose = () => {
|
||||||
|
|||||||
@@ -7,11 +7,10 @@ interface BaseStatusPanelProps {
|
|||||||
className?: string
|
className?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
const TOTAL_BASES = 62
|
|
||||||
|
|
||||||
export function BaseStatusPanel({ keyLocations, className = '' }: BaseStatusPanelProps) {
|
export function BaseStatusPanel({ keyLocations, className = '' }: BaseStatusPanelProps) {
|
||||||
const stats = useMemo(() => {
|
const stats = useMemo(() => {
|
||||||
const bases = (keyLocations || []).filter((loc) => loc.type === 'Base')
|
const bases = (keyLocations || []).filter((loc) => loc.type === 'Base')
|
||||||
|
const total = bases.length
|
||||||
let attacked = 0
|
let attacked = 0
|
||||||
let severe = 0
|
let severe = 0
|
||||||
let moderate = 0
|
let moderate = 0
|
||||||
@@ -24,7 +23,7 @@ export function BaseStatusPanel({ keyLocations, className = '' }: BaseStatusPane
|
|||||||
else if (lvl === 2) moderate++
|
else if (lvl === 2) moderate++
|
||||||
else if (lvl === 1) light++
|
else if (lvl === 1) light++
|
||||||
}
|
}
|
||||||
return { attacked, severe, moderate, light }
|
return { total, attacked, severe, moderate, light }
|
||||||
}, [keyLocations])
|
}, [keyLocations])
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -38,7 +37,7 @@ export function BaseStatusPanel({ keyLocations, className = '' }: BaseStatusPane
|
|||||||
<div className="flex flex-col gap-1.5 text-xs tabular-nums">
|
<div className="flex flex-col gap-1.5 text-xs tabular-nums">
|
||||||
<div className="flex items-center justify-between gap-2">
|
<div className="flex items-center justify-between gap-2">
|
||||||
<span className="text-military-text-secondary">总基地数</span>
|
<span className="text-military-text-secondary">总基地数</span>
|
||||||
<strong>{TOTAL_BASES}</strong>
|
<strong>{stats.total}</strong>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex items-center justify-between gap-2">
|
<div className="flex items-center justify-between gap-2">
|
||||||
<span className="flex items-center gap-1 text-military-text-secondary">
|
<span className="flex items-center gap-1 text-military-text-secondary">
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user