fix:优化爬虫配置,单独使用docker容器运行
This commit is contained in:
21
scripts/production-start.sh
Normal file
21
scripts/production-start.sh
Normal file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
# Production one-shot: build the crawler image, start it in "crawler-only in
# Docker, API on the host" mode, then print a data-alignment reminder.
#
# Prerequisites: the API is already running on the host on port 3001 (e.g.
# under PM2) and server/data.db already exists (otherwise run
# `npm run api:seed` first).
#
# Environment (all optional):
#   PROJECT_ROOT  root used in the reminder text (default: parent directory
#                 of this script's directory)
#   REGISTRY      when non-empty, forwarded as --build-arg REGISTRY=<value>
#   DB_FILE       read by the standalone runner; echoed in the reminder so the
#                 printed path matches the file that actually gets mounted
set -e

# Work from the repository root so the relative paths below resolve.
cd "$(dirname "$0")/.."
PROJECT_ROOT="${PROJECT_ROOT:-$(pwd)}"
REGISTRY="${REGISTRY:-}"

echo "==> Building crawler image..."
# The :+ expansion contributes the two --build-arg words only when REGISTRY is
# non-empty; the inner quotes keep the value as a single word.
docker build -t usa-dashboard-crawler:latest \
  ${REGISTRY:+--build-arg REGISTRY="$REGISTRY"} \
  -f Dockerfile.crawler .

echo ""
# Run the helper through bash explicitly: executing it directly requires the
# executable bit, which the file may not carry in the repository.
bash ./scripts/run-crawler-docker-standalone.sh

echo ""
echo "==> Data alignment (生产数据对齐)"
# Show DB_FILE when the caller overrode it, since that is the file the runner
# mounts; otherwise this is the same default path as before.
echo " API (host) DB_PATH = ${DB_FILE:-$PROJECT_ROOT/server/data.db} (或 env DB_PATH)"
echo " Crawler /data/data.db = 挂载自上述同一文件"
echo " 二者必须指向同一 SQLite 文件,前端/API 与爬虫才能数据一致。"
|
||||
55
scripts/run-crawler-docker-standalone.sh
Normal file
55
scripts/run-crawler-docker-standalone.sh
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env bash
# Production: run only the crawler in Docker while the API runs on the host
# (e.g. under PM2). The crawler container is given the same SQLite file the
# API uses so both sides see the same data.
#
# Environment (all optional):
#   PROJECT_ROOT       repository root (default: parent of this script's dir)
#   DB_FILE            host SQLite file (default: $PROJECT_ROOT/server/data.db)
#   API_BASE           API URL as seen from the container
#                      (default: http://host.docker.internal:3001)
#   CRAWLER_IMAGE      image to run (default: usa-dashboard-crawler:latest)
#   CONTAINER_NAME     container name (default: usa-crawler)
#   DASHSCOPE_API_KEY  forwarded into the container when non-empty
#
# Exit status: 1 when DB_FILE does not exist; otherwise the status of the first
# failing command (set -e), or 0.
set -e

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}"
DB_FILE="${DB_FILE:-$PROJECT_ROOT/server/data.db}"
API_BASE="${API_BASE:-http://host.docker.internal:3001}"
CRAWLER_IMAGE="${CRAWLER_IMAGE:-usa-dashboard-crawler:latest}"
CONTAINER_NAME="${CONTAINER_NAME:-usa-crawler}"

# Optional: load DASHSCOPE_API_KEY and friends from .env. `set -a` exports
# everything the file assigns. Note that the file is sourced after the defaults
# above, so any of those variables it assigns are overwritten by its values.
if [ -f "$PROJECT_ROOT/.env" ]; then
  set -a
  # shellcheck source=../.env
  . "$PROJECT_ROOT/.env"
  set +a
fi

# The host DB must already exist (API initialised, or seeded beforehand).
if [ ! -f "$DB_FILE" ]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "ERROR: DB file not found: $DB_FILE" >&2
  echo " Create it first: DB_PATH=$DB_FILE node server/seed.js" >&2
  exit 1
fi

# `docker run -v` only treats an absolute source as a host path; a relative
# one is taken as a named volume (or rejected). Resolve it against the current
# directory, which is the same base the existence check above used.
case "$DB_FILE" in
  /*) ;;
  *) DB_FILE="$(cd "$(dirname "$DB_FILE")" && pwd)/$(basename "$DB_FILE")" ;;
esac

# NOTE(review): only the single database file is bind-mounted. If the database
# runs in WAL mode, its -wal/-shm sidecar files would not be shared with the
# host process — confirm the journal mode used by the API and the crawler.
docker_args=(
  -d
  --name "$CONTAINER_NAME"
  --restart unless-stopped
  -p 8000:8000
  -v "$DB_FILE:/data/data.db"
  -e DB_PATH=/data/data.db
  -e API_BASE="$API_BASE"
  -e GDELT_DISABLED=1
  -e RSS_INTERVAL_SEC=60
)

# Forward the API key by name only: docker copies the value from this
# process's environment, so the secret never appears on the command line.
if [ -n "${DASHSCOPE_API_KEY:-}" ]; then
  export DASHSCOPE_API_KEY
  docker_args+=(-e DASHSCOPE_API_KEY)
fi

# On Linux, Docker does not provide host.docker.internal by default; map it to
# the host gateway explicitly.
if [ "$(uname -s)" = "Linux" ]; then
  docker_args+=(--add-host=host.docker.internal:host-gateway)
fi

# Remove any existing container with the same name. Best-effort on purpose:
# "no such container" is the normal case on a first run.
docker rm -f "$CONTAINER_NAME" 2>/dev/null || true

echo "==> Starting crawler container (standalone)"
echo " DB: $DB_FILE -> /data/data.db"
echo " API_BASE: $API_BASE"
echo " Image: $CRAWLER_IMAGE"
docker run "${docker_args[@]}" "$CRAWLER_IMAGE"

echo " Container: $CONTAINER_NAME"
echo " Logs: docker logs -f $CONTAINER_NAME"
echo " Status: curl -s http://localhost:8000/crawler/status | jq ."
|
||||
Reference in New Issue
Block a user