#!/usr/bin/env bash
# Production: run only the crawler in Docker while the API stays on the host
# (e.g. under PM2). Mounts the same SQLite file into the container so the
# crawler and the API operate on identical data.
#
# Overridable env vars: PROJECT_ROOT, DB_FILE, API_BASE, CRAWLER_IMAGE,
# CONTAINER_NAME, DASHSCOPE_API_KEY (optional, forwarded when set).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}"
DB_FILE="${DB_FILE:-$PROJECT_ROOT/server/data.db}"
API_BASE="${API_BASE:-http://host.docker.internal:3001}"
CRAWLER_IMAGE="${CRAWLER_IMAGE:-usa-dashboard-crawler:latest}"
CONTAINER_NAME="${CONTAINER_NAME:-usa-crawler}"

# Optional: load DASHSCOPE_API_KEY etc. from .env (auto-exported via set -a).
if [ -f "$PROJECT_ROOT/.env" ]; then
  set -a
  # shellcheck source=../.env
  . "$PROJECT_ROOT/.env"
  set +a
fi

# The host DB must already exist (API initialized it, or seed it first).
if [ ! -f "$DB_FILE" ]; then
  echo "ERROR: DB file not found: $DB_FILE" >&2
  echo " Create it first: DB_PATH=$DB_FILE node server/seed.js" >&2
  exit 1
fi

# Extra docker-run flags collected in an array so values with whitespace
# survive intact.
DOCKER_EXTRA=()

# On Linux, Docker has no host.docker.internal by default — add it explicitly.
if [ "$(uname -s)" = "Linux" ]; then
  DOCKER_EXTRA+=(--add-host=host.docker.internal:host-gateway)
fi

# Forward the API key only when set. An array element (rather than an
# unquoted ${VAR:+...} expansion) keeps the value as a single word even if
# it contains spaces.
if [ -n "${DASHSCOPE_API_KEY:-}" ]; then
  DOCKER_EXTRA+=(-e DASHSCOPE_API_KEY="$DASHSCOPE_API_KEY")
fi

# Remove any stale container with the same name; failure (no such
# container) is expected and intentionally ignored.
docker rm -f "$CONTAINER_NAME" 2>/dev/null || true

echo "==> Starting crawler container (standalone)"
echo " DB: $DB_FILE -> /data/data.db"
echo " API_BASE: $API_BASE"
echo " Image: $CRAWLER_IMAGE"

# ${DOCKER_EXTRA[@]+...} guards the possibly-empty array expansion, which
# would otherwise trip 'set -u' on bash < 4.4.
docker run -d \
  --name "$CONTAINER_NAME" \
  --restart unless-stopped \
  -p 8000:8000 \
  -v "$DB_FILE:/data/data.db" \
  -e DB_PATH=/data/data.db \
  -e API_BASE="$API_BASE" \
  -e GDELT_DISABLED=1 \
  -e RSS_INTERVAL_SEC=60 \
  ${DOCKER_EXTRA[@]+"${DOCKER_EXTRA[@]}"} \
  "$CRAWLER_IMAGE"

echo " Container: $CONTAINER_NAME"
echo " Logs: docker logs -f $CONTAINER_NAME"
echo " Status: curl -s http://localhost:8000/crawler/status | jq ."