fix:优化爬虫配置,单独使用docker容器运行
This commit is contained in:
55
scripts/run-crawler-docker-standalone.sh
Normal file
55
scripts/run-crawler-docker-standalone.sh
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env bash
|
||||
# 生产:仅用 Docker 跑爬虫,API 在宿主机(如 PM2)时使用。
|
||||
# 保证爬虫与 API 使用同一 SQLite 文件(数据对齐)。
|
||||
set -e
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}"
|
||||
DB_FILE="${DB_FILE:-$PROJECT_ROOT/server/data.db}"
|
||||
API_BASE="${API_BASE:-http://host.docker.internal:3001}"
|
||||
CRAWLER_IMAGE="${CRAWLER_IMAGE:-usa-dashboard-crawler:latest}"
|
||||
CONTAINER_NAME="${CONTAINER_NAME:-usa-crawler}"
|
||||
|
||||
# Optional: load DASHSCOPE_API_KEY and any other settings from the project's
# .env file, if one exists.
if [ -f "$PROJECT_ROOT/.env" ]; then
  # allexport (-a) marks every variable assigned while sourcing for export,
  # so the values are visible to child processes started later.
  set -a
  # shellcheck source=../.env
  . "$PROJECT_ROOT/.env"
  set +a
fi
|
||||
|
||||
# The host-side DB file must already exist (created by the API, or by running
# the seed script first).
if [ ! -f "$DB_FILE" ]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "ERROR: DB file not found: $DB_FILE" >&2
  echo " Create it first: DB_PATH=$DB_FILE node server/seed.js" >&2
  exit 1
fi

# `docker run -v SRC:DST` only treats SRC as a bind mount when it is a path;
# a bare relative value is taken as a volume name. Resolve a relative DB_FILE
# to an absolute path so the existing file is what gets mounted.
case "$DB_FILE" in
  /*) ;;
  *) DB_FILE="$(cd "$(dirname -- "$DB_FILE")" && pwd)/$(basename -- "$DB_FILE")" ;;
esac
|
||||
|
||||
# On Linux, Docker does not define host.docker.internal by default, so map it
# explicitly to the host gateway. Other platforms get no extra arguments.
# NOTE(review): the special value "host-gateway" needs a reasonably recent
# Docker Engine (20.10+ per Docker's docs) — confirm on the target host.
DOCKER_EXTRA=()
case "$(uname -s)" in
  Linux)
    DOCKER_EXTRA+=(--add-host=host.docker.internal:host-gateway)
    ;;
esac
|
||||
|
||||
# Fail early with a clear message when the docker CLI is missing; otherwise the
# best-effort removal below would hide the problem until `docker run`.
if ! command -v docker >/dev/null 2>&1; then
  echo "ERROR: docker command not found in PATH" >&2
  exit 1
fi

# Host port published for the crawler (the container side stays 8000).
CRAWLER_PORT="${CRAWLER_PORT:-8000}"

# Remove any existing container with the same name first. Deliberately
# best-effort: a missing container is not an error.
docker rm -f "$CONTAINER_NAME" 2>/dev/null || true

# Build the argument list as an array so every value stays a single word.
# NOTE(review): only the single DB file is bind-mounted; if the database uses
# WAL journaling its -wal/-shm side files would not be shared — confirm the
# journal mode used by the API.
RUN_ARGS=(
  -d
  --name "$CONTAINER_NAME"
  --restart unless-stopped
  -p "$CRAWLER_PORT:8000"
  -v "$DB_FILE:/data/data.db"
  -e DB_PATH=/data/data.db
  -e API_BASE="$API_BASE"
  -e GDELT_DISABLED=1
  -e RSS_INTERVAL_SEC=60
)

# Pass the API key by NAME only: `-e VAR` makes docker copy the value from this
# process's environment, so the secret does not appear on the command line.
if [ -n "${DASHSCOPE_API_KEY:-}" ]; then
  export DASHSCOPE_API_KEY
  RUN_ARGS+=(-e DASHSCOPE_API_KEY)
fi

# The ${arr[@]+...} form expands to nothing for an empty or unset array, which
# is safe on old bash versions and under `set -u`.
RUN_ARGS+=(${DOCKER_EXTRA[@]+"${DOCKER_EXTRA[@]}"})

echo "==> Starting crawler container (standalone)"
echo " DB: $DB_FILE -> /data/data.db"
echo " API_BASE: $API_BASE"
echo " Image: $CRAWLER_IMAGE"

docker run "${RUN_ARGS[@]}" "$CRAWLER_IMAGE"

echo " Container: $CONTAINER_NAME"
echo " Logs: docker logs -f $CONTAINER_NAME"
echo " Status: curl -s http://localhost:$CRAWLER_PORT/crawler/status | jq ."
|
||||
Reference in New Issue
Block a user