Files
usa/scripts/run-crawler-range.sh
2026-03-03 22:42:21 +08:00

18 lines
641 B
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Run one crawler pass restricted to a time range: per the crawler's contract,
# RSS entries are kept only if they fall after the given start time.
#
# Usage:
#   ./scripts/run-crawler-range.sh                      # default: from 2026-02-28 00:00 until now
#   ./scripts/run-crawler-range.sh 2026-02-25T00:00:00
#
# Arguments:
#   $1 - optional start timestamp (e.g. 2026-02-25T00:00:00); it is passed
#        through unvalidated to the crawler via CRAWL_START_DATE.
#
# The GDELT time range is not controlled by this script; it has to be set when
# the gdelt service is started, for example:
#   GDELT_TIMESPAN=3d npm run gdelt
set -euo pipefail

# Start of the range: first positional argument, else the built-in default.
START="${1:-2026-02-28T00:00:00}"

# Work from the parent of the directory that contains this script, so the
# relative "crawler" path below resolves regardless of the caller's cwd.
cd "$(dirname "$0")/.."

echo "RSS 抓取时间范围: 仅保留 ${START} 之后"
# The echoed command mirrors what is actually executed below (python3).
echo "运行: cd crawler && CRAWL_START_DATE=${START} python3 run_once.py"
echo ""

# Exported so the child Python process can see it
# (presumably run_once.py reads CRAWL_START_DATE — confirm in crawler/).
export CRAWL_START_DATE="$START"
(cd crawler && python3 run_once.py)