18 lines
641 B
Bash
Executable File
18 lines
641 B
Bash
Executable File
#!/usr/bin/env bash
#
# Run one crawler pass limited to a time range (RSS: only entries after the
# given start time are kept).
#
# Usage:
#   ./scripts/run-crawler-range.sh                      # default: from 2026-02-28 00:00 until now
#   ./scripts/run-crawler-range.sh 2026-02-25T00:00:00
#
# Arguments:
#   $1 - optional start timestamp; falls back to 2026-02-28T00:00:00 when
#        omitted or empty.
#
# Environment exported to the crawler process:
#   CRAWL_START_DATE - the start timestamp chosen above.
#
# The GDELT time range has to be set when the gdelt service is started, e.g.:
#   GDELT_TIMESPAN=3d npm run gdelt

# Abort on command failure, on use of an unset variable, and on a failing
# pipeline stage.
set -euo pipefail

START="${1:-2026-02-28T00:00:00}"
readonly START

# Move to the parent of the directory containing this script so the relative
# "crawler" path below resolves no matter where the script is invoked from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.."

printf '%s\n' "RSS 抓取时间范围: 仅保留 ${START} 之后"
# Announce the command exactly as it is executed below (python3, not python).
printf '%s\n' "运行: cd crawler && CRAWL_START_DATE=${START} python3 run_once.py"
printf '\n'

export CRAWL_START_DATE="$START"

# The subshell keeps the directory change local to the crawler invocation; as
# the last command, its exit status becomes the script's exit status.
(cd crawler && python3 run_once.py)