fix:增面

This commit is contained in:
Daniel
2026-03-03 22:42:21 +08:00
parent 09ec2e3a69
commit 86e50debec
13 changed files with 1486 additions and 0 deletions

17
scripts/run-crawler-range.sh Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Run one crawler pass restricted to a time range: the start timestamp is
# handed to the crawler through CRAWL_START_DATE (for RSS, only entries after
# that start time are meant to be kept).
#
# Usage:
#   ./scripts/run-crawler-range.sh                      # default start: 2026-02-28 00:00, up to now
#   ./scripts/run-crawler-range.sh 2026-02-25T00:00:00  # explicit start timestamp
#
# Arguments:
#   $1 - optional start timestamp; defaults to 2026-02-28T00:00:00
#
# Environment (exported for the child process):
#   CRAWL_START_DATE - set to the chosen start timestamp
#
# The GDELT time range is not controlled here; it has to be set when the gdelt
# service is started, for example:
#   GDELT_TIMESPAN=3d npm run gdelt
set -euo pipefail

# Fall back to the built-in default when no argument (or an empty one) is given.
START="${1:-2026-02-28T00:00:00}"

# Resolve the parent of the directory containing this script so the script
# behaves the same regardless of the caller's working directory.
cd "$(dirname "$0")/.."

echo "RSS 抓取时间范围: 仅保留 ${START} 之后"
# The logged command names python3 so it matches what is actually executed below.
echo "运行: cd crawler && CRAWL_START_DATE=${START} python3 run_once.py"
echo ""

export CRAWL_START_DATE="$START"
# Subshell keeps the directory change local to the crawler invocation.
(cd crawler && python3 run_once.py)