# -*- coding: utf-8 -*-
"""Crawler configuration."""

import os
from pathlib import Path

# Database path (SQLite file shared with the server).
# PROJECT_ROOT is the parent of the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# The DB_PATH environment variable overrides the default
# <PROJECT_ROOT>/server/data.db; the value is always kept as a plain string.
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))

# Node API base URL (used for pushing notifications).
# Overridable through the API_BASE environment variable.
API_BASE = os.environ.get("API_BASE", "http://localhost:3001")

# Crawl interval, in seconds (defaults to 300).
# Overridable through the CRAWL_INTERVAL environment variable; a value that
# is not a valid integer raises ValueError when this module is imported.
CRAWL_INTERVAL = int(os.environ.get("CRAWL_INTERVAL", "300"))

# RSS sources (US-Iran / Middle East related; several sources are listed so
# the real-time event timeline stays covered).
# NOTE(review): verify that every feed URL is still being served —
# feeds.reuters.com in particular may have been retired.
RSS_FEEDS = [
    "https://feeds.reuters.com/reuters/topNews",
    "https://feeds.bbci.co.uk/news/world/rss.xml",
    "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml",
    "https://www.aljazeera.com/xml/rss/all.xml",
    "https://www.aljazeera.com/xml/rss/middleeast.xml",
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
]

# Keyword filter: an item is stored only if it matches at least one keyword.
# NOTE(review): the trailing space in "us " looks deliberate (presumably to
# avoid matching inside longer words) — confirm against the matching code.
KEYWORDS = [
    "iran", "iranian", "tehran", "以色列", "israel",
    "usa", "us ", "american", "美军", "美国",
    "middle east", "中东", "persian gulf", "波斯湾",
    "strike", "attack", "military", "missile", "核", "nuclear",
    "carrier", "航母", "houthi", "胡塞", "hamas",
]