fix:优化数据来源
This commit is contained in:
33
crawler/config.py
Normal file
33
crawler/config.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""爬虫配置"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Database location. The crawler shares a single SQLite file with the server,
# so the default points at server/data.db, two directory levels above this file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# `or` (instead of a .get() default) also covers DB_PATH being set but empty;
# SQLite treats an empty filename as a private temporary database, which would
# silently detach the crawler from the shared file.
DB_PATH = os.environ.get("DB_PATH") or str(PROJECT_ROOT / "server" / "data.db")
|
||||
|
||||
# Base URL of the Node API, used for pushing notifications.
# Overridable through the API_BASE environment variable; the default targets a
# local instance on port 3001.
API_BASE = os.environ.get("API_BASE", "http://localhost:3001")
|
||||
|
||||
# Crawl interval in seconds. Overridable through the CRAWL_INTERVAL environment
# variable; an unset or empty value falls back to 300.
_crawl_interval_raw = os.environ.get("CRAWL_INTERVAL") or "300"
try:
    CRAWL_INTERVAL = int(_crawl_interval_raw)
except ValueError:
    # Same exception type int() raised before, but the message now names the
    # variable so a bad deployment setting is easy to trace.
    raise ValueError(
        "CRAWL_INTERVAL must be an integer number of seconds, got %r"
        % (_crawl_interval_raw,)
    ) from None
|
||||
|
||||
# RSS sources for US-Iran / Middle East coverage; several outlets are listed so
# the event timeline stays current.
# NOTE(review): Reuters is reported to have retired its public
# feeds.reuters.com RSS endpoints -- confirm the first URL still responds.
RSS_FEEDS = [
    "https://feeds.reuters.com/reuters/topNews",
    "https://feeds.bbci.co.uk/news/world/rss.xml",
    "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml",
    "https://www.aljazeera.com/xml/rss/all.xml",
    "https://www.aljazeera.com/xml/rss/middleeast.xml",
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
]
|
||||
|
||||
# Keyword filter: an item is stored only if it matches at least one entry.
# NOTE(review): the English entries are lowercase and "us " carries a trailing
# space, presumably so a substring match does not fire inside words such as
# "use"; confirm against the matching code, which is not visible here.
KEYWORDS = [
    "iran", "iranian", "tehran", "以色列", "israel",
    "usa", "us ", "american", "美军", "美国",
    "middle east", "中东", "persian gulf", "波斯湾",
    "strike", "attack", "military", "missile", "核", "nuclear",
    "carrier", "航母", "houthi", "胡塞", "hamas",
]
|
||||
Reference in New Issue
Block a user