# -*- coding: utf-8 -*-
"""Crawler configuration."""

import os
from pathlib import Path

# Database path (the SQLite file is shared with the server).
# PROJECT_ROOT is the parent of the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db"))

# Node API base URL (used for pushing notifications).
API_BASE = os.environ.get("API_BASE", "http://localhost:3001")

# Crawl interval in seconds. int() raises ValueError at import time when the
# CRAWL_INTERVAL environment variable is set to a non-integer string.
CRAWL_INTERVAL = int(os.environ.get("CRAWL_INTERVAL", "300"))

# RSS sources: mainstream world media, covering multiple perspectives on
# US-Iran / Middle East news.
RSS_FEEDS = [
    # United States
    # NOTE(review): the feeds.reuters.com endpoint may no longer be served —
    # confirm it still returns a feed before relying on it.
    "https://feeds.reuters.com/reuters/topNews",
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
    # United Kingdom
    "https://feeds.bbci.co.uk/news/world/rss.xml",
    "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml",
    "https://www.theguardian.com/world/rss",
    # France
    "https://www.france24.com/en/rss",
    # Germany
    "https://rss.dw.com/xml/rss-en-world",
    # Russia
    "https://tass.com/rss/v2.xml",
    "https://www.rt.com/rss/",
    # China
    "https://english.news.cn/rss/world.xml",
    "https://www.cgtn.com/rss/world",
    # Iran
    "https://www.presstv.ir/rss",
    # Qatar (Middle East)
    "https://www.aljazeera.com/xml/rss/all.xml",
    "https://www.aljazeera.com/xml/rss/middleeast.xml",
]

# Keyword filter: an item is stored only if it matches at least one keyword
# (aligned with the map regions: Iraq / Syria / Gulf / Red Sea /
# Mediterranean, etc.). Entries are lowercase English terms plus their
# Chinese equivalents.
KEYWORDS = [
    # Iran
    "iran", "iranian", "tehran", "德黑兰", "bushehr", "布什尔", "abbas", "阿巴斯",
    # Israel / Palestine
    "israel", "以色列", "hamas", "gaza", "加沙", "hezbollah", "真主党",
    # United States
    # NOTE(review): "us " keeps its trailing space — presumably so a substring
    # match does not fire inside longer words; confirm against the matcher.
    "usa", "us ", "american", "美军", "美国", "pentagon",
    # Regions (covered by the map)
    "middle east", "中东", "persian gulf", "波斯湾", "gulf of oman", "阿曼湾",
    "arabian sea", "阿拉伯海", "red sea", "红海", "mediterranean", "地中海",
    "strait of hormuz", "霍尔木兹",
    # Iraq / Syria
    "iraq", "伊拉克", "baghdad", "巴格达", "erbil", "埃尔比勒", "basra", "巴士拉",
    "syria", "叙利亚", "damascus", "大马士革", "deir", "代尔祖尔",
    # Gulf states
    "saudi", "沙特", "riyadh", "利雅得", "qatar", "卡塔尔", "doha", "多哈",
    "uae", "emirates", "阿联酋", "dubai", "迪拜", "abu dhabi",
    "bahrain", "巴林", "kuwait", "科威特", "oman", "阿曼", "yemen", "也门",
    # Jordan / Turkey / Egypt / Djibouti / Lebanon
    "jordan", "约旦", "amman", "安曼",
    "lebanon", "黎巴嫩",
    "turkey", "土耳其", "incirlik", "因吉尔利克",
    "egypt", "埃及", "cairo", "开罗", "sinai", "西奈",
    "djibouti", "吉布提",
    # Military / bases ("incirlik" is already listed under Turkey above, so
    # the duplicate entry that used to be here has been dropped)
    "al-asad", "al asad", "阿萨德", "al udeid", "乌代德",
    "strike", "attack", "military", "missile", "核", "nuclear",
    "carrier", "航母", "drone", "uav", "无人机", "retaliation", "报复",
    "base", "基地", "troops", "troop", "soldier", "personnel",
    # Houthis / armed groups / military forces
    "houthi", "胡塞", "houthis",
    "idf", "irgc", "革命卫队", "qassem soleimani", "苏莱曼尼",
]