fix: 优化后端数据更新机制

This commit is contained in:
Daniel
2026-03-03 13:02:28 +08:00
parent 7284a1a60d
commit fa6f7407f0
20 changed files with 592 additions and 201 deletions

View File

@@ -16,7 +16,11 @@ DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
# Crawl interval in seconds (how often a full fetch round runs).
CRAWL_INTERVAL = int(os.environ.get("CRAWL_INTERVAL", "300"))
# Per-feed fetch timeout in seconds, so one stuck source cannot stall the whole round.
FEED_TIMEOUT = int(os.environ.get("FEED_TIMEOUT", "12"))
# RSS sources: major world media, covering US/Iran/Middle-East perspectives.
# Each entry is a URL string, or {"name": "display name", "url": "..."} to aid logging and debugging.
RSS_FEEDS = [
# 美国
"https://feeds.reuters.com/reuters/topNews",
@@ -35,6 +39,9 @@ RSS_FEEDS = [
# 中国
"https://english.news.cn/rss/world.xml",
"https://www.cgtn.com/rss/world",
# 凤凰网(军事 + 国际,中文视角)
{"name": "凤凰军事", "url": "https://feedx.net/rss/ifengmil.xml"},
{"name": "凤凰国际", "url": "https://feedx.net/rss/ifengworld.xml"},
# 伊朗
"https://www.presstv.ir/rss",
# 卡塔尔(中东)
@@ -42,6 +49,22 @@ RSS_FEEDS = [
"https://www.aljazeera.com/xml/rss/middleeast.xml",
]
def get_feed_sources(feeds=None):
    """Return a list of ``(name, url)`` tuples for the configured RSS feeds.

    Args:
        feeds: Optional iterable of feed entries; each entry is either a URL
            string or a dict ``{"name": ..., "url": ...}``. Defaults to the
            module-level ``RSS_FEEDS`` (backward-compatible with the
            zero-argument call).

    Returns:
        list[tuple[str, str]]: ``(name, url)`` pairs. ``name`` is used for
        logging; it falls back to the URL's host (or ``"rss"``). Entries with
        an empty or missing URL are dropped.
    """
    import urllib.parse

    if feeds is None:
        feeds = RSS_FEEDS
    out = []
    for raw in feeds:
        if isinstance(raw, dict):
            # Explicit display name from the config dict; "rss" if absent/empty.
            name = raw.get("name") or "rss"
            url = raw.get("url", "").strip()
        else:
            # Plain string entry (may be None/empty): name defaults to the host.
            url = (raw or "").strip()
            name = urllib.parse.urlparse(url).netloc or "rss"
        if url:  # skip entries with no usable URL
            out.append((name, url))
    return out
# 关键词过滤:至少匹配一个才会入库(与地图区域对应:伊拉克/叙利亚/海湾/红海/地中海等)
KEYWORDS = [
# 伊朗