Files
usa/crawler/main.py
2026-03-03 13:02:28 +08:00

42 lines
1.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""Crawler entry point: periodically runs the full write-to-database pipeline (fetch → clean → deduplicate → map → update tables → notify the API)."""
import time
import sys
from pathlib import Path
# Make sure `config` can be imported: put this file's own directory first on sys.path.
sys.path.insert(0, str(Path(__file__).resolve().parent))
from config import DB_PATH, API_BASE, CRAWL_INTERVAL
from pipeline import run_full_pipeline
def run_once() -> int:
    """Run one full pipeline round and report how many items were added.

    Calls ``run_full_pipeline`` with the configured ``DB_PATH`` and
    ``API_BASE``, with both translation and API notification switched on.

    Returns:
        The panel count from the pipeline result when it is truthy,
        otherwise the news count.
    """
    pipeline_kwargs = {
        "db_path": DB_PATH,
        "api_base": API_BASE,
        "translate": True,
        "notify": True,
    }
    # The pipeline yields a 3-tuple; the first value (fetched count) is not used here.
    _fetched, news_added, panel_added = run_full_pipeline(**pipeline_kwargs)
    if panel_added:
        return panel_added
    return news_added
def main() -> None:
    """Run the crawler loop until it is interrupted.

    Prints the configured database path, API base and interval once, then
    repeatedly calls ``run_once`` and sleeps ``CRAWL_INTERVAL`` seconds
    between rounds. A round that fails with an ordinary exception is reported
    and the loop carries on with the next round. ``KeyboardInterrupt``
    (Ctrl-C) ends the loop cleanly, whether it arrives during a round or
    during the sleep between rounds.
    """
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    try:
        while True:
            try:
                n = run_once()
                if n > 0:
                    print(f"[{time.strftime('%H:%M:%S')}] 抓取完成,去重后新增 {n} 条,已写库并通知 API")
            except Exception as e:
                # Top-level boundary: one failed round must not kill the
                # long-running crawler, so report it and keep looping.
                # KeyboardInterrupt is not an Exception subclass and
                # propagates to the outer handler.
                print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
            time.sleep(CRAWL_INTERVAL)
    except KeyboardInterrupt:
        # The loop spends almost all of its time inside time.sleep(), so the
        # handler has to wrap the whole loop; guarding only run_once() would
        # let Ctrl-C during the sleep escape as an uncaught traceback.
        pass
# Start the crawler loop only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()