# -*- coding: utf-8 -*-
"""Crawler entry point: periodically runs the full write-to-DB pipeline
(fetch -> clean -> dedupe -> map -> update tables -> notify API)."""
import sys
import time
from pathlib import Path

# Make sure `config`/`pipeline` are importable when run as a plain script.
sys.path.insert(0, str(Path(__file__).resolve().parent))

from config import DB_PATH, API_BASE, CRAWL_INTERVAL
from pipeline import run_full_pipeline


def run_once() -> int:
    """Run one round: fetch, clean, dedupe, map, write tables, notify.

    Returns:
        Number of new rows added this round — the panel count if non-zero,
        otherwise the news count.
    """
    _n_fetched, n_news, n_panel = run_full_pipeline(
        db_path=DB_PATH,
        api_base=API_BASE,
        translate=True,
        notify=True,
    )
    # Prefer the panel count; fall back to news rows when no panel rows landed.
    return n_panel or n_news


def main() -> None:
    """Loop forever: run one crawl round, then sleep CRAWL_INTERVAL seconds."""
    print("Crawler started. DB:", DB_PATH)
    print("API:", API_BASE, "| Interval:", CRAWL_INTERVAL, "s")
    while True:
        try:
            n = run_once()
            if n > 0:
                print(f"[{time.strftime('%H:%M:%S')}] 抓取完成,去重后新增 {n} 条,已写库并通知 API")
        except KeyboardInterrupt:
            break
        except Exception as e:
            # Top-level boundary: log the error and keep the crawler alive.
            print(f"[{time.strftime('%H:%M:%S')}] Error: {e}")
        try:
            time.sleep(CRAWL_INTERVAL)
        except KeyboardInterrupt:
            # BUGFIX: Ctrl+C during the (long) sleep previously raised an
            # unhandled traceback; exit the loop cleanly instead.
            break


if __name__ == "__main__":
    main()