#!/usr/bin/env node
/**
 * Inspect the data written by the crawler: row counts plus the most recent
 * rows of situation_update, news_content and gdelt_events.
 *
 * Usage (from the project root): node scripts/check-crawler-data.cjs
 * Optional: start the crawler first (npm run gdelt), then start the API, or
 * simply run this script to read the DB directly.
 */
const path = require('path')
const http = require('http')
const https = require('https')

const projectRoot = path.resolve(__dirname, '..')
// The working directory is switched to the project root before the db module
// is loaded, exactly as in the original script.
// NOTE(review): presumably ../server/db resolves its database path relative to
// the cwd — confirm before reordering these two statements.
process.chdir(projectRoot)
const db = require('../server/db')

const CRAWLER_URL = process.env.CRAWLER_URL || 'http://localhost:8000'
const CRAWLER_TIMEOUT_MS = 3000
const SHOW_ROWS = 10
const GDELT_SHOW_ROWS = 5

/**
 * Shorten `text` to at most `maxLength` UTF-16 units, appending an ellipsis
 * only when something was actually cut off.
 *
 * @param {*} text - Value to display; falsy values are treated as ''.
 * @param {number} maxLength - Maximum number of units kept from `text`.
 * @returns {string} The (possibly shortened) text.
 */
function truncate(text, maxLength) {
  const value = String(text || '')
  return value.length > maxLength ? `${value.slice(0, maxLength)}…` : value
}

/**
 * Query the crawler service's GET /crawler/status endpoint.
 *
 * This check is best-effort: every failure mode (invalid CRAWLER_URL,
 * connection error, timeout, non-2xx response, non-JSON body) resolves to
 * null instead of rejecting, so the DB checks still run.
 *
 * @returns {Promise<object|null>} Parsed JSON status, or null when unavailable.
 */
function fetchCrawlerStatus() {
  return new Promise((resolve) => {
    let req
    try {
      // Strip trailing slashes so the path never becomes "//crawler/status".
      const url = new URL(`${CRAWLER_URL.replace(/\/+$/, '')}/crawler/status`)
      const client = url.protocol === 'https:' ? https : http
      req = client.request(url, { method: 'GET', timeout: CRAWLER_TIMEOUT_MS }, (res) => {
        if (res.statusCode < 200 || res.statusCode >= 300) {
          // Drain the body so the socket is released, then report "unavailable".
          res.resume()
          resolve(null)
          return
        }
        let body = ''
        // Decode as a stream so multi-byte characters split across chunks
        // are not corrupted by per-chunk Buffer-to-string coercion.
        res.setEncoding('utf8')
        res.on('data', (chunk) => {
          body += chunk
        })
        res.on('end', () => {
          try {
            resolve(JSON.parse(body))
          } catch {
            resolve(null)
          }
        })
        res.on('error', () => resolve(null))
      })
    } catch {
      // Invalid URL or unsupported protocol: treat the service as unreachable.
      resolve(null)
      return
    }
    // The `timeout` option only emits an event; the request has to be
    // destroyed manually, otherwise a silent server would hang the script.
    req.on('timeout', () => {
      req.destroy()
      resolve(null)
    })
    req.on('error', () => resolve(null))
    req.end()
  })
}

/**
 * Read the total row count and the most recent rows of one table.
 *
 * Failures are logged and leave whatever was not read at its default
 * (count 0 / no rows); a count that was read successfully is kept even if the
 * rows query fails afterwards.
 *
 * @param {string} table - Table name; must be a constant from this file, as it
 *   is interpolated into the COUNT statement.
 * @param {string} rowsSql - SELECT statement with a single `?` LIMIT placeholder.
 * @param {number} limit - Maximum number of rows to fetch.
 * @returns {{count: number, rows: object[]}} Count and rows that could be read.
 */
function readTable(table, rowsSql, limit) {
  const result = { count: 0, rows: [] }
  try {
    result.count = db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get().c
    result.rows = db.prepare(rowsSql).all(limit)
  } catch (e) {
    console.log(`${table} 表读取失败:`, e.message)
  }
  return result
}

/**
 * Print the crawler service status (when reachable) followed by the row count
 * and latest rows of each crawler-written table.
 *
 * @returns {Promise<void>}
 */
async function run() {
  console.log('========================================')
  console.log('爬虫数据检查(条数 + 最近内容)')
  console.log('========================================\n')

  // ---------- Crawler service status (optional) ----------
  const status = await fetchCrawlerStatus()
  if (status) {
    console.log('--- 爬虫服务状态 GET /crawler/status ---')
    console.log(' db_path:', status.db_path)
    console.log(' db_exists:', status.db_exists)
    console.log(' situation_update_count:', status.situation_update_count)
    console.log(' last_fetch_items:', status.last_fetch_items, '(本轮抓取条数)')
    console.log(' last_fetch_inserted:', status.last_fetch_inserted, '(去重后新增)')
    if (status.last_fetch_error) console.log(' last_fetch_error:', status.last_fetch_error)
    console.log('')
  } else {
    console.log('--- 爬虫服务 ---')
    console.log(' 未启动或不可达:', CRAWLER_URL)
    console.log('')
  }

  // ---------- situation_update (event timeline, dashboard "recent updates") ----------
  const situation = readTable(
    'situation_update',
    'SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT ?',
    SHOW_ROWS
  )
  console.log('--- situation_update(事件脉络)---')
  console.log(' 总条数:', situation.count)
  if (situation.rows.length > 0) {
    console.log(' 最近', situation.rows.length, '条:')
    situation.rows.forEach((r, i) => {
      console.log(` ${i + 1}. [${r.timestamp}] ${r.category}/${r.severity} ${truncate(r.summary, 50)}`)
    })
  }
  console.log('')

  // ---------- news_content (news table, written by the crawler after dedup) ----------
  const news = readTable(
    'news_content',
    'SELECT title, summary, source, published_at, category, severity FROM news_content ORDER BY published_at DESC LIMIT ?',
    SHOW_ROWS
  )
  console.log('--- news_content(资讯表)---')
  console.log(' 总条数:', news.count)
  if (news.rows.length > 0) {
    console.log(' 最近', news.rows.length, '条:')
    news.rows.forEach((r, i) => {
      console.log(` ${i + 1}. [${r.published_at || ''}] ${r.source || ''} ${truncate(r.title, 45)}`)
      if (r.summary) console.log(` summary: ${truncate(r.summary, 60)}`)
    })
  }
  console.log('')

  // ---------- gdelt_events (conflict points on the map) ----------
  const gdelt = readTable(
    'gdelt_events',
    'SELECT event_id, event_time, title, impact_score FROM gdelt_events ORDER BY event_time DESC LIMIT ?',
    GDELT_SHOW_ROWS
  )
  console.log('--- gdelt_events(地图冲突点)---')
  console.log(' 总条数:', gdelt.count)
  if (gdelt.rows.length > 0) {
    // Report the number of rows actually returned rather than a fixed "5".
    console.log(' 最近', gdelt.rows.length, '条:')
    gdelt.rows.forEach((r, i) => {
      console.log(` ${i + 1}. [${r.event_time}] impact=${r.impact_score} ${truncate(r.title, 50)}`)
    })
  }
  console.log('========================================')
}

db.initDb()
  .then(() => run())
  .catch((err) => {
    console.error('失败:', err.message)
    process.exit(1)
  })