fix:增面
This commit is contained in:
140
scripts/check-crawler-data.cjs
Normal file
140
scripts/check-crawler-data.cjs
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env node
|
||||
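/**
 * Inspect the data written by the crawler: row counts plus the most recent
 * rows (situation_update, news_content, gdelt_events).
 * Usage (from the project root): node scripts/check-crawler-data.cjs
 * Optional: start the crawler first (npm run gdelt), then start the API,
 * or simply run this script to read the DB directly.
 */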
|
||||
const path = require('path')
const http = require('http')

// Switch the working directory to the project root (the parent of this
// script's directory) so the script behaves the same wherever it is started.
const projectRoot = path.resolve(__dirname, '..')
process.chdir(projectRoot)

// Required after the chdir above.
// NOTE(review): presumably server/db resolves its database file relative to
// the working directory — confirm before reordering these statements.
const db = require('../server/db')

// Base URL of the crawler service; override with the CRAWLER_URL env variable.
const CRAWLER_URL = process.env.CRAWLER_URL || 'http://localhost:8000'
// How many of the newest rows to print for situation_update and news_content.
const SHOW_ROWS = 10
|
||||
|
||||
/**
 * Fetch the crawler service's status document from `<baseUrl>/crawler/status`.
 *
 * This is a best-effort probe: every failure mode (malformed or unsupported
 * URL, connection error, idle timeout, non-2xx response, non-JSON body)
 * resolves to `null`. The returned promise never rejects.
 *
 * @param {string} [baseUrl=CRAWLER_URL] Base URL of the crawler service.
 * @param {number} [timeoutMs=3000] Socket idle timeout in milliseconds.
 * @returns {Promise<any|null>} The parsed JSON body, or `null` on any failure.
 */
function fetchCrawlerStatus(baseUrl = CRAWLER_URL, timeoutMs = 3000) {
  return new Promise((resolve) => {
    let url
    try {
      // Strip trailing slashes so "http://host/" does not yield "//crawler/status".
      url = new URL(`${String(baseUrl).replace(/\/+$/, '')}/crawler/status`)
    } catch {
      // A malformed base URL means "unreachable", not a crash of the whole script.
      resolve(null)
      return
    }

    const isHttps = url.protocol === 'https:'
    if (!isHttps && url.protocol !== 'http:') {
      resolve(null)
      return
    }
    // Pick the client that matches the scheme; http.request rejects https: URLs.
    const client = isHttps ? require('https') : http

    const req = client.request(url, { method: 'GET', timeout: timeoutMs }, (res) => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // An error page (even a JSON one) is not a status document.
        res.resume()
        resolve(null)
        return
      }
      // Decode as a stream so multi-byte characters split across chunks survive.
      res.setEncoding('utf8')
      let body = ''
      res.on('data', (chunk) => {
        body += chunk
      })
      res.on('error', () => resolve(null))
      res.on('end', () => {
        try {
          resolve(JSON.parse(body))
        } catch {
          resolve(null)
        }
      })
    })

    // The `timeout` option only emits this event; the request has to be torn
    // down manually, which in turn fires 'error' and resolves with null.
    req.on('timeout', () => req.destroy())
    req.on('error', () => resolve(null))
    req.end()
  })
}
|
||||
|
||||
// Shorten `text` to at most `max` code points; '…' is appended only when
// something was actually cut off. Null/undefined/empty input yields ''.
function truncateText(text, max) {
  const chars = Array.from(String(text || ''))
  return chars.length > max ? `${chars.slice(0, max).join('')}…` : chars.join('')
}

// Read the total row count and the newest rows of one table. A failure is
// logged and whatever was read before the failure (default 0 / []) is returned.
function readTable(database, log, table, rowsSql, params) {
  let count = 0
  let rows = []
  try {
    count = database.prepare(`SELECT COUNT(*) as c FROM ${table}`).get().c
    rows = database.prepare(rowsSql).all(...params)
  } catch (e) {
    log(`${table} 表读取失败:`, e.message)
  }
  return { count, rows }
}

/**
 * Print a report of the crawler's data: the crawler service status (when the
 * service is reachable), then the row count and newest rows of the
 * situation_update, news_content and gdelt_events tables.
 *
 * All arguments are optional and exist so the report can be driven with
 * substitutes; calling `run()` uses the module-level defaults.
 *
 * @param {object} [deps]
 * @param {object} [deps.database=db] Object exposing `prepare(sql)` whose
 *   result has `get()` and `all(...params)`.
 * @param {Function} [deps.fetchStatus=fetchCrawlerStatus] Async status probe;
 *   a rejection is treated the same as an unreachable service.
 * @param {string} [deps.crawlerUrl=CRAWLER_URL] Crawler base URL, passed to
 *   the probe and shown when the service is unreachable.
 * @param {number} [deps.rowLimit=SHOW_ROWS] Rows shown for situation_update
 *   and news_content (gdelt_events always shows up to 5).
 * @param {Function} [deps.log=console.log] Output sink.
 * @returns {Promise<void>}
 */
async function run({
  database = db,
  fetchStatus = fetchCrawlerStatus,
  crawlerUrl = CRAWLER_URL,
  rowLimit = SHOW_ROWS,
  log = console.log,
} = {}) {
  const rule = '========================================'
  log(rule)
  log('爬虫数据检查(条数 + 最近内容)')
  log(`${rule}\n`)

  // ---------- Crawler service status (optional) ----------
  let status = null
  try {
    status = await fetchStatus(crawlerUrl)
  } catch {
    // The probe is optional; a failure must not prevent the database report.
    status = null
  }
  if (status) {
    log('--- 爬虫服务状态 GET /crawler/status ---')
    log(' db_path:', status.db_path)
    log(' db_exists:', status.db_exists)
    log(' situation_update_count:', status.situation_update_count)
    log(' last_fetch_items:', status.last_fetch_items, '(本轮抓取条数)')
    log(' last_fetch_inserted:', status.last_fetch_inserted, '(去重后新增)')
    if (status.last_fetch_error) log(' last_fetch_error:', status.last_fetch_error)
  } else {
    log('--- 爬虫服务 ---')
    log(' 未启动或不可达:', crawlerUrl)
  }
  log('')

  // ---------- situation_update (event timeline) ----------
  const situation = readTable(
    database,
    log,
    'situation_update',
    'SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT ?',
    [rowLimit]
  )
  log('--- situation_update(事件脉络)---')
  log(' 总条数:', situation.count)
  if (situation.rows.length > 0) {
    log(' 最近', situation.rows.length, '条:')
    situation.rows.forEach((r, i) => {
      log(` ${i + 1}. [${r.timestamp}] ${r.category}/${r.severity} ${truncateText(r.summary, 50)}`)
    })
  }
  log('')

  // ---------- news_content (news table) ----------
  const news = readTable(
    database,
    log,
    'news_content',
    'SELECT title, summary, source, published_at, category, severity FROM news_content ORDER BY published_at DESC LIMIT ?',
    [rowLimit]
  )
  log('--- news_content(资讯表)---')
  log(' 总条数:', news.count)
  if (news.rows.length > 0) {
    log(' 最近', news.rows.length, '条:')
    news.rows.forEach((r, i) => {
      log(` ${i + 1}. [${r.published_at || ''}] ${r.source || ''} ${truncateText(r.title, 45)}`)
      if (r.summary) log(` summary: ${truncateText(r.summary, 60)}`)
    })
  }
  log('')

  // ---------- gdelt_events (map conflict points) ----------
  const gdelt = readTable(
    database,
    log,
    'gdelt_events',
    'SELECT event_id, event_time, title, impact_score FROM gdelt_events ORDER BY event_time DESC LIMIT 5',
    []
  )
  log('--- gdelt_events(地图冲突点)---')
  log(' 总条数:', gdelt.count)
  if (gdelt.rows.length > 0) {
    // Report the number of rows actually returned, which may be fewer than 5.
    log(' 最近', gdelt.rows.length, '条:')
    gdelt.rows.forEach((r, i) => {
      log(` ${i + 1}. [${r.event_time}] impact=${r.impact_score} ${truncateText(r.title, 50)}`)
    })
  }
  log(rule)
}
|
||||
|
||||
// Entry point: initialise the database, then print the report.
// Starting the chain from an already-resolved promise routes a synchronous
// throw from db.initDb() (or a non-promise return value) through the same
// .catch handler as an asynchronous failure.
Promise.resolve()
  .then(() => db.initDb())
  .then(() => run())
  .catch((err) => {
    // Fall back to the raw value when the rejection is not an Error object.
    console.error('失败:', (err && err.message) || err)
    process.exit(1)
  })
|
||||
Reference in New Issue
Block a user