fix:增面

This commit is contained in:
Daniel
2026-03-03 22:42:21 +08:00
parent 09ec2e3a69
commit 86e50debec
13 changed files with 1486 additions and 0 deletions

View File

@@ -0,0 +1,140 @@
#!/usr/bin/env node
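/**
 * Check the data written by the crawler: row counts plus the most recent rows
 * of situation_update, news_content and gdelt_events.
 * Usage (from the project root): node scripts/check-crawler-data.cjs
 * Optional: start the crawler first (npm run gdelt), then either start the API
 * or run this script directly to read the DB.
 */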
// Node built-ins: `path` resolves the project root, `http` is used to probe the crawler service.
const path = require('path')
const http = require('http')
// The project root is the parent of the directory that contains this script.
const projectRoot = path.resolve(__dirname, '..')
// Switch the working directory to the project root BEFORE the DB module is loaded.
// NOTE(review): presumably ../server/db locates its database file relative to cwd — confirm.
process.chdir(projectRoot)
const db = require('../server/db')
// Base URL of the crawler service; overridable through the CRAWLER_URL environment variable.
const CRAWLER_URL = process.env.CRAWLER_URL || 'http://localhost:8000'
// Row limit bound to the "recent rows" queries for situation_update and news_content.
const SHOW_ROWS = 10
/**
 * Fetch the crawler service's status document from `<baseUrl>/crawler/status`.
 *
 * This is a best-effort probe: the returned promise never rejects. Every
 * failure mode (malformed URL, unsupported protocol, connection error,
 * timeout, aborted response, non-JSON body) resolves to `null`.
 *
 * @param {string} [baseUrl=CRAWLER_URL] - Base URL of the crawler service.
 * @param {number} [timeoutMs=3000] - Socket inactivity timeout in milliseconds.
 * @returns {Promise<any|null>} The parsed JSON body, or `null` on any failure.
 */
function fetchCrawlerStatus(baseUrl = CRAWLER_URL, timeoutMs = 3000) {
  return new Promise((resolve) => {
    let url
    try {
      url = new URL(`${baseUrl}/crawler/status`)
    } catch {
      // A malformed base URL must not abort the whole script; the probe is optional.
      resolve(null)
      return
    }

    const isHttps = url.protocol === 'https:'
    if (!isHttps && url.protocol !== 'http:') {
      resolve(null)
      return
    }
    // Load https lazily so plain-http setups keep using only the already-imported http module.
    const client = isHttps ? require('https') : http

    const req = client.request(
      {
        hostname: url.hostname,
        port: url.port || (isHttps ? 443 : 80),
        // Keep the query string; pathname alone would silently drop it.
        path: `${url.pathname}${url.search}`,
        method: 'GET',
        timeout: timeoutMs,
      },
      (res) => {
        // Decode as a stream so multi-byte UTF-8 sequences split across chunks stay intact.
        res.setEncoding('utf8')
        let body = ''
        res.on('data', (chunk) => {
          body += chunk
        })
        res.on('end', () => {
          try {
            resolve(JSON.parse(body))
          } catch {
            resolve(null)
          }
        })
        // If the response is cut short, 'end' never fires; settle with null instead of hanging.
        // When 'end' already resolved the promise these calls are no-ops.
        res.on('error', () => resolve(null))
        res.on('close', () => resolve(null))
      }
    )
    // The `timeout` option only emits this event; the request must be destroyed explicitly,
    // which then surfaces as an 'error' and resolves to null below.
    req.on('timeout', () => req.destroy())
    req.on('error', () => resolve(null))
    req.end()
  })
}
/**
 * Shorten `text` to at most `maxChars` characters for single-line console output.
 * `marker` is appended only when something was actually cut off.
 *
 * @param {*} text - Value to display; falsy values are treated as an empty string.
 * @param {number} maxChars - Maximum number of characters (code points) to keep.
 * @param {string} [marker='…'] - Suffix appended when the text was truncated.
 * @returns {string} The possibly shortened text.
 */
function truncateForDisplay(text, maxChars, marker = '…') {
  // Array.from iterates by code point, so a surrogate pair is never split in half.
  const chars = Array.from(String(text || ''))
  if (chars.length <= maxChars) return chars.join('')
  return chars.slice(0, maxChars).join('') + marker
}

/**
 * Read a table's total row count and its most recent rows.
 * A failure is logged and whatever was read before the failure is kept, so a
 * broken rows query still reports the count.
 *
 * @param {string} tableName - Table name used in the failure message.
 * @param {string} countSql - Query returning a single row with the count in column `c`.
 * @param {string} rowsSql - Query returning the recent rows.
 * @param {Array} rowParams - Values bound to the placeholders of `rowsSql`.
 * @returns {{count: number, rows: Array<object>}} Count and rows (defaults: 0 and []).
 */
function readCountAndRows(tableName, countSql, rowsSql, rowParams) {
  const snapshot = { count: 0, rows: [] }
  try {
    snapshot.count = db.prepare(countSql).get().c
    snapshot.rows = db.prepare(rowsSql).all(...rowParams)
  } catch (e) {
    console.log(`${tableName} 表读取失败:`, e.message)
  }
  return snapshot
}

/**
 * Print a report of what the crawler has written: the crawler service status
 * (when reachable) followed by row counts and the most recent rows of
 * situation_update, news_content and gdelt_events.
 *
 * @returns {Promise<void>} Resolves once the report has been printed.
 */
async function run() {
  console.log('========================================')
  console.log('爬虫数据检查(条数 + 最近内容)')
  console.log('========================================\n')

  // ---------- Crawler service status (optional) ----------
  const status = await fetchCrawlerStatus()
  if (status) {
    console.log('--- 爬虫服务状态 GET /crawler/status ---')
    console.log(' db_path:', status.db_path)
    console.log(' db_exists:', status.db_exists)
    console.log(' situation_update_count:', status.situation_update_count)
    console.log(' last_fetch_items:', status.last_fetch_items, '(本轮抓取条数)')
    console.log(' last_fetch_inserted:', status.last_fetch_inserted, '(去重后新增)')
    if (status.last_fetch_error) console.log(' last_fetch_error:', status.last_fetch_error)
    console.log('')
  } else {
    console.log('--- 爬虫服务 ---')
    console.log(' 未启动或不可达:', CRAWLER_URL)
    console.log('')
  }

  // ---------- situation_update: event timeline (dashboard "recent updates") ----------
  const situation = readCountAndRows(
    'situation_update',
    'SELECT COUNT(*) as c FROM situation_update',
    'SELECT id, timestamp, category, summary, severity FROM situation_update ORDER BY timestamp DESC LIMIT ?',
    [SHOW_ROWS]
  )
  console.log('--- situation_update事件脉络---')
  console.log(' 总条数:', situation.count)
  if (situation.rows.length > 0) {
    console.log(' 最近', situation.rows.length, '条:')
    situation.rows.forEach((r, i) => {
      console.log(` ${i + 1}. [${r.timestamp}] ${r.category}/${r.severity} ${truncateForDisplay(r.summary, 50)}`)
    })
  }
  console.log('')

  // ---------- news_content: news table written by the crawler after de-duplication ----------
  const news = readCountAndRows(
    'news_content',
    'SELECT COUNT(*) as c FROM news_content',
    'SELECT title, summary, source, published_at, category, severity FROM news_content ORDER BY published_at DESC LIMIT ?',
    [SHOW_ROWS]
  )
  console.log('--- news_content资讯表---')
  console.log(' 总条数:', news.count)
  if (news.rows.length > 0) {
    console.log(' 最近', news.rows.length, '条:')
    news.rows.forEach((r, i) => {
      console.log(` ${i + 1}. [${r.published_at || ''}] ${r.source || ''} ${truncateForDisplay(r.title, 45)}`)
      // The summary line is clipped without a marker, as before.
      if (r.summary) console.log(` summary: ${truncateForDisplay(r.summary, 60, '')}`)
    })
  }
  console.log('')

  // ---------- gdelt_events: conflict points shown on the map ----------
  const gdelt = readCountAndRows(
    'gdelt_events',
    'SELECT COUNT(*) as c FROM gdelt_events',
    'SELECT event_id, event_time, title, impact_score FROM gdelt_events ORDER BY event_time DESC LIMIT 5',
    []
  )
  console.log('--- gdelt_events地图冲突点---')
  console.log(' 总条数:', gdelt.count)
  if (gdelt.rows.length > 0) {
    // Report the number of rows actually returned; the table may hold fewer than 5.
    console.log(' 最近', gdelt.rows.length, '条:')
    gdelt.rows.forEach((r, i) => {
      console.log(` ${i + 1}. [${r.event_time}] impact=${r.impact_score} ${truncateForDisplay(r.title, 50)}`)
    })
  }
  console.log('========================================')
}
// Entry point: initialise the database, print the report, and exit with
// status 1 (after logging the error message) if either step rejects.
const reportFailure = (err) => {
  console.error('失败:', err.message)
  process.exit(1)
}

db.initDb()
  .then(() => run())
  .catch(reportFailure)