This commit is contained in:
2026-03-18 20:43:42 +08:00
parent 92021eaa41
commit cf3422b1ca
8 changed files with 155 additions and 45 deletions

View File

@@ -3,6 +3,7 @@ import { sleep_ms } from '../flow_utils.js';
import { amazon_product, amazon_search_item, amazon_review } from '../../../models/index.js';
import { safe_json_stringify } from '../../json_utils.js';
import { close_browser } from '../../puppeteer/puppeteer_runner.js';
import { Op } from 'sequelize';
function build_batch_key(prefix) {
return `${prefix}_${Date.now()}_${Math.random().toString().slice(2, 8)}`;
@@ -14,6 +15,24 @@ function pick_asin_from_url(url) {
return m && m[1] ? m[1].toUpperCase() : null;
}
function get_today_start() {
const d = new Date();
d.setHours(0, 0, 0, 0);
return d;
}
async function has_detail_fetched_today(asin) {
if (!asin) return false;
const row = await amazon_product.findOne({
attributes: ['asin', 'updated_at'],
where: {
asin,
updated_at: { [Op.gte]: get_today_start() }
}
});
return !!row;
}
function unwrap_action_result(res) {
// 插件返回通常是 { ..., result: { stage, items... } }
if (res && typeof res === 'object' && res.result && typeof res.result === 'object') {
@@ -192,26 +211,25 @@ export async function run_amazon_search_detail_reviews_flow(flow_payload) {
if (gap_ms > 0) {
await sleep_ms(gap_ms);
}
const detail_res = await execute_action_and_record({
action_name: 'amazon_product_detail',
action_payload: { product_url: url },
const asin = pick_asin_from_url(url);
const skip_detail = asin ? await has_detail_fetched_today(asin) : false;
const res = await execute_action_and_record({
action_name: 'amazon_product_detail_reviews',
action_payload: { product_url: url, limit: reviews_limit, skip_detail },
source: 'cron'
});
await persist_detail(detail_res);
const r = unwrap_action_result(res);
const detail_part = r && r.detail ? r.detail : null;
const reviews_part = r && r.reviews ? r.reviews : null;
if (gap_ms > 0) {
await sleep_ms(gap_ms);
if (detail_part) {
await persist_detail(detail_part);
}
if (reviews_part) {
await persist_reviews(reviews_part);
}
const reviews_res = await execute_action_and_record({
action_name: 'amazon_product_reviews',
action_payload: { product_url: url, limit: reviews_limit },
source: 'cron',
keep_browser_open: true
});
await persist_reviews(reviews_res);
}

View File

@@ -1,24 +1,25 @@
import cron from 'node-cron';
import { cron_task_list } from '../config/cron_tasks.js';
import { execute_action_and_record } from './task_executor.js';
import { get_flow_runner } from './flows/flow_registry.js';
const cron_jobs = [];
const running_task_name_set = new Set();
function has_argv_flag(flag_name) {
const name = String(flag_name || '').trim();
if (!name) return false;
return process.argv.includes(name);
}
function should_run_cron_now() {
return has_argv_flag('--run_cron_now');
}
async function run_cron_task(task) {
if (!task || !task.type) {
throw new Error('cron_task 缺少 type');
}
if (task.type === 'action') {
await execute_action_and_record({
action_name: task.action_name,
action_payload: task.action_payload || {},
source: 'cron'
});
return;
}
if (task.type === 'flow') {
const run_flow = get_flow_runner(task.flow_name);
@@ -29,26 +30,41 @@ async function run_cron_task(task) {
throw new Error(`cron_task type 不支持: ${task.type}`);
}
async function run_cron_task_with_guard(task_name, task) {
if (running_task_name_set.has(task_name)) {
// eslint-disable-next-line no-console
console.log('[cron] skip (already running)', { name: task_name });
return;
}
running_task_name_set.add(task_name);
try {
await run_cron_task(task);
} catch (error) {
console.warn('[cron] error', { task_name, error });
} finally {
running_task_name_set.delete(task_name);
}
}
export async function start_all_cron_tasks() {
const run_now = should_run_cron_now();
for (const task of cron_task_list) {
const task_name = task && task.name ? String(task.name) : 'cron_task';
const job = cron.schedule(task.cron_expression, async () => {
if (running_task_name_set.has(task_name)) {
// eslint-disable-next-line no-console
console.log('[cron] skip (already running)', { name: task_name });
return;
}
const task_name = task && task.name ? String(task.name) : 'cron_task';
running_task_name_set.add(task_name);
try {
await run_cron_task(task);
} finally {
running_task_name_set.delete(task_name);
}
});
console.log('job', { task_name, });
if (run_now) {
// eslint-disable-next-line no-console
console.log('[cron] run_now', { task_name });
await run_cron_task_with_guard(task_name, task);
}
else {
const job = cron.schedule(task.cron_expression, async () => {
await run_cron_task_with_guard(task_name, task);
});
console.log('job', { task_name, });
cron_jobs.push(job);
cron_jobs.push(job);
}
}
}