From 37e39d35b8bdb3a1b0306ba5e58b7ae596b5adef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=88=90?= Date: Wed, 18 Mar 2026 15:25:34 +0800 Subject: [PATCH] 1 --- .gitignore | 4 +- server/.env.example | 2 + server/README.md | 40 ++++++++++ server/app.js | 6 +- server/config/cron_tasks.js | 14 ++++ server/models/amazon_product.js | 39 ++++++++++ server/models/amazon_review.js | 31 ++++++++ server/models/amazon_search_item.js | 30 ++++++++ server/models/crawl_run_record.js | 3 +- server/models/index.js | 10 ++- server/models/schedule_task.js | 20 +---- server/routes/schedule_task.js | 74 +------------------ server/services/amazon_persist.js | 110 ++++++++++++++++++++++++++++ server/services/cron_manager.js | 31 +------- server/services/puppeteer_runner.js | 64 +++++++++++++++- server/services/schedule_loader.js | 41 +++++------ server/services/task_executor.js | 16 ++-- 17 files changed, 368 insertions(+), 167 deletions(-) create mode 100644 server/README.md create mode 100644 server/config/cron_tasks.js create mode 100644 server/models/amazon_product.js create mode 100644 server/models/amazon_review.js create mode 100644 server/models/amazon_search_item.js create mode 100644 server/services/amazon_persist.js diff --git a/.gitignore b/.gitignore index 4041396..6876629 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ server/node_modules/* -.env \ No newline at end of file +server/puppeteer_profile/* +.env +chrome-win/* \ No newline at end of file diff --git a/server/.env.example b/server/.env.example index c20f82d..e305bfe 100644 --- a/server/.env.example +++ b/server/.env.example @@ -11,3 +11,5 @@ CRX_SRC_PATH=d:/项目/电商抓取项目/mv2_simple_crx/src SERVER_PORT=38080 ACTION_TIMEOUT_MS=300000 PUPPETEER_HEADLESS=false +# 可选:指定浏览器路径(不填默认用 ../chrome-win/chrome.exe) +CHROME_EXECUTABLE_PATH= diff --git a/server/README.md b/server/README.md new file mode 100644 index 0000000..140243b --- /dev/null +++ b/server/README.md @@ -0,0 +1,40 @@ +## server(Koa + Sequelize + MySQL) + +### 功能 +- `POST /api/crawl/run_action`:服务端调用扩展 action,返回结果,并按 `stage` 自动入库 +- **定时任务写死配置**:`config/cron_tasks.js`(不走数据库) + +### 运行 +1. 安装依赖 + +```bash +cd server +npm install +``` + +2. 配置环境变量 + +```bash +copy .env.example .env +``` + +3. 初始化/更新表结构 + +```bash +node scripts/db_sync.js +``` + +4. 启动 + +```bash +node app.js +``` + +### 定时任务 +编辑 `config/cron_tasks.js` 的 `cron_task_list`,重启服务即可生效。 + +### 落库表 +- `crawl_run_record`:所有 action 调用的原始请求/响应 +- `amazon_product`:`stage=detail` 详情 +- `amazon_search_item`:`stage=list` 列表 item +- `amazon_review`:`stage=reviews` 评论 diff --git a/server/app.js b/server/app.js index b51f309..25b7f80 100644 --- a/server/app.js +++ b/server/app.js @@ -4,8 +4,7 @@ import body_parser from 'koa-bodyparser'; import { sequelize } from './models/index.js'; import { crawl_router } from './routes/crawl.js'; -import { schedule_task_router } from './routes/schedule_task.js'; -import { reload_all_schedules } from './services/schedule_loader.js'; +import { start_all_cron_tasks } from './services/schedule_loader.js'; dotenv.config(); @@ -13,7 +12,6 @@ const app = new Koa(); app.use(body_parser({ jsonLimit: '10mb' })); app.use(crawl_router.routes()).use(crawl_router.allowedMethods()); -app.use(schedule_task_router.routes()).use(schedule_task_router.allowedMethods()); app.use(async (ctx) => { ctx.status = 404; @@ -24,7 +22,7 @@ const port = Number(process.env.SERVER_PORT || 38080); await sequelize.authenticate(); await sequelize.sync(); -await reload_all_schedules(); +start_all_cron_tasks(); app.listen(port); // eslint-disable-next-line no-console diff --git a/server/config/cron_tasks.js b/server/config/cron_tasks.js new file mode 100644 index 0000000..d049bad --- /dev/null +++ b/server/config/cron_tasks.js @@ -0,0 +1,14 @@ +/** + * 写死定时任务配置(不走数据库) + * cron_expression 参考 node-cron + */ + +export const cron_task_list = [ + // 示例:每 6 小时跑一次列表抓取 + // { + // name: 'amazon_search_list_every_6h', + // cron_expression: '0 */6 * * *', + // action_name: 'amazon_search_list', + // action_payload: { keyword: '午餐包', limit: 100 } + // } +]; diff --git a/server/models/amazon_product.js b/server/models/amazon_product.js new file mode 100644 index 0000000..65169ac --- /dev/null +++ b/server/models/amazon_product.js @@ -0,0 +1,39 @@ +import { DataTypes } from 'sequelize'; + +export function define_amazon_product(sequelize) { + return sequelize.define( + 'amazon_product', + { + id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true }, + asin: { type: DataTypes.STRING(32), allowNull: false }, + url: { type: DataTypes.TEXT, allowNull: false }, + title: { type: DataTypes.TEXT, allowNull: true }, + price: { type: DataTypes.STRING(64), allowNull: true }, + sku: { type: DataTypes.STRING(256), allowNull: true }, + sku_color: { type: DataTypes.STRING(128), allowNull: true }, + sku_size: { type: DataTypes.STRING(128), allowNull: true }, + brand_line: { type: DataTypes.TEXT, allowNull: true }, + brand_store_url: { type: DataTypes.TEXT, allowNull: true }, + ac_badge: { type: DataTypes.STRING(128), allowNull: true }, + bestseller_hint: { type: DataTypes.TEXT, allowNull: true }, + delivery_hint: { type: DataTypes.TEXT, allowNull: true }, + social_proof: { type: DataTypes.TEXT, allowNull: true }, + sustainability_hint: { type: DataTypes.TEXT, allowNull: true }, + rating_stars: { type: DataTypes.STRING(64), allowNull: true }, + review_count_text: { type: DataTypes.STRING(64), allowNull: true }, + main_image: { type: DataTypes.TEXT, allowNull: true }, + images_json: { type: DataTypes.TEXT('long'), allowNull: true }, + bullets_json: { type: DataTypes.TEXT('long'), allowNull: true }, + product_info_json: { type: DataTypes.TEXT('long'), allowNull: true }, + detail_extra_lines_json: { type: DataTypes.TEXT('long'), allowNull: true } + }, + { + tableName: 'amazon_product', + indexes: [ + { unique: true, fields: ['asin'] }, + { fields: ['created_at'] }, + { fields: ['updated_at'] } + ] + } + ); +} diff --git a/server/models/amazon_review.js b/server/models/amazon_review.js new file mode 100644 index 0000000..aca3a7f --- /dev/null +++ b/server/models/amazon_review.js @@ -0,0 +1,31 @@ +import { DataTypes } from 'sequelize'; + +export function define_amazon_review(sequelize) { + return sequelize.define( + 'amazon_review', + { + id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true }, + asin: { type: DataTypes.STRING(32), allowNull: true }, + url: { type: DataTypes.TEXT, allowNull: false }, + review_id: { type: DataTypes.STRING(64), allowNull: false }, + author: { type: DataTypes.STRING(256), allowNull: true }, + title: { type: DataTypes.TEXT, allowNull: true }, + body: { type: DataTypes.TEXT('long'), allowNull: true }, + rating_text: { type: DataTypes.STRING(64), allowNull: true }, + review_date: { type: DataTypes.STRING(128), allowNull: true }, + review_index: { type: DataTypes.INTEGER, allowNull: true }, + batch_key: { type: DataTypes.STRING(64), allowNull: false }, + batch_total: { type: DataTypes.INTEGER, allowNull: true }, + batch_limit: { type: DataTypes.INTEGER, allowNull: true } + }, + { + tableName: 'amazon_review', + indexes: [ + { unique: true, fields: ['review_id'] }, + { fields: ['asin'] }, + { fields: ['batch_key'] }, + { fields: ['created_at'] } + ] + } + ); +} diff --git a/server/models/amazon_search_item.js b/server/models/amazon_search_item.js new file mode 100644 index 0000000..c515823 --- /dev/null +++ b/server/models/amazon_search_item.js @@ -0,0 +1,30 @@ +import { DataTypes } from 'sequelize'; + +export function define_amazon_search_item(sequelize) { + return sequelize.define( + 'amazon_search_item', + { + id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true }, + asin: { type: DataTypes.STRING(32), allowNull: false }, + url: { type: DataTypes.TEXT, allowNull: false }, + title: { type: DataTypes.TEXT, allowNull: true }, + price: { type: DataTypes.STRING(64), allowNull: true }, + rating: { type: DataTypes.FLOAT, allowNull: true }, + rating_text: { type: DataTypes.STRING(64), allowNull: true }, + review_count: { type: DataTypes.INTEGER, allowNull: true }, + review_count_text: { type: DataTypes.STRING(64), allowNull: true }, + rank_index: { type: DataTypes.INTEGER, allowNull: true, comment: '列表中的 index 字段' }, + batch_key: { type: DataTypes.STRING(64), allowNull: false, comment: '一次列表抓取的批次 key' }, + batch_total: { type: DataTypes.INTEGER, allowNull: true }, + batch_limit: { type: DataTypes.INTEGER, allowNull: true } + }, + { + tableName: 'amazon_search_item', + indexes: [ + { fields: ['asin'] }, + { fields: ['batch_key'] }, + { fields: ['created_at'] } + ] + } + ); +} diff --git a/server/models/crawl_run_record.js b/server/models/crawl_run_record.js index ccbcb22..637c6be 100644 --- a/server/models/crawl_run_record.js +++ b/server/models/crawl_run_record.js @@ -15,8 +15,7 @@ export function define_crawl_run_record(sequelize) { allowNull: false, defaultValue: 'api', comment: 'api | cron' - }, - schedule_task_id: { type: DataTypes.BIGINT.UNSIGNED, allowNull: true } + } }, { tableName: 'crawl_run_record', diff --git a/server/models/index.js b/server/models/index.js index 398a558..f32ac66 100644 --- a/server/models/index.js +++ b/server/models/index.js @@ -1,7 +1,9 @@ import { Sequelize } from 'sequelize'; import { get_sequelize_options } from '../config/database.js'; import { define_crawl_run_record } from './crawl_run_record.js'; -import { define_schedule_task } from './schedule_task.js'; +import { define_amazon_product } from './amazon_product.js'; +import { define_amazon_search_item } from './amazon_search_item.js'; +import { define_amazon_review } from './amazon_review.js'; const sequelize_options = get_sequelize_options(); const { database, username, password, ...rest } = sequelize_options; @@ -9,7 +11,7 @@ const { database, username, password, ...rest } = sequelize_options; export const sequelize = new Sequelize(database, username, password, rest); export const crawl_run_record = define_crawl_run_record(sequelize); -export const schedule_task = define_schedule_task(sequelize); -schedule_task.hasMany(crawl_run_record, { foreignKey: 'schedule_task_id', as: 'records' }); -crawl_run_record.belongsTo(schedule_task, { foreignKey: 'schedule_task_id', as: 'schedule_task' }); +export const amazon_product = define_amazon_product(sequelize); +export const amazon_search_item = define_amazon_search_item(sequelize); +export const amazon_review = define_amazon_review(sequelize); diff --git a/server/models/schedule_task.js b/server/models/schedule_task.js index 67a839e..0266733 100644 --- a/server/models/schedule_task.js +++ b/server/models/schedule_task.js @@ -1,19 +1 @@ -import { DataTypes } from 'sequelize'; - -export function define_schedule_task(sequelize) { - return sequelize.define( - 'schedule_task', - { - id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true }, - name: { type: DataTypes.STRING(128), allowNull: false }, - cron_expression: { type: DataTypes.STRING(64), allowNull: false }, - action_name: { type: DataTypes.STRING(128), allowNull: false }, - payload_json: { type: DataTypes.TEXT, allowNull: true }, - enabled: { type: DataTypes.BOOLEAN, allowNull: false, defaultValue: true }, - last_run_at: { type: DataTypes.DATE, allowNull: true } - }, - { - tableName: 'schedule_task' - } - ); -} +// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js) diff --git a/server/routes/schedule_task.js b/server/routes/schedule_task.js index 294b067..0266733 100644 --- a/server/routes/schedule_task.js +++ b/server/routes/schedule_task.js @@ -1,73 +1 @@ -import Router from '@koa/router'; -import { schedule_task } from '../models/index.js'; -import { safe_json_parse, safe_json_stringify } from '../services/json_utils.js'; - -export const schedule_task_router = new Router(); - -schedule_task_router.post('/api/schedule_task/create', async (ctx) => { - const { name, cron_expression, action_name, payload } = ctx.request.body || {}; - if (!name || !cron_expression || !action_name) { - ctx.status = 400; - ctx.body = { ok: false, error: '缺少 name/cron_expression/action_name' }; - return; - } - - const payload_json = payload ? safe_json_stringify(payload) : null; - const row = await schedule_task.create({ - name, - cron_expression, - action_name, - payload_json, - enabled: true - }); - - ctx.body = { ok: true, data: { id: row.id } }; -}); - -schedule_task_router.post('/api/schedule_task/list', async (ctx) => { - const rows = await schedule_task.findAll({ order: [['id', 'desc']] }); - ctx.body = { - ok: true, - data: rows.map((r) => ({ - id: r.id, - name: r.name, - cron_expression: r.cron_expression, - action_name: r.action_name, - payload: safe_json_parse(r.payload_json), - enabled: r.enabled, - last_run_at: r.last_run_at - })) - }; -}); - -schedule_task_router.post('/api/schedule_task/set_enabled', async (ctx) => { - const { id, enabled } = ctx.request.body || {}; - if (!id || typeof enabled !== 'boolean') { - ctx.status = 400; - ctx.body = { ok: false, error: '缺少 id/enabled(boolean)' }; - return; - } - - const row = await schedule_task.findByPk(id); - if (!row) { - ctx.status = 404; - ctx.body = { ok: false, error: '任务不存在' }; - return; - } - - row.enabled = enabled; - await row.save(); - ctx.body = { ok: true }; -}); - -schedule_task_router.post('/api/schedule_task/delete', async (ctx) => { - const { id } = ctx.request.body || {}; - if (!id) { - ctx.status = 400; - ctx.body = { ok: false, error: '缺少 id' }; - return; - } - - await schedule_task.destroy({ where: { id } }); - ctx.body = { ok: true }; -}); +// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js) diff --git a/server/services/amazon_persist.js b/server/services/amazon_persist.js new file mode 100644 index 0000000..c8ae549 --- /dev/null +++ b/server/services/amazon_persist.js @@ -0,0 +1,110 @@ +import { + amazon_product, + amazon_search_item, + amazon_review +} from '../models/index.js'; +import { safe_json_stringify } from './json_utils.js'; + +function build_batch_key(prefix) { + return `${prefix}_${Date.now()}_${Math.random().toString().slice(2, 8)}`; +} + +function pick_asin_from_url(url) { + if (!url) return null; + const m = String(url).match(/\/dp\/([A-Z0-9]{8,16})/i); + return m && m[1] ? m[1].toUpperCase() : null; +} + +export async function persist_amazon_result(result) { + if (!result || !result.stage) { + return; + } + + if (result.stage === 'detail') { + const asin = result.asin || pick_asin_from_url(result.url); + if (!asin) { + return; + } + + await amazon_product.upsert({ + asin, + url: result.url || '', + title: result.title || null, + price: result.price || null, + sku: result.sku || null, + sku_color: result.sku_color || null, + sku_size: result.sku_size || null, + brand_line: result.brand_line || null, + brand_store_url: result.brand_store_url || null, + ac_badge: result.ac_badge || null, + bestseller_hint: result.bestseller_hint || null, + delivery_hint: result.delivery_hint || null, + social_proof: result.social_proof || null, + sustainability_hint: result.sustainability_hint || null, + rating_stars: result.rating_stars || null, + review_count_text: result.review_count_text || null, + main_image: result.main_image || null, + images_json: safe_json_stringify(result.images || []), + bullets_json: safe_json_stringify(result.bullets || []), + product_info_json: safe_json_stringify(result.product_info || {}), + detail_extra_lines_json: safe_json_stringify(result.detail_extra_lines || []) + }); + + return; + } + + if (result.stage === 'list') { + const batch_key = build_batch_key('list'); + const items = Array.isArray(result.items) ? result.items : []; + + for (const it of items) { + const asin = it.asin || pick_asin_from_url(it.url); + if (!asin || !it.url) continue; + + await amazon_search_item.create({ + asin, + url: it.url, + title: it.title || null, + price: it.price || null, + rating: typeof it.rating === 'number' ? it.rating : null, + rating_text: it.rating_text || null, + review_count: typeof it.review_count === 'number' ? it.review_count : null, + review_count_text: it.review_count_text || null, + rank_index: typeof it.index === 'number' ? it.index : null, + batch_key, + batch_total: typeof result.total === 'number' ? result.total : null, + batch_limit: typeof result.limit === 'number' ? result.limit : null + }); + } + + return; + } + + if (result.stage === 'reviews') { + const batch_key = build_batch_key('reviews'); + const asin = pick_asin_from_url(result.url); + const items = Array.isArray(result.items) ? result.items : []; + + for (const it of items) { + const review_id = it.review_id; + if (!review_id) continue; + + const asin_value = asin || pick_asin_from_url(it.url) || pick_asin_from_url(result.url); + + await amazon_review.upsert({ + asin: asin_value || null, + url: result.url || '', + review_id, + author: it.author || null, + title: it.title || null, + body: it.body || null, + rating_text: it.rating_text || null, + review_date: it.date || null, + review_index: typeof it.index === 'number' ? it.index : null, + batch_key, + batch_total: typeof result.total === 'number' ? result.total : null, + batch_limit: typeof result.limit === 'number' ? result.limit : null + }); + } + } +} diff --git a/server/services/cron_manager.js b/server/services/cron_manager.js index 1cd635c..0266733 100644 --- a/server/services/cron_manager.js +++ b/server/services/cron_manager.js @@ -1,30 +1 @@ -import cron from 'node-cron'; - -const task_id_to_cron_job = new Map(); - -export function stop_all_cron_jobs() { - for (const job of task_id_to_cron_job.values()) { - job.stop(); - } - task_id_to_cron_job.clear(); -} - -export function upsert_cron_job(schedule_task_id, cron_expression, on_tick) { - const existing = task_id_to_cron_job.get(schedule_task_id); - if (existing) { - existing.stop(); - task_id_to_cron_job.delete(schedule_task_id); - } - - const job = cron.schedule(cron_expression, on_tick, { scheduled: true }); - task_id_to_cron_job.set(schedule_task_id, job); -} - -export function remove_cron_job(schedule_task_id) { - const job = task_id_to_cron_job.get(schedule_task_id); - if (!job) { - return; - } - job.stop(); - task_id_to_cron_job.delete(schedule_task_id); -} +// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js) diff --git a/server/services/puppeteer_runner.js b/server/services/puppeteer_runner.js index 3948292..617fad6 100644 --- a/server/services/puppeteer_runner.js +++ b/server/services/puppeteer_runner.js @@ -1,4 +1,5 @@ import dotenv from 'dotenv'; +import fs from 'node:fs'; import path from 'node:path'; import puppeteer from 'puppeteer'; @@ -30,17 +31,64 @@ function get_extension_id_from_targets(targets) { return null; } +async function wait_for_extension_id(browser, timeout_ms) { + const existing = get_extension_id_from_targets(browser.targets()); + if (existing) { + return existing; + } + + const target = await browser + .waitForTarget((t) => { + const url = t.url(); + return typeof url === 'string' && url.startsWith('chrome-extension://'); + }, { timeout: timeout_ms }) + .catch(() => null); + + if (!target) { + return null; + } + + return get_extension_id_from_targets([target]); +} + +function get_chrome_executable_path() { + // 优先环境变量,方便你后续切换版本 + const from_env = process.env.CHROME_EXECUTABLE_PATH; + if (from_env) { + return path.resolve(from_env); + } + + // 默认使用项目根目录的 chrome-win/chrome.exe + // 当前进程 cwd 通常是 server/,所以回到上一级 + return path.resolve(process.cwd(), '../chrome-win/chrome.exe'); +} + export async function get_or_create_browser() { if (browser_singleton) { return browser_singleton; } - const extension_path = path.resolve(get_crx_src_path()); + const chrome_executable_path = get_chrome_executable_path(); + if (!fs.existsSync(chrome_executable_path)) { + throw new Error(`Chrome 不存在: ${chrome_executable_path}`); + } + + const raw_extension_path = path.resolve(get_crx_src_path()); + const manifest_path = path.resolve(raw_extension_path, 'manifest.json'); + if (!fs.existsSync(manifest_path)) { + throw new Error(`扩展 manifest.json 不存在: ${manifest_path}`); + } + + const extension_path = raw_extension_path.replace(/\\/g, '/'); const headless = String(process.env.PUPPETEER_HEADLESS || 'false') === 'true'; + const user_data_dir = path.resolve(process.cwd(), 'puppeteer_profile'); browser_singleton = await puppeteer.launch({ + executablePath: chrome_executable_path, headless, args: [ + `--user-data-dir=${user_data_dir}`, + '--enable-extensions', `--disable-extensions-except=${extension_path}`, `--load-extension=${extension_path}`, '--no-default-browser-check', @@ -58,11 +106,19 @@ export async function invoke_extension_action(action_name, action_payload) { const page = await browser.newPage(); await page.goto('about:blank'); - const targets = await browser.targets(); - const extension_id = get_extension_id_from_targets(targets); + // 尝试先打开 chrome://extensions 触发扩展初始化(某些环境下扩展 target 不会立刻出现) + try { + await page.goto('chrome://extensions/', { waitUntil: 'domcontentloaded' }); + } catch (err) { + // ignore + } + + const extension_id = await wait_for_extension_id(browser, 15000); if (!extension_id) { await page.close(); - throw new Error('未找到扩展 extension_id(请确认 CRX_SRC_PATH 指向 src 且成功加载)'); + throw new Error( + '未找到扩展 extension_id:Chrome 未加载扩展(常见原因:MV2 被禁用/企业策略未生效/CRX_SRC_PATH 不正确/使用了 headless)' + ); } const bridge_url = `chrome-extension://${extension_id}/bridge/bridge.html`; diff --git a/server/services/schedule_loader.js b/server/services/schedule_loader.js index 787d72b..db6e01e 100644 --- a/server/services/schedule_loader.js +++ b/server/services/schedule_loader.js @@ -1,33 +1,30 @@ -import { schedule_task } from '../models/index.js'; -import { safe_json_parse } from './json_utils.js'; +import cron from 'node-cron'; +import { cron_task_list } from '../config/cron_tasks.js'; import { execute_action_and_record } from './task_executor.js'; -import { remove_cron_job, upsert_cron_job } from './cron_manager.js'; -export async function reload_all_schedules() { - const rows = await schedule_task.findAll(); +const cron_jobs = []; - for (const row of rows) { - if (!row.enabled) { - remove_cron_job(row.id); - continue; - } - - upsert_cron_job(row.id, row.cron_expression, async () => { +export function start_all_cron_tasks() { + for (const task of cron_task_list) { + const job = cron.schedule(task.cron_expression, async () => { try { - await schedule_task.update( - { last_run_at: new Date() }, - { where: { id: row.id } } - ); - await execute_action_and_record({ - action_name: row.action_name, - action_payload: safe_json_parse(row.payload_json) || {}, - source: 'cron', - schedule_task_id: row.id + action_name: task.action_name, + action_payload: task.action_payload || {}, + source: 'cron' }); } catch (err) { - // cron 执行失败已在 crawl_run_record 落库,避免重复抛出影响其它任务 + // 失败会在 crawl_run_record 落库 } }); + + cron_jobs.push(job); } } + +export function stop_all_cron_tasks() { + for (const job of cron_jobs) { + job.stop(); + } + cron_jobs.length = 0; +} diff --git a/server/services/task_executor.js b/server/services/task_executor.js index ae615b7..de1abe2 100644 --- a/server/services/task_executor.js +++ b/server/services/task_executor.js @@ -1,14 +1,10 @@ import { crawl_run_record } from '../models/index.js'; import { safe_json_stringify } from './json_utils.js'; import { invoke_extension_action } from './puppeteer_runner.js'; +import { persist_amazon_result } from './amazon_persist.js'; export async function execute_action_and_record(params) { - const { - action_name, - action_payload, - source, - schedule_task_id - } = params; + const { action_name, action_payload, source } = params; const request_payload = safe_json_stringify(action_payload || {}); @@ -18,8 +14,13 @@ export async function execute_action_and_record(params) { try { const result = await invoke_extension_action(action_name, action_payload || {}); + ok = true; result_payload = safe_json_stringify(result); + + // 按 stage 自动入库(不影响原始 run_record 记录) + await persist_amazon_result(result); + return result; } catch (err) { ok = false; @@ -32,8 +33,7 @@ export async function execute_action_and_record(params) { ok, result_payload, error_message, - source, - schedule_task_id: schedule_task_id || null + source }); } }