Compare commits
3 Commits
7b42ee8ef5
...
aecb7944a8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aecb7944a8 | ||
|
|
18aa083c91 | ||
|
|
bc1068ec66 |
3
1.md
3
1.md
@@ -42,3 +42,6 @@
|
||||
|
||||
|
||||
"C:\Program Files\Google\Chrome\Application\chrome.exe" --disable-features=ExtensionManifestV2Unsupported,ExtensionManifestV2Disabled
|
||||
|
||||
1.浏览器下载地址
|
||||
|
||||
|
||||
@@ -58,7 +58,9 @@ export function get_app_config() {
|
||||
chrome_executable_path: (get_env('CHROME_EXECUTABLE_PATH') || '').trim() || path.resolve(__dirname, '../../chrome-win/chrome.exe'),
|
||||
log_invoke_action: get_bool('LOG_INVOKE_ACTION', true),
|
||||
auto_close_browser: get_bool('AUTO_CLOSE_BROWSER', true),
|
||||
enable_stealth: get_bool('ENABLE_STEALTH', true)
|
||||
enable_stealth: get_bool('ENABLE_STEALTH', true),
|
||||
log_sql: get_bool('LOG_SQL', false),
|
||||
log_sql_benchmark: get_bool('LOG_SQL_BENCHMARK', false)
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -4,11 +4,22 @@
|
||||
*/
|
||||
|
||||
export const cron_task_list = [
|
||||
// 示例:每 6 小时跑一次列表抓取
|
||||
// 任务流:先跑列表,再依赖列表 URL 跑详情+评论
|
||||
{
|
||||
name: 'amazon_search_list_every_6h',
|
||||
name: 'amazon_search_detail_reviews_every_1h',
|
||||
cron_expression: '0 */1 * * *',
|
||||
action_name: 'amazon_search_list',
|
||||
action_payload: { keyword: '野餐包', limit: 100 }
|
||||
type: 'flow',
|
||||
flow_name: 'amazon_search_detail_reviews',
|
||||
flow_payload: {
|
||||
// 插件参数:category_keyword / sort_by / limit
|
||||
category_keyword: '野餐包',
|
||||
// featured / price_asc / price_desc / review / newest / bestseller
|
||||
sort_by: 'bestseller',
|
||||
limit: 100,
|
||||
|
||||
// flow 自己的参数
|
||||
reviews_limit: 50,
|
||||
gap_ms: 500
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
@@ -10,7 +10,18 @@ export function get_sequelize_options() {
|
||||
password: cfg.mysql.password,
|
||||
database: cfg.mysql.database,
|
||||
dialect: 'mysql',
|
||||
logging: false,
|
||||
benchmark: cfg.crawler.log_sql_benchmark === true,
|
||||
logging: cfg.crawler.log_sql === true
|
||||
? (sql, timing_ms) => {
|
||||
if (cfg.crawler.log_sql_benchmark === true && typeof timing_ms === 'number') {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log('[sql]', { timing_ms, sql });
|
||||
return;
|
||||
}
|
||||
// eslint-disable-next-line no-console
|
||||
console.log('[sql]', sql);
|
||||
}
|
||||
: false,
|
||||
define: {
|
||||
underscored: true,
|
||||
timestamps: true,
|
||||
|
||||
@@ -6,26 +6,24 @@ export function define_amazon_product(sequelize) {
|
||||
{
|
||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||
asin: { type: DataTypes.STRING(32), allowNull: false },
|
||||
url: { type: DataTypes.TEXT, allowNull: false },
|
||||
title: { type: DataTypes.TEXT, allowNull: true },
|
||||
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||
title: { type: DataTypes.STRING(1024), allowNull: true },
|
||||
price: { type: DataTypes.STRING(64), allowNull: true },
|
||||
sku: { type: DataTypes.STRING(256), allowNull: true },
|
||||
sku_color: { type: DataTypes.STRING(128), allowNull: true },
|
||||
sku_size: { type: DataTypes.STRING(128), allowNull: true },
|
||||
brand_line: { type: DataTypes.TEXT, allowNull: true },
|
||||
brand_store_url: { type: DataTypes.TEXT, allowNull: true },
|
||||
sku_json: { type: DataTypes.JSON, allowNull: true, comment: 'sku 结构化 JSON,如 {color:[], size:[]}' },
|
||||
brand_line: { type: DataTypes.STRING(512), allowNull: true },
|
||||
brand_store_url: { type: DataTypes.STRING(2048), allowNull: true },
|
||||
ac_badge: { type: DataTypes.STRING(128), allowNull: true },
|
||||
bestseller_hint: { type: DataTypes.TEXT, allowNull: true },
|
||||
delivery_hint: { type: DataTypes.TEXT, allowNull: true },
|
||||
social_proof: { type: DataTypes.TEXT, allowNull: true },
|
||||
sustainability_hint: { type: DataTypes.TEXT, allowNull: true },
|
||||
bestseller_hint: { type: DataTypes.STRING(512), allowNull: true },
|
||||
delivery_hint: { type: DataTypes.STRING(512), allowNull: true },
|
||||
social_proof: { type: DataTypes.STRING(256), allowNull: true },
|
||||
sustainability_hint: { type: DataTypes.STRING(256), allowNull: true },
|
||||
rating_stars: { type: DataTypes.STRING(64), allowNull: true },
|
||||
review_count_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||
main_image: { type: DataTypes.TEXT, allowNull: true },
|
||||
images_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
bullets_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
product_info_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
detail_extra_lines_json: { type: DataTypes.TEXT('long'), allowNull: true }
|
||||
main_image: { type: DataTypes.STRING(2048), allowNull: true },
|
||||
images_json: { type: DataTypes.JSON, allowNull: true },
|
||||
bullets_json: { type: DataTypes.JSON, allowNull: true },
|
||||
product_info_json: { type: DataTypes.JSON, allowNull: true },
|
||||
detail_extra_lines_json: { type: DataTypes.JSON, allowNull: true }
|
||||
},
|
||||
{
|
||||
tableName: 'amazon_product',
|
||||
|
||||
@@ -6,10 +6,10 @@ export function define_amazon_review(sequelize) {
|
||||
{
|
||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||
asin: { type: DataTypes.STRING(32), allowNull: true },
|
||||
url: { type: DataTypes.TEXT, allowNull: false },
|
||||
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||
review_id: { type: DataTypes.STRING(64), allowNull: false },
|
||||
author: { type: DataTypes.STRING(256), allowNull: true },
|
||||
title: { type: DataTypes.TEXT, allowNull: true },
|
||||
title: { type: DataTypes.STRING(512), allowNull: true },
|
||||
body: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||
review_date: { type: DataTypes.STRING(128), allowNull: true },
|
||||
|
||||
@@ -6,8 +6,8 @@ export function define_amazon_search_item(sequelize) {
|
||||
{
|
||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||
asin: { type: DataTypes.STRING(32), allowNull: false },
|
||||
url: { type: DataTypes.TEXT, allowNull: false },
|
||||
title: { type: DataTypes.TEXT, allowNull: true },
|
||||
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||
title: { type: DataTypes.STRING(1024), allowNull: true },
|
||||
price: { type: DataTypes.STRING(64), allowNull: true },
|
||||
rating: { type: DataTypes.FLOAT, allowNull: true },
|
||||
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
import {
|
||||
amazon_product,
|
||||
amazon_search_item,
|
||||
amazon_review
|
||||
} from '../models/index.js';
|
||||
import { safe_json_stringify } from './json_utils.js';
|
||||
|
||||
function build_batch_key(prefix) {
|
||||
return `${prefix}_${Date.now()}_${Math.random().toString().slice(2, 8)}`;
|
||||
}
|
||||
|
||||
function pick_asin_from_url(url) {
|
||||
if (!url) return null;
|
||||
const m = String(url).match(/\/dp\/([A-Z0-9]{8,16})/i);
|
||||
return m && m[1] ? m[1].toUpperCase() : null;
|
||||
}
|
||||
|
||||
export async function persist_amazon_result({ result }) {
|
||||
if (!result || !result.stage) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.stage === 'detail') {
|
||||
const asin = result.asin || pick_asin_from_url(result.url);
|
||||
if (!asin) {
|
||||
return;
|
||||
}
|
||||
|
||||
await amazon_product.upsert({
|
||||
asin,
|
||||
url: result.url || '',
|
||||
title: result.title || null,
|
||||
price: result.price || null,
|
||||
sku: result.sku || null,
|
||||
sku_color: result.sku_color || null,
|
||||
sku_size: result.sku_size || null,
|
||||
brand_line: result.brand_line || null,
|
||||
brand_store_url: result.brand_store_url || null,
|
||||
ac_badge: result.ac_badge || null,
|
||||
bestseller_hint: result.bestseller_hint || null,
|
||||
delivery_hint: result.delivery_hint || null,
|
||||
social_proof: result.social_proof || null,
|
||||
sustainability_hint: result.sustainability_hint || null,
|
||||
rating_stars: result.rating_stars || null,
|
||||
review_count_text: result.review_count_text || null,
|
||||
main_image: result.main_image || null,
|
||||
images_json: safe_json_stringify(result.images || []),
|
||||
bullets_json: safe_json_stringify(result.bullets || []),
|
||||
product_info_json: safe_json_stringify(result.product_info || {}),
|
||||
detail_extra_lines_json: safe_json_stringify(result.detail_extra_lines || [])
|
||||
});
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.stage === 'list') {
|
||||
const batch_key = build_batch_key('list');
|
||||
const items = Array.isArray(result.items) ? result.items : [];
|
||||
|
||||
for (const it of items) {
|
||||
const asin = it.asin || pick_asin_from_url(it.url);
|
||||
if (!asin || !it.url) continue;
|
||||
|
||||
await amazon_search_item.create({
|
||||
asin,
|
||||
url: it.url,
|
||||
title: it.title || null,
|
||||
price: it.price || null,
|
||||
rating: typeof it.rating === 'number' ? it.rating : null,
|
||||
rating_text: it.rating_text || null,
|
||||
review_count: typeof it.review_count === 'number' ? it.review_count : null,
|
||||
review_count_text: it.review_count_text || null,
|
||||
rank_index: typeof it.index === 'number' ? it.index : null,
|
||||
batch_key,
|
||||
batch_total: typeof result.total === 'number' ? result.total : null,
|
||||
batch_limit: typeof result.limit === 'number' ? result.limit : null
|
||||
});
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (result.stage === 'reviews') {
|
||||
const batch_key = build_batch_key('reviews');
|
||||
const asin = pick_asin_from_url(result.url);
|
||||
const items = Array.isArray(result.items) ? result.items : [];
|
||||
|
||||
for (const it of items) {
|
||||
const review_id = it.review_id;
|
||||
if (!review_id) continue;
|
||||
|
||||
const asin_value = asin || pick_asin_from_url(it.url) || pick_asin_from_url(result.url);
|
||||
|
||||
await amazon_review.upsert({
|
||||
asin: asin_value || null,
|
||||
url: result.url || '',
|
||||
review_id,
|
||||
author: it.author || null,
|
||||
title: it.title || null,
|
||||
body: it.body || null,
|
||||
rating_text: it.rating_text || null,
|
||||
review_date: it.date || null,
|
||||
review_index: typeof it.index === 'number' ? it.index : null,
|
||||
batch_key,
|
||||
batch_total: typeof result.total === 'number' ? result.total : null,
|
||||
batch_limit: typeof result.limit === 'number' ? result.limit : null
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,232 @@
|
||||
import { execute_action_and_record } from '../../task_executor.js';
|
||||
import { sleep_ms } from '../flow_utils.js';
|
||||
import { amazon_product, amazon_search_item, amazon_review } from '../../../models/index.js';
|
||||
import { safe_json_stringify } from '../../json_utils.js';
|
||||
import { close_browser } from '../../puppeteer/puppeteer_runner.js';
|
||||
|
||||
/**
 * Build a loosely-unique key used to tag every row written by one persistence batch.
 *
 * Format: `<prefix>_<epoch milliseconds>_<6 random digits>`.
 *
 * @param {string} prefix - Batch category, e.g. 'list' or 'reviews'.
 * @returns {string} The batch key.
 */
function build_batch_key(prefix) {
  // Draw an integer in [0, 999999] and zero-pad it. Slicing the decimal string of
  // Math.random() can yield fewer than six characters (0.5 -> "5") or fragments of
  // exponent notation for very small values, which made the suffix irregular.
  const random_suffix = Math.floor(Math.random() * 1e6).toString().padStart(6, '0');
  return `${prefix}_${Date.now()}_${random_suffix}`;
}
|
||||
|
||||
/**
 * Extract an ASIN-like identifier from an Amazon URL.
 *
 * Recognised path shapes, tried in this order:
 *   1. `/dp/<ASIN>`
 *   2. `/gp/product/<ASIN>`
 *   3. `/product-reviews/<ASIN>`
 *
 * `/dp/` is tried first so that every URL that matched before this function learned the
 * other shapes still yields exactly the same result.
 *
 * @param {*} url - URL (any value; it is stringified). Falsy values yield null.
 * @returns {string|null} Upper-cased identifier (8-16 alphanumerics), or null when none is found.
 */
function pick_asin_from_url(url) {
  if (!url) return null;

  const text = String(url);
  const path_patterns = [
    /\/dp\/([A-Z0-9]{8,16})/i,
    /\/gp\/product\/([A-Z0-9]{8,16})/i,
    /\/product-reviews\/([A-Z0-9]{8,16})/i
  ];

  for (const pattern of path_patterns) {
    const found = text.match(pattern);
    if (found && found[1]) {
      return found[1].toUpperCase();
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Return the inner `result` object of an action response when there is one.
 *
 * The plugin usually returns `{ ..., result: { stage, items... } }`; in that case the
 * nested object is returned. Any other value is handed back untouched.
 *
 * @param {*} res - Raw value returned by an action.
 * @returns {*} `res.result` when both `res` and `res.result` are non-null objects, else `res`.
 */
function unwrap_action_result(res) {
  const is_object = (value) => Boolean(value) && typeof value === 'object';
  return is_object(res) && is_object(res.result) ? res.result : res;
}
|
||||
|
||||
/**
 * Validate and normalise the optional `sort_by` flow parameter.
 *
 * @param {*} sort_by - Requested sort order; stringified and trimmed before validation.
 * @returns {string|null} The trimmed sort key, or null when the value is
 *   undefined, null, or blank.
 * @throws {Error} When the trimmed value is not one of the supported sort keys.
 */
function normalize_sort_by(sort_by) {
  const supported = ['featured', 'price_asc', 'price_desc', 'review', 'newest', 'bestseller'];

  const is_missing = sort_by === undefined || sort_by === null;
  const trimmed = is_missing ? '' : String(sort_by).trim();

  if (trimmed === '') {
    return null;
  }
  if (!supported.includes(trimmed)) {
    throw new Error(`sort_by 不支持: ${trimmed}`);
  }
  return trimmed;
}
|
||||
|
||||
/**
 * Upsert one product-detail action result into `amazon_product`.
 *
 * Nothing is written when the unwrapped result is not a `detail` stage, or when no ASIN
 * is present on the result and none can be derived from its URL.
 *
 * @param {*} detail_res_raw - Raw action response (wrapped or already unwrapped).
 * @returns {Promise<void>}
 */
async function persist_detail(detail_res_raw) {
  const detail = unwrap_action_result(detail_res_raw);
  const is_detail_stage = Boolean(detail) && detail.stage === 'detail';
  if (!is_detail_stage) {
    return;
  }

  const asin = detail.asin || pick_asin_from_url(detail.url);
  if (!asin) {
    return;
  }

  // Small local normalisers so the row below reads as a plain field mapping.
  const or_null = (value) => value || null;
  const array_or_null = (value) => (Array.isArray(value) ? value : null);

  // A plain (non-array) object sku goes to the JSON column; anything else stays in `sku`.
  const raw_sku = detail.sku;
  const sku_is_plain_object = Boolean(raw_sku) && typeof raw_sku === 'object' && !Array.isArray(raw_sku);

  const row = {
    asin,
    url: detail.url || '',
    title: or_null(detail.title),
    price: or_null(detail.price),
    sku: sku_is_plain_object ? null : or_null(raw_sku),
    sku_json: sku_is_plain_object ? raw_sku : null,
    brand_line: or_null(detail.brand_line),
    brand_store_url: or_null(detail.brand_store_url),
    ac_badge: or_null(detail.ac_badge),
    bestseller_hint: or_null(detail.bestseller_hint),
    delivery_hint: or_null(detail.delivery_hint),
    social_proof: or_null(detail.social_proof),
    sustainability_hint: or_null(detail.sustainability_hint),
    rating_stars: or_null(detail.rating_stars),
    review_count_text: or_null(detail.review_count_text),
    main_image: or_null(detail.main_image),
    images_json: array_or_null(detail.images),
    bullets_json: array_or_null(detail.bullets),
    product_info_json: detail.product_info && typeof detail.product_info === 'object' ? detail.product_info : null,
    detail_extra_lines_json: array_or_null(detail.detail_extra_lines)
  };

  await amazon_product.upsert(row);
}
|
||||
|
||||
/**
 * Insert every usable item of a search-list action result into `amazon_search_item`.
 *
 * All rows written by one call share a freshly generated batch key. An item is skipped
 * when it is not an object, has no URL, or has no ASIN (neither on the item nor derivable
 * from its URL). Nothing is written when the unwrapped result is not a `list` stage.
 *
 * @param {*} list_res_raw - Raw action response (wrapped or already unwrapped).
 * @returns {Promise<void>}
 */
async function persist_list(list_res_raw) {
  const list_res = unwrap_action_result(list_res_raw);
  if (!list_res || list_res.stage !== 'list') {
    return;
  }

  const batch_key = build_batch_key('list');
  const items = Array.isArray(list_res.items) ? list_res.items : [];
  // Batch-level values are the same for every row, so compute them once.
  const batch_total = typeof list_res.total === 'number' ? list_res.total : null;
  const batch_limit = typeof list_res.limit === 'number' ? list_res.limit : null;

  for (const it of items) {
    // Guard against null / non-object entries: reading `it.asin` on them would throw
    // and abort the whole batch.
    if (!it || typeof it !== 'object') continue;

    const asin = it.asin || pick_asin_from_url(it.url);
    if (!asin || !it.url) continue;

    // Rows are created one at a time, in list order.
    await amazon_search_item.create({
      asin,
      url: it.url,
      title: it.title || null,
      price: it.price || null,
      rating: typeof it.rating === 'number' ? it.rating : null,
      rating_text: it.rating_text || null,
      review_count: typeof it.review_count === 'number' ? it.review_count : null,
      review_count_text: it.review_count_text || null,
      rank_index: typeof it.index === 'number' ? it.index : null,
      batch_key,
      batch_total,
      batch_limit
    });
  }
}
|
||||
|
||||
/**
 * Upsert every usable review of a reviews action result into `amazon_review`.
 *
 * All rows written by one call share a freshly generated batch key. A review is skipped
 * when it is not an object or has no `review_id`. The ASIN is taken from the result URL,
 * falling back to the review's own URL; it is stored as null when neither yields one.
 * Nothing is written when the unwrapped result is not a `reviews` stage.
 *
 * @param {*} reviews_res_raw - Raw action response (wrapped or already unwrapped).
 * @returns {Promise<void>}
 */
async function persist_reviews(reviews_res_raw) {
  const reviews_res = unwrap_action_result(reviews_res_raw);
  if (!reviews_res || reviews_res.stage !== 'reviews') {
    return;
  }

  const batch_key = build_batch_key('reviews');
  const page_asin = pick_asin_from_url(reviews_res.url);
  const items = Array.isArray(reviews_res.items) ? reviews_res.items : [];
  // Batch-level values are the same for every row, so compute them once.
  const batch_total = typeof reviews_res.total === 'number' ? reviews_res.total : null;
  const batch_limit = typeof reviews_res.limit === 'number' ? reviews_res.limit : null;

  for (const it of items) {
    // Guard against null / non-object entries: reading `it.review_id` on them would throw
    // and abort the whole batch.
    if (!it || typeof it !== 'object') continue;

    const review_id = it.review_id;
    if (!review_id) continue;

    // The page URL was already tried above, so only the per-review URL is left as a fallback.
    const asin_value = page_asin || pick_asin_from_url(it.url);

    await amazon_review.upsert({
      asin: asin_value || null,
      url: reviews_res.url || '',
      review_id,
      author: it.author || null,
      title: it.title || null,
      body: it.body || null,
      rating_text: it.rating_text || null,
      review_date: it.date || null,
      review_index: typeof it.index === 'number' ? it.index : null,
      batch_key,
      batch_total,
      batch_limit
    });
  }
}
|
||||
|
||||
/**
 * Require a flow parameter to be a non-blank string.
 *
 * @param {*} v - Candidate value.
 * @param {string} name - Parameter name, used in the error message.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {Error} When `v` is not a string or contains only whitespace.
 */
function must_string(v, name) {
  const trimmed = typeof v === 'string' ? v.trim() : '';
  if (trimmed === '') {
    throw new Error(`flow 参数 ${name} 必须是字符串`);
  }
  return trimmed;
}
|
||||
|
||||
/**
 * Read an integer flow parameter, falling back to a default when it is missing or unusable.
 *
 * The default is returned for `undefined`, `null`, blank strings, and anything that does
 * not convert to a finite number (NaN, Infinity). Fractional values are truncated toward
 * zero so the result is always an integer.
 *
 * @param {*} v - Candidate value (number or numeric string).
 * @param {number} default_value - Value returned when `v` is missing or not a finite number.
 * @returns {number} The integer value, or `default_value`.
 */
function get_int(v, default_value) {
  if (v === undefined || v === null) return default_value;

  // Number('') and Number('   ') are 0, which would silently override the default.
  const raw = typeof v === 'string' ? v.trim() : v;
  if (raw === '') return default_value;

  const n = Number(raw);
  if (!Number.isFinite(n)) return default_value;

  return Math.trunc(n);
}
|
||||
|
||||
/**
 * Run the "search list -> product detail -> product reviews" flow for one category keyword.
 *
 * Steps:
 *   1. Run the `amazon_search_list` action and persist its items.
 *   2. For every item URL, in order: run `amazon_product_detail` and persist it, then run
 *      `amazon_product_reviews` and persist it. When `gap_ms` is positive, the flow sleeps
 *      that long before each of those two actions.
 *   3. Close the shared browser, whether the flow succeeded or failed.
 *
 * @param {object} flow_payload - Flow parameters.
 * @param {string} flow_payload.category_keyword - Required search keyword.
 * @param {string} [flow_payload.sort_by] - Optional sort key; see normalize_sort_by.
 * @param {number} [flow_payload.limit=100] - Forwarded to the list action.
 * @param {number} [flow_payload.reviews_limit=50] - Forwarded to the reviews action.
 * @param {number} [flow_payload.gap_ms=0] - Pause before each detail/reviews action.
 * @returns {Promise<object>} Summary: stage, flow_name, category_keyword, sort_by, limit,
 *   reviews_limit, total_urls, picked.
 * @throws {Error} When a parameter is invalid, when the list action does not return a
 *   `list` stage, or when any action or persistence step rejects.
 */
export async function run_amazon_search_detail_reviews_flow(flow_payload) {
  // Tolerate a missing payload so the caller gets the parameter validation error below
  // instead of a TypeError on property access.
  const payload = flow_payload || {};

  const category_keyword = must_string(payload.category_keyword, 'category_keyword');
  const sort_by = normalize_sort_by(payload.sort_by);
  const limit = get_int(payload.limit, 100);
  const reviews_limit = get_int(payload.reviews_limit, 50);
  const gap_ms = get_int(payload.gap_ms, 0);

  // sort_by is only sent when the caller asked for one.
  const list_payload = { category_keyword, limit };
  if (sort_by) {
    list_payload.sort_by = sort_by;
  }

  try {
    const list_res = await execute_action_and_record({
      action_name: 'amazon_search_list',
      action_payload: list_payload,
      source: 'cron'
    });

    await persist_list(list_res);

    const list_result = unwrap_action_result(list_res);
    if (!list_result || list_result.stage !== 'list') {
      throw new Error('amazon_search_list 返回非 list stage');
    }

    const items = Array.isArray(list_result.items) ? list_result.items : [];
    const urls = items
      .map((it) => (it && it.url ? String(it.url) : ''))
      .filter((u) => u);

    for (const url of urls) {
      if (gap_ms > 0) {
        await sleep_ms(gap_ms);
      }

      // NOTE(review): only the reviews action below passes keep_browser_open; the list and
      // detail actions do not — confirm that asymmetry is intended.
      const detail_res = await execute_action_and_record({
        action_name: 'amazon_product_detail',
        action_payload: { product_url: url },
        source: 'cron'
      });

      await persist_detail(detail_res);

      if (gap_ms > 0) {
        await sleep_ms(gap_ms);
      }

      const reviews_res = await execute_action_and_record({
        action_name: 'amazon_product_reviews',
        action_payload: { product_url: url, limit: reviews_limit },
        source: 'cron',
        keep_browser_open: true
      });

      await persist_reviews(reviews_res);
    }

    return {
      stage: 'flow',
      flow_name: 'amazon_search_detail_reviews',
      category_keyword,
      sort_by: sort_by || 'featured',
      limit,
      reviews_limit,
      total_urls: urls.length,
      // Every collected URL is processed, so picked equals total_urls.
      picked: urls.length
    };
  } finally {
    // Runs on failure too: a step that throws after a keep_browser_open call would
    // otherwise leave the browser running.
    await close_browser();
  }
}
|
||||
13
server/services/flows/flow_registry.js
Normal file
13
server/services/flows/flow_registry.js
Normal file
@@ -0,0 +1,13 @@
|
||||
import { run_amazon_search_detail_reviews_flow } from './amazon/amazon_search_detail_reviews_flow.js';
|
||||
|
||||
// Registry of runnable flows, keyed by the flow_name used in cron task definitions.
const flow_map = {
  amazon_search_detail_reviews: run_amazon_search_detail_reviews_flow
};

/**
 * Look up the runner function registered for a flow name.
 *
 * @param {string} flow_name - Name of the flow to run.
 * @returns {Function} The registered flow runner.
 * @throws {Error} When no flow is registered under `flow_name`.
 */
export function get_flow_runner(flow_name) {
  // Only own keys count: a bare `flow_map[flow_name]` would also return inherited
  // members such as `constructor` or `toString` instead of rejecting the name.
  const is_registered = Object.prototype.hasOwnProperty.call(flow_map, flow_name);
  const fn = is_registered ? flow_map[flow_name] : undefined;
  if (!fn) {
    throw new Error(`未知 flow_name: ${flow_name}`);
  }
  return fn;
}
|
||||
3
server/services/flows/flow_utils.js
Normal file
3
server/services/flows/flow_utils.js
Normal file
@@ -0,0 +1,3 @@
|
||||
/**
 * Pause the calling async function for the given duration.
 *
 * @param {number} ms - Delay passed to setTimeout, in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
export async function sleep_ms(ms) {
  await new Promise((wake_up) => {
    setTimeout(wake_up, ms);
  });
}
|
||||
@@ -6,6 +6,18 @@ import { apply_page_stealth_defaults, get_stealth_puppeteer } from './puppeteer_
|
||||
|
||||
let browser_singleton = null;
|
||||
|
||||
/**
 * Close the shared browser instance, if there is one, and forget it.
 *
 * Errors raised while closing are deliberately ignored; the singleton reference is
 * cleared either way. Calling this when no browser is held does nothing.
 *
 * @returns {Promise<void>}
 */
export async function close_browser() {
  if (browser_singleton) {
    try {
      await browser_singleton.close();
    } catch (close_error) {
      // Best effort: a failed close is ignored.
    } finally {
      browser_singleton = null;
    }
  }
}
|
||||
|
||||
function get_action_timeout_ms() {
|
||||
const cfg = get_app_config();
|
||||
return cfg.crawler.action_timeout_ms;
|
||||
@@ -94,7 +106,7 @@ export async function get_or_create_browser() {
|
||||
return browser_singleton;
|
||||
}
|
||||
|
||||
export async function invoke_extension_action(action_name, action_payload) {
|
||||
export async function invoke_extension_action(action_name, action_payload, options) {
|
||||
const cfg = get_app_config();
|
||||
const browser = await get_or_create_browser();
|
||||
|
||||
@@ -189,13 +201,9 @@ export async function invoke_extension_action(action_name, action_payload) {
|
||||
}
|
||||
}
|
||||
|
||||
if (cfg.crawler.auto_close_browser) {
|
||||
try {
|
||||
await browser.close();
|
||||
} catch (err) {
|
||||
// ignore
|
||||
}
|
||||
browser_singleton = null;
|
||||
const keep_browser_open = options && options.keep_browser_open === true;
|
||||
if (cfg.crawler.auto_close_browser && !keep_browser_open) {
|
||||
await close_browser();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +1,39 @@
|
||||
import cron from 'node-cron';
|
||||
import { cron_task_list } from '../config/cron_tasks.js';
|
||||
import { execute_action_and_record } from './task_executor.js';
|
||||
import { get_flow_runner } from './flows/flow_registry.js';
|
||||
|
||||
const cron_jobs = [];
|
||||
|
||||
/**
 * Execute one cron task definition according to its `type`.
 *
 * - `action`: runs a single action through execute_action_and_record with source 'cron'.
 * - `flow`: resolves the runner via get_flow_runner and invokes it with the flow payload.
 *
 * A missing payload is replaced by an empty object in both cases.
 *
 * @param {object} task - Cron task definition.
 * @returns {Promise<void>}
 * @throws {Error} When the task or its type is missing, or the type is not supported.
 */
async function run_cron_task(task) {
  const has_type = Boolean(task) && Boolean(task.type);
  if (!has_type) {
    throw new Error('cron_task 缺少 type');
  }

  switch (task.type) {
    case 'action':
      await execute_action_and_record({
        action_name: task.action_name,
        action_payload: task.action_payload || {},
        source: 'cron'
      });
      return;

    case 'flow': {
      const run_flow = get_flow_runner(task.flow_name);
      await run_flow(task.flow_payload || {});
      return;
    }

    default:
      throw new Error(`cron_task type 不支持: ${task.type}`);
  }
}
|
||||
|
||||
export async function start_all_cron_tasks() {
|
||||
for (const task of cron_task_list) {
|
||||
// const job = cron.schedule(task.cron_expression, async () => {
|
||||
try {
|
||||
await execute_action_and_record({
|
||||
action_name: task.action_name,
|
||||
action_payload: task.action_payload || {},
|
||||
source: 'cron'
|
||||
});
|
||||
} catch (err) {
|
||||
// 失败会在 crawl_run_record 落库
|
||||
}
|
||||
|
||||
await run_cron_task(task);
|
||||
|
||||
// });
|
||||
|
||||
// cron_jobs.push(job);
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import { crawl_run_record } from '../models/index.js';
|
||||
import { safe_json_stringify } from './json_utils.js';
|
||||
import { invoke_extension_action } from './puppeteer/puppeteer_runner.js';
|
||||
import { persist_amazon_result } from './amazon_persist.js';
|
||||
|
||||
export async function execute_action_and_record(params) {
|
||||
const { action_name, action_payload, source } = params;
|
||||
const { action_name, action_payload, source, keep_browser_open } = params;
|
||||
|
||||
const request_payload = safe_json_stringify(action_payload || {});
|
||||
|
||||
@@ -13,15 +12,14 @@ export async function execute_action_and_record(params) {
|
||||
let error_message = null;
|
||||
|
||||
try {
|
||||
const result = await invoke_extension_action(action_name, action_payload || {});
|
||||
const result_obj = await invoke_extension_action(action_name, action_payload || {}, {
|
||||
keep_browser_open: keep_browser_open === true
|
||||
});
|
||||
|
||||
ok = true;
|
||||
result_payload = safe_json_stringify(result);
|
||||
result_payload = safe_json_stringify(result_obj);
|
||||
|
||||
// 按 stage 自动入库(不影响原始 run_record 记录)
|
||||
await persist_amazon_result(result);
|
||||
|
||||
return result;
|
||||
return result_obj.result;
|
||||
} catch (err) {
|
||||
ok = false;
|
||||
error_message = (err && err.message) || String(err);
|
||||
|
||||
Reference in New Issue
Block a user