This commit is contained in:
张成
2026-03-18 16:57:22 +08:00
parent 7b42ee8ef5
commit bc1068ec66
8 changed files with 275 additions and 128 deletions

View File

@@ -0,0 +1,190 @@
import { execute_action_and_record } from '../../task_executor.js';
import { map_limit, sleep_ms } from '../flow_utils.js';
import { amazon_product, amazon_search_item, amazon_review } from '../../../models/index.js';
import { safe_json_stringify } from '../../json_utils.js';
function build_batch_key(prefix) {
return `${prefix}_${Date.now()}_${Math.random().toString().slice(2, 8)}`;
}
function pick_asin_from_url(url) {
if (!url) return null;
const m = String(url).match(/\/dp\/([A-Z0-9]{8,16})/i);
return m && m[1] ? m[1].toUpperCase() : null;
}
async function persist_detail(detail_res) {
if (!detail_res || detail_res.stage !== 'detail') {
return;
}
const asin = detail_res.asin || pick_asin_from_url(detail_res.url);
if (!asin) {
return;
}
await amazon_product.upsert({
asin,
url: detail_res.url || '',
title: detail_res.title || null,
price: detail_res.price || null,
sku: detail_res.sku || null,
sku_color: detail_res.sku_color || null,
sku_size: detail_res.sku_size || null,
brand_line: detail_res.brand_line || null,
brand_store_url: detail_res.brand_store_url || null,
ac_badge: detail_res.ac_badge || null,
bestseller_hint: detail_res.bestseller_hint || null,
delivery_hint: detail_res.delivery_hint || null,
social_proof: detail_res.social_proof || null,
sustainability_hint: detail_res.sustainability_hint || null,
rating_stars: detail_res.rating_stars || null,
review_count_text: detail_res.review_count_text || null,
main_image: detail_res.main_image || null,
images_json: safe_json_stringify(detail_res.images || []),
bullets_json: safe_json_stringify(detail_res.bullets || []),
product_info_json: safe_json_stringify(detail_res.product_info || {}),
detail_extra_lines_json: safe_json_stringify(detail_res.detail_extra_lines || [])
});
}
async function persist_list(list_res) {
if (!list_res || list_res.stage !== 'list') {
return;
}
const batch_key = build_batch_key('list');
const items = Array.isArray(list_res.items) ? list_res.items : [];
for (const it of items) {
const asin = it.asin || pick_asin_from_url(it.url);
if (!asin || !it.url) continue;
await amazon_search_item.create({
asin,
url: it.url,
title: it.title || null,
price: it.price || null,
rating: typeof it.rating === 'number' ? it.rating : null,
rating_text: it.rating_text || null,
review_count: typeof it.review_count === 'number' ? it.review_count : null,
review_count_text: it.review_count_text || null,
rank_index: typeof it.index === 'number' ? it.index : null,
batch_key,
batch_total: typeof list_res.total === 'number' ? list_res.total : null,
batch_limit: typeof list_res.limit === 'number' ? list_res.limit : null
});
}
}
async function persist_reviews(reviews_res) {
if (!reviews_res || reviews_res.stage !== 'reviews') {
return;
}
const batch_key = build_batch_key('reviews');
const asin = pick_asin_from_url(reviews_res.url);
const items = Array.isArray(reviews_res.items) ? reviews_res.items : [];
for (const it of items) {
const review_id = it.review_id;
if (!review_id) continue;
const asin_value = asin || pick_asin_from_url(it.url) || pick_asin_from_url(reviews_res.url);
await amazon_review.upsert({
asin: asin_value || null,
url: reviews_res.url || '',
review_id,
author: it.author || null,
title: it.title || null,
body: it.body || null,
rating_text: it.rating_text || null,
review_date: it.date || null,
review_index: typeof it.index === 'number' ? it.index : null,
batch_key,
batch_total: typeof reviews_res.total === 'number' ? reviews_res.total : null,
batch_limit: typeof reviews_res.limit === 'number' ? reviews_res.limit : null
});
}
}
function must_string(v, name) {
if (!v || typeof v !== 'string') {
throw new Error(`flow 参数 ${name} 必须是字符串`);
}
return v;
}
function get_int(v, default_value) {
const n = Number(v);
if (Number.isNaN(n)) return default_value;
return n;
}
export async function run_amazon_search_detail_reviews_flow(flow_payload) {
const keyword = must_string(flow_payload.keyword, 'keyword');
const limit = get_int(flow_payload.limit, 100);
const max_products = get_int(flow_payload.max_products, 30);
const reviews_limit = get_int(flow_payload.reviews_limit, 50);
const concurrency = get_int(flow_payload.concurrency, 1);
const gap_ms = get_int(flow_payload.gap_ms, 0);
const list_res = await execute_action_and_record({
action_name: 'amazon_search_list',
action_payload: { keyword, limit },
source: 'cron'
});
await persist_list(list_res);
if (!list_res || list_res.stage !== 'list') {
throw new Error('amazon_search_list 返回非 list stage');
}
const items = Array.isArray(list_res.items) ? list_res.items : [];
const urls = items
.map((it) => (it && it.url ? String(it.url) : ''))
.filter((u) => u);
const picked_urls = urls.slice(0, Math.max(0, max_products));
await map_limit(picked_urls, concurrency, async (product_url) => {
if (gap_ms > 0) {
await sleep_ms(gap_ms);
}
const detail_res = await execute_action_and_record({
action_name: 'amazon_product_detail',
action_payload: { product_url },
source: 'cron'
});
await persist_detail(detail_res);
if (gap_ms > 0) {
await sleep_ms(gap_ms);
}
const reviews_res = await execute_action_and_record({
action_name: 'amazon_product_reviews',
action_payload: { product_url, limit: reviews_limit },
source: 'cron'
});
await persist_reviews(reviews_res);
return null;
});
return {
stage: 'flow',
flow_name: 'amazon_search_detail_reviews',
keyword,
limit,
max_products,
reviews_limit,
total_urls: urls.length,
picked: picked_urls.length
};
}

View File

@@ -0,0 +1,13 @@
import { run_amazon_search_detail_reviews_flow } from './amazon/amazon_search_detail_reviews_flow.js';
const flow_map = {
amazon_search_detail_reviews: run_amazon_search_detail_reviews_flow
};
export function get_flow_runner(flow_name) {
const fn = flow_map[flow_name];
if (!fn) {
throw new Error(`未知 flow_name: ${flow_name}`);
}
return fn;
}

View File

@@ -0,0 +1,27 @@
export async function sleep_ms(ms) {
await new Promise((resolve) => setTimeout(resolve, ms));
}
export async function map_limit(items, limit, worker) {
const list = Array.isArray(items) ? items : [];
const n = Math.max(1, Number(limit || 1));
const res = new Array(list.length);
let idx = 0;
async function run_one() {
while (idx < list.length) {
const cur = idx;
idx += 1;
res[cur] = await worker(list[cur], cur);
}
}
const runners = [];
for (let i = 0; i < Math.min(n, list.length); i += 1) {
runners.push(run_one());
}
await Promise.all(runners);
return res;
}