This commit is contained in:
张成
2026-03-18 10:21:04 +08:00
parent bc7c2c81ba
commit 52862249a9
5 changed files with 528 additions and 198 deletions

View File

@@ -1,20 +1,395 @@
// amazon_top_listAmazon TOP 榜单抓取Best Sellers / New Releases / Movers & Shakers // Amazon搜索列表 + 商品详情 + 评论(注入函数与 action 同文件,便于维护
import { create_tab_task } from '../libs/tabs.js'; import { create_tab_task } from '../libs/tabs.js';
import { injected_amazon_search_list } from '../injected/amazon_search_list.js';
// ---------- 页面注入(仅依赖页面 DOM勿引用本文件其它符号 ----------
/**
 * Types a keyword into the Amazon search box and submits the search.
 *
 * Runs inside the page: it relies only on the page DOM and must not reference
 * any other symbol from this file.
 *
 * @param {{keyword?: string}} params - `keyword` is stringified and trimmed.
 * @returns {{ok: true} | {ok: false, error: string}} On failure `error` is
 *   'empty_keyword', 'no_search_input' or 'no_submit'.
 */
export function injected_amazon_homepage_search(params) {
  // Returns the first element matched by any selector, probing them in order.
  const first_match = (selectors) => {
    for (const selector of selectors) {
      const found = document.querySelector(selector);
      if (found) return found;
    }
    return null;
  };
  const failure = (error) => ({ ok: false, error });

  let keyword = '';
  if (params && params.keyword) keyword = String(params.keyword).trim();
  if (keyword === '') return failure('empty_keyword');

  const search_box = first_match([
    '#twotabsearchtextbox',
    'input#nav-search-keywords',
    'input[name="field-keywords"]',
  ]);
  if (search_box === null) return failure('no_search_input');

  // Fill the box and fire the events a page script would see from real typing.
  search_box.focus();
  search_box.value = keyword;
  for (const type of ['input', 'change']) {
    search_box.dispatchEvent(new Event(type, { bubbles: true }));
  }

  const submit_button = first_match([
    '#nav-search-submit-button',
    '#nav-search-bar-form input[type="submit"]',
    'form[role="search"] input[type="submit"]',
  ]);
  if (submit_button !== null) {
    submit_button.click();
    return { ok: true };
  }

  // No button found: fall back to submitting the form that owns the input.
  const owner_form = search_box.closest('form');
  if (!owner_form) return failure('no_submit');
  owner_form.submit();
  return { ok: true };
}
/**
 * Scrapes the Amazon search-result page currently loaded in the tab.
 *
 * Runs inside the page, so every helper lives inside the function body.
 *
 * @param {{url?: string, category_keyword?: string, sort_by?: string}} params
 *   Values are echoed back in the result; `url` defaults to `location.href`.
 * @returns {{start_url: string, href: string, category_keyword: string,
 *   sort_by: string, total: number, items: object[], next_url: (string|null)}}
 *   One entry per result card, plus the absolute URL of the next page, or
 *   null when there is no enabled "next" link.
 */
export function injected_amazon_search_list(params) {
  const given = (key) => Boolean(params && params[key]);
  const start_url = given('url') ? String(params.url) : location.href;
  const category_keyword = given('category_keyword') ? String(params.category_keyword).trim() : '';
  const sort_by = given('sort_by') ? String(params.sort_by).trim() : '';

  const finite_or_null = (n) => (Number.isFinite(n) ? n : null);
  const trimmed_text = (node) => (node ? node.textContent.trim() : null);

  // First decimal number in the text, e.g. "4.6 out of 5 stars" -> 4.6.
  const first_decimal = (text) => {
    if (!text) return null;
    const cleaned = String(text).replace(/[(),]/g, ' ').replace(/\s+/g, ' ').trim();
    const hit = /(\d+(?:\.\d+)?)/.exec(cleaned);
    return hit === null ? null : Number(hit[1]);
  };

  // Integer count; understands "1.2K" / "3M" abbreviations, otherwise keeps the digits only.
  const count_from_text = (text) => {
    if (!text) return null;
    const flat = String(text).replace(/\s+/g, ' ').trim();
    const upper = flat.toUpperCase().replace(/,/g, '');
    const scales = [
      [/([\d.]+)\s*K\b/, 1000],
      [/([\d.]+)\s*M\b/, 1000000],
    ];
    for (const [pattern, factor] of scales) {
      const hit = upper.match(pattern);
      if (hit) return Math.round(parseFloat(hit[1]) * factor);
    }
    const only_digits = flat.replace(/[^\d]/g, '');
    return only_digits ? Number(only_digits) : null;
  };

  const to_absolute = (href) => {
    try {
      return new URL(href, location.origin).toString();
    } catch (_) {
      // Unparseable href: hand it back untouched.
      return href;
    }
  };

  const asin_of = (url) => {
    if (!url || typeof url !== 'string') return null;
    const hit = url.match(/\/dp\/([A-Z0-9]{10})/i) || url.match(/\/gp\/product\/([A-Z0-9]{10})/i);
    return hit ? hit[1].toUpperCase() : null;
  };

  // Probes [selector, reader] pairs in order and returns the first non-empty trimmed value.
  const first_nonempty = (root, probes) => {
    for (const [selector, read] of probes) {
      const node = root.querySelector(selector);
      const value = node ? read(node) : null;
      if (value) return value.trim();
    }
    return null;
  };
  const read_text = (node) => node.textContent;
  const read_label = (node) => node.getAttribute('aria-label');

  const describe = (card, position) => {
    const own_asin = (card.getAttribute('data-asin') || '').trim() || null;
    const heading = card.querySelector('h2 span') || card.querySelector('h2');
    const link = card.querySelector('a[href*="/dp/"], a[href*="/gp/product/"]');
    const raw_href = link ? link.getAttribute('href') : null;
    const item_url = raw_href ? to_absolute(raw_href) : null;
    const price_node = card.querySelector('span.a-price > span.a-offscreen');
    // Without a dedicated reviews block, search the whole card instead.
    const reviews = card.querySelector('div[data-cy="reviews-block"]') || card;
    const rating_text = first_nonempty(reviews, [
      ['span.a-icon-alt', read_text],
      ['span.a-size-small.a-color-base[aria-hidden="true"]', read_text],
    ]);
    const review_count_text = first_nonempty(reviews, [
      ['a[href*="#customerReviews"]', read_text],
      [
        'a[aria-label*="rating"], a[aria-label*="ratings"], a[aria-label*="评级"], a[aria-label*="评价"]',
        read_label,
      ],
      ['span.a-size-mini.puis-normal-weight-text', read_text],
    ]);
    return {
      index: position + 1,
      asin: own_asin || asin_of(item_url),
      title: trimmed_text(heading),
      url: item_url,
      price: trimmed_text(price_node),
      rating: finite_or_null(first_decimal(rating_text)),
      rating_text,
      review_count: finite_or_null(count_from_text(review_count_text)),
      review_count_text,
    };
  };

  const next_page_url = () => {
    const link = document.querySelector('a.s-pagination-next');
    if (!link) return null;
    const disabled_flag = (link.getAttribute('aria-disabled') || '').trim().toLowerCase();
    if (disabled_flag === 'true' || (link.classList && link.classList.contains('s-pagination-disabled'))) {
      return null;
    }
    const href = link.getAttribute('href');
    return href ? to_absolute(href) : null;
  };

  const cards = document.querySelectorAll('div.s-main-slot div[data-component-type="s-search-result"]');
  const items = Array.from(cards, describe);
  return {
    start_url,
    href: location.href,
    category_keyword,
    sort_by,
    total: items.length,
    items,
    next_url: next_page_url(),
  };
}
/**
 * Scrapes the Amazon product detail page currently loaded in the tab.
 *
 * Runs inside the page and reads only `document` / `location`.
 *
 * @returns {object} A record with `stage: 'detail'`, the ASIN parsed from the
 *   path, title, price, brand line and store URL, rating / review-count text,
 *   badge and hint texts, `product_info` (key/value pairs from the spec tables
 *   and detail bullets), `detail_extra_lines` (bullets that are not key/value),
 *   feature `bullets`, selected `variants`, `delivery_hint`, `sku`, the image
 *   URL list, `main_image` and the page `url`. Missing scalars are null.
 */
export function injected_amazon_product_detail() {
  const norm = (s) => (s || '').replace(/\s+/g, ' ').trim();
  const asin_match = location.pathname.match(/\/(?:dp|gp\/product)\/([A-Z0-9]{10})/i);
  const asin = asin_match ? asin_match[1].toUpperCase() : null;

  const product_info = {};
  // Own-property check so a label such as "constructor" is not confused with
  // something inherited from Object.prototype.
  const has_info = (key) => Object.prototype.hasOwnProperty.call(product_info, key);
  // Stores a normalized key/value pair; on duplicate keys the longer value wins.
  function set_info(k, v, max_len) {
    const key = norm(k);
    let value = norm(v);
    const cap = max_len || 600;
    if (!key || !value || key.length > 100) return;
    if (value.length > cap) value = value.slice(0, cap);
    if (!has_info(key) || value.length > product_info[key].length) product_info[key] = value;
  }

  const table_roots =
    '#productOverview_feature_div tr, #poExpander table tr, #productDetails_detailBullets_sections1 tr, ' +
    '#productDetails_techSpec_section_1 tr, table.prodDetTable tr, #productFactsDesktopExpander tr, ' +
    '#technicalSpecifications_feature_div table tr, #productDetails_db_sections tr';
  document.querySelectorAll(table_roots).forEach((tr) => {
    const tds = tr.querySelectorAll('td');
    const th = tr.querySelector('th');
    const td = tr.querySelector('td');
    // Rows are either td/td or th/td pairs.
    if (tds.length >= 2) set_info(tds[0].innerText, tds[1].innerText);
    else if (th && td && th !== td) set_info(th.innerText, td.innerText);
  });

  const detail_extra_lines = [];
  document.querySelectorAll('#detailBullets_feature_div li, #rpi-attribute-values_feature_div li').forEach((li) => {
    // Strip the LRM/RLM direction marks before splitting "label : value".
    const t = li.innerText.replace(/\u200f|\u200e/g, ' ').replace(/\s+/g, ' ').trim();
    // Accept the ASCII colon and the full-width colon (U+FF1A) used on zh-locale pages.
    const m = t.match(/^(.{1,80}?)\s*[:\uFF1A]\s*(.+)$/);
    if (m) set_info(m[1], m[2], 1200);
    else if (t.length > 8 && t.length < 800) detail_extra_lines.push(t);
  });

  const title_el = document.querySelector('#productTitle');
  const title = title_el ? norm(title_el.textContent) : null;

  const price_el =
    document.querySelector('#corePrice_feature_div .a-price .a-offscreen') ||
    document.querySelector('#tp_price_block_total_price_ww .a-offscreen') ||
    document.querySelector('#price .a-offscreen') ||
    document.querySelector('.reinventPricePriceToPayMargin .a-offscreen') ||
    document.querySelector('.a-price .a-offscreen');
  const price = price_el ? price_el.textContent.trim() : null;

  const brand_el = document.querySelector('#bylineInfo');
  const brand_line = brand_el ? norm(brand_el.textContent) : null;
  const brand_store_url = document.querySelector('#bylineInfo a[href]')?.href || null;

  const rating_stars =
    document.querySelector('#acrPopover')?.getAttribute('title') ||
    document.querySelector('#averageCustomerReviews .a-icon-alt')?.textContent?.trim() ||
    null;
  const review_count_text = document.querySelector('#acrCustomerReviewText')?.textContent?.trim() || null;
  const ac_badge = norm(document.querySelector('#acBadge_feature_div')?.innerText) || null;
  const social_proof = norm(document.querySelector('#socialProofingAsinFaceout_feature_div')?.innerText) || null;
  const bestseller_hint = norm(document.querySelector('#zeitgeistBadge_feature_div')?.innerText)?.slice(0, 200) || null;

  // Short button/link text mentioning sustainability features (zh or en); the last match wins.
  let sustainability_hint = null;
  document.querySelectorAll('button, span.a-button-text, a').forEach((el) => {
    const tx = norm(el.innerText);
    if (!tx || tx.length > 90) return;
    if (
      /\d+\s*个.*可持续发展|可持续发展特性/.test(tx) ||
      /\d+\s+sustainability features?/i.test(tx)
    ) {
      sustainability_hint = tx;
    }
  });

  const bullets = [];
  document.querySelectorAll('#feature-bullets ul li span.a-list-item').forEach((el) => {
    const t = norm(el.textContent);
    if (t) bullets.push(t);
  });

  const variants = {};
  document.querySelectorAll('[id^="variation_"]').forEach((block) => {
    const key = block.id.replace(/^variation_/, '') || block.id;
    const sel =
      block.querySelector('.selection') ||
      block.querySelector('.a-button-selected .a-button-text') ||
      block.querySelector('[class*="dropdown"]');
    if (sel) {
      const v = norm(sel.textContent);
      if (v) variants[key] = v;
    }
  });

  let delivery_hint = null;
  const del = document.querySelector(
    '#deliveryBlockMessage, #mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE',
  );
  if (del) delivery_hint = norm(del.innerText).slice(0, 500);

  // SKU comes from whichever collected attribute looks like a model / part number; the last match wins.
  let sku = null;
  Object.keys(product_info).forEach((k) => {
    if (/^sku$/i.test(k) || /item model|型号|part number|制造商型号/i.test(k)) sku = product_info[k];
  });

  const images = [];
  const seen_img = new Set();
  // Keeps Amazon-hosted image URLs only. The query string is stripped BEFORE
  // the duplicate check, so the same image requested with different query
  // parameters is stored once.
  function add_img(u) {
    if (!u) return;
    if (!/media-amazon|images-amazon|ssl-images/i.test(u)) return;
    const clean = String(u).split('?')[0];
    if (seen_img.has(clean)) return;
    seen_img.add(clean);
    images.push(clean);
  }
  const land = document.querySelector('#landingImage, #imgBlkFront');
  if (land) {
    const dyn = land.getAttribute('data-a-dynamic-image');
    if (dyn) {
      try {
        // The attribute is parsed as JSON; its keys are fed to add_img as image URLs.
        const o = JSON.parse(dyn);
        Object.keys(o).forEach(add_img);
      } catch (_) {
        // Best effort: a malformed attribute only means fewer image candidates.
      }
    }
    if (land.src) add_img(land.src);
  }
  document.querySelectorAll('#altImages img, #imageBlock_feature_div img, #ivImages img').forEach((img) => {
    add_img(img.src || img.getAttribute('data-src'));
  });
  const main_image = images.length ? images[0] : land?.src || null;

  return {
    stage: 'detail',
    asin,
    title,
    price,
    brand_line,
    brand_store_url,
    rating_stars,
    review_count_text,
    ac_badge,
    social_proof,
    bestseller_hint,
    sustainability_hint,
    product_info,
    detail_extra_lines,
    bullets,
    variants,
    delivery_hint,
    sku,
    images,
    main_image,
    url: location.href,
  };
}
/**
 * Collects the reviews already rendered on the current page.
 *
 * Runs inside the page and reads only `document` / `location`.
 *
 * @param {{limit?: number}} params - `limit` is floored and clamped to 1..100;
 *   a missing or non-numeric limit means 50.
 * @returns {{stage: 'reviews', limit: number, total: number, items: object[], url: string}}
 *   At most `limit` reviews, in page order.
 */
export function injected_amazon_product_reviews(params) {
  const requested = params && params.limit != null ? Number(params.limit) : 50;
  let limit = 50;
  if (Number.isFinite(requested)) {
    limit = Math.max(1, Math.min(100, Math.floor(requested)));
  }

  // innerText with whitespace runs collapsed, or null when the element is missing.
  const squashed = (el) => (el ? el.innerText.replace(/\s+/g, ' ').trim() : null);
  const trimmed = (el) => (el ? el.textContent.trim() : null);

  const items = Array.from(document.querySelectorAll('[data-hook="review"]'))
    .slice(0, limit)
    .map((review, position) => {
      const pick = (selector) => review.querySelector(selector);
      // When the review node has no id, derive one from a nested customer_review-* element.
      const anchor = pick('[id^="customer_review-"]');
      const fallback_id = anchor && anchor.id ? anchor.id.replace('customer_review-', '') : null;
      return {
        index: position + 1,
        review_id: review.id || fallback_id,
        author: trimmed(pick('.a-profile-name')),
        rating_text: trimmed(pick('[data-hook="review-star-rating"]')),
        title: squashed(pick('[data-hook="review-title"]')),
        date: trimmed(pick('[data-hook="review-date"]')),
        body: squashed(pick('[data-hook="review-body"]')),
      };
    });

  return { stage: 'reviews', limit, total: items.length, items, url: location.href };
}
// ---------- Background: search list ----------
// Amazon home page under the /-/zh/ (Chinese) path.
const AMAZON_ZH_HOME_URL = 'https://www.amazon.com/-/zh/ref=nav_logo';
/**
 * Builds the English (language=en_US) Amazon search-list URL for a keyword.
 * Only the `k` parameter varies; the remaining parameters are fixed template values.
 *
 * @param {string} keyword - Search keyword placed in the `k` parameter.
 * @returns {string} The serialized URL.
 */
function build_amazon_search_url_en(keyword) {
  const query = [
    ['k', keyword],
    ['language', 'en_US'],
    ['crid', '35M31MY4FQI'],
    ['sprefix', ',aps,398'],
    ['ref', 'nb_sb_ss_recent_1_0_recent'],
  ];
  const target = new URL('https://www.amazon.com/s');
  for (const [name, value] of query) {
    target.searchParams.set(name, value);
  }
  return target.toString();
}
/**
 * Tells whether a tab URL is an Amazon search-result (list) page.
 *
 * The URL is parsed rather than substring-matched, so "amazon.com" appearing
 * in a query string, or "k=" appearing inside another parameter name such as
 * "rank=", no longer counts as a match.
 *
 * @param {string} tab_url - Absolute URL, as reported by `chrome.tabs`.
 * @returns {boolean} True when the host is amazon.com (optionally with a
 *   two-letter country suffix such as amazon.com.mx), the path contains an
 *   `/s` segment (locale prefixes like `/-/zh/s` are fine), and the query has
 *   a `k`, `keywords` or `field-keywords` parameter. False for non-strings
 *   and for anything that does not parse as a URL.
 */
function is_amazon_search_list_url(tab_url) {
  if (!tab_url || typeof tab_url !== 'string') return false;
  let parsed;
  try {
    parsed = new URL(tab_url);
  } catch (_) {
    // Not an absolute URL, so it cannot be a loaded search page.
    return false;
  }
  if (!/(?:^|\.)amazon\.com(?:\.[a-z]{2})?$/i.test(parsed.hostname)) return false;
  if (!/\/s(?:\/|$)/.test(parsed.pathname)) return false;
  return ['k', 'keywords', 'field-keywords'].some((name) => parsed.searchParams.has(name));
}
/**
 * Polls a tab until its URL is an Amazon search-list URL.
 *
 * @param {number} tab_id - Tab to watch.
 * @param {number} [timeout_ms] - Overall budget; a falsy value means 45000 ms.
 * @returns {Promise<string>} Resolves with the matching URL. Rejects with the
 *   `chrome.runtime.lastError` message if the tab lookup fails, or with a
 *   timeout error once the deadline has passed without a match.
 */
function wait_until_search_list_url(tab_id, timeout_ms) {
  const expires_at = Date.now() + (timeout_ms || 45000);

  // One lookup of the tab's current URL ('' when the tab reports none).
  const read_tab_url = () =>
    new Promise((ok, fail) => {
      chrome.tabs.get(tab_id, (tab) => {
        if (chrome.runtime.lastError) {
          fail(new Error(chrome.runtime.lastError.message));
          return;
        }
        ok(tab && tab.url ? tab.url : '');
      });
    });
  const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));

  return (async () => {
    for (;;) {
      const current = await read_tab_url();
      if (is_amazon_search_list_url(current)) return current;
      // The deadline is checked after each lookup, so at least one lookup always happens.
      if (Date.now() >= expires_at) throw new Error('等待首页搜索跳转到列表页超时');
      await pause(400);
    }
  })();
}
/**
 * Waits until a tab reports `status === 'complete'`.
 *
 * @param {number} tab_id - Tab to watch.
 * @returns {Promise<object|true>} Resolves with the tab object when the tab is
 *   already complete at call time, otherwise with `true` once an `onUpdated`
 *   event reports completion. Rejects with the `chrome.runtime.lastError`
 *   message when the tab lookup fails, or with a timeout error after 45 s.
 */
function wait_tab_complete(tab_id) {
  const TIMEOUT_MS = 45000;
  return new Promise((resolve_wait, reject_wait) => {
    chrome.tabs.get(tab_id, (tab0) => {
      // A failed lookup cannot be completed by waiting, so surface the real
      // error right away (same policy as wait_until_search_list_url).
      if (chrome.runtime.lastError) {
        return reject_wait(new Error(chrome.runtime.lastError.message));
      }
      if (tab0 && tab0.status === 'complete') {
        return resolve_wait(tab0);
      }
      let timer = null;
      const on_updated = (updated_tab_id, change_info) => {
        if (updated_tab_id !== tab_id) return;
        if (!change_info || change_info.status !== 'complete') return;
        chrome.tabs.onUpdated.removeListener(on_updated);
        // Cancel the pending timeout so it does not linger for 45 s after success.
        clearTimeout(timer);
        resolve_wait(true);
      };
      chrome.tabs.onUpdated.addListener(on_updated);
      timer = setTimeout(() => {
        chrome.tabs.onUpdated.removeListener(on_updated);
        reject_wait(new Error('等待页面加载超时'));
      }, TIMEOUT_MS);
    });
  });
}
export function amazon_search_list(data, sendResponse) { export function amazon_search_list(data, sendResponse) {
return new Promise(async (resolve, reject) => { return new Promise(async (resolve, reject) => {
const category_keyword = (data && data.category_keyword) ? String(data.category_keyword).trim() : ''; const category_keyword = data && data.category_keyword ? String(data.category_keyword).trim() : '';
const sort_by = (data && data.sort_by) ? String(data.sort_by).trim() : ''; const sort_by = data && data.sort_by ? String(data.sort_by).trim() : '';
const limit = (() => { const limit = (() => {
const n = data && Object.prototype.hasOwnProperty.call(data, 'limit') ? Number(data.limit) : 100; const n = data && Object.prototype.hasOwnProperty.call(data, 'limit') ? Number(data.limit) : 100;
if (!Number.isFinite(n)) return 100; if (!Number.isFinite(n)) return 100;
return Math.max(1, Math.min(200, Math.floor(n))); return Math.max(1, Math.min(200, Math.floor(n)));
})(); })();
const keyword = category_keyword || 'picnic bag';
const keyword = category_keyword || '野餐包'; const search_url_custom = data && data.search_url ? String(data.search_url).trim() : '';
const entry = data && data.entry ? String(data.entry).trim() : 'direct';
const sort_map = { const sort_map = {
featured: 'relevanceblender', featured: 'relevanceblender',
review: 'review-rank', review: 'review-rank',
@@ -24,57 +399,43 @@ export function amazon_search_list(data, sendResponse) {
bestseller: 'exact-aware-popularity-rank', bestseller: 'exact-aware-popularity-rank',
}; };
const sort_s = Object.prototype.hasOwnProperty.call(sort_map, sort_by) ? sort_map[sort_by] : ''; const sort_s = Object.prototype.hasOwnProperty.call(sort_map, sort_by) ? sort_map[sort_by] : '';
// 内置 URL只替换 k / s 参数,其它参数保持一致
const default_url = (() => {
const u = new URL('https://www.amazon.com/s');
u.searchParams.set('k', keyword);
u.searchParams.set('__mk_zh_CN', '亚马逊网站');
u.searchParams.set('crid', 'ZKNCI4U8BBAP');
u.searchParams.set('ref', 'nb_sb_noss');
if (sort_s) u.searchParams.set('s', sort_s);
else u.searchParams.delete('s');
return u.toString();
})();
const url = default_url;
let times = 0;
const send_action = (action, payload) => { const send_action = (action, payload) => {
if (typeof sendResponse === 'function') { if (typeof sendResponse === 'function') {
sendResponse({ action, data: payload }); sendResponse({ action, data: payload });
sendResponse.log && sendResponse.log(payload); sendResponse.log && sendResponse.log(payload);
} }
}; };
const tab_task = create_tab_task(AMAZON_ZH_HOME_URL)
const tab_task = create_tab_task(url)
.set_latest(false) .set_latest(false)
.set_bounds({ top: 20, left: 20, width: 1440, height: 900 }) .set_bounds({ top: 20, left: 20, width: 1440, height: 900 })
.set_target('__amazon_search_list'); .set_target('__amazon_search_list');
let url = AMAZON_ZH_HOME_URL;
try { try {
const tab = await tab_task.open_async(); const tab = await tab_task.open_async();
await wait_tab_complete(tab.id);
const wait_tab_complete = (tab_id) => new Promise((resolve_wait, reject_wait) => { const home_ret = await tab.execute_script(injected_amazon_homepage_search, [{ keyword }], 'document_idle');
const on_updated = (updated_tab_id, change_info, updated_tab) => { const home_ok = Array.isArray(home_ret) ? home_ret[0] : home_ret;
if (updated_tab_id !== tab_id) return; if (!home_ok || !home_ok.ok) {
if (change_info.status !== 'complete') return; throw new Error((home_ok && home_ok.error) || '首页搜索提交失败');
chrome.tabs.onUpdated.removeListener(on_updated); }
resolve_wait(updated_tab); url = await wait_until_search_list_url(tab.id, 45000);
}; await wait_tab_complete(tab.id);
chrome.tabs.onUpdated.addListener(on_updated); if (sort_s) {
setTimeout(() => { const u = new URL(url);
chrome.tabs.onUpdated.removeListener(on_updated); u.searchParams.set('s', sort_s);
reject_wait(new Error('等待页面加载超时')); url = u.toString();
}, 45000); await new Promise((resolve_nav, reject_nav) => {
chrome.tabs.update(tab.id, { url, active: true }, () => {
if (chrome.runtime.lastError) return reject_nav(new Error(chrome.runtime.lastError.message));
resolve_nav(true);
}); });
});
await wait_tab_complete(tab.id);
}
const unique_map = new Map(); const unique_map = new Map();
let next_url = url; let next_url = url;
let page = 1; for (let page = 1; page <= 10 && unique_map.size < limit; page += 1) {
if (page > 1) {
while (next_url && unique_map.size < limit) {
await new Promise((resolve_nav, reject_nav) => { await new Promise((resolve_nav, reject_nav) => {
chrome.tabs.update(tab.id, { url: next_url, active: true }, () => { chrome.tabs.update(tab.id, { url: next_url, active: true }, () => {
if (chrome.runtime.lastError) return reject_nav(new Error(chrome.runtime.lastError.message)); if (chrome.runtime.lastError) return reject_nav(new Error(chrome.runtime.lastError.message));
@@ -82,7 +443,7 @@ export function amazon_search_list(data, sendResponse) {
}); });
}); });
await wait_tab_complete(tab.id); await wait_tab_complete(tab.id);
}
const injected_result_list = await tab.execute_script( const injected_result_list = await tab.execute_script(
injected_amazon_search_list, injected_amazon_search_list,
[{ url: next_url, category_keyword, sort_by }], [{ url: next_url, category_keyword, sort_by }],
@@ -90,67 +451,141 @@ export function amazon_search_list(data, sendResponse) {
); );
const injected_result = Array.isArray(injected_result_list) ? injected_result_list[0] : null; const injected_result = Array.isArray(injected_result_list) ? injected_result_list[0] : null;
const items = injected_result && Array.isArray(injected_result.items) ? injected_result.items : []; const items = injected_result && Array.isArray(injected_result.items) ? injected_result.items : [];
items.forEach((it) => { items.forEach((it) => {
const k = (it && (it.asin || it.url)) ? String(it.asin || it.url) : null; const k = it && (it.asin || it.url) ? String(it.asin || it.url) : null;
if (!k) return; if (!k) return;
if (!unique_map.has(k)) unique_map.set(k, it); if (!unique_map.has(k)) unique_map.set(k, it);
}); });
if (unique_map.size >= limit) break;
next_url = injected_result && injected_result.next_url ? String(injected_result.next_url) : null; next_url = injected_result && injected_result.next_url ? String(injected_result.next_url) : null;
page += 1; if (!next_url) break;
if (page > 10) break; // 防止死循环(默认 100 条一般 <= 3 页)
} }
const list_result = {
const injected_result = {
stage: 'list', stage: 'list',
limit, limit,
total: unique_map.size, total: unique_map.size,
items: Array.from(unique_map.values()).slice(0, limit), items: Array.from(unique_map.values()).slice(0, limit),
}; };
const result = { const result = {
code: 0, code: 0,
status: true, status: true,
message: 'ok', message: 'ok',
data: { tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: injected_result }, data: { tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: list_result },
}; };
send_action('amazon_search_list', result); send_action('amazon_search_list', result);
resolve({ tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: injected_result }); resolve({ tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: list_result });
// 成功后关闭打开的 tab同时会关闭 popup window
tab.remove(0); tab.remove(0);
} catch (err) { } catch (err) {
const result = { send_action('amazon_search_list', {
code: 30, code: 30,
status: false, status: false,
message: (err && err.message) || String(err), message: (err && err.message) || String(err),
data: null, data: null,
documentURI: url, documentURI: url || AMAZON_ZH_HOME_URL,
}; });
send_action('amazon_search_list', result);
reject(err); reject(err);
} }
}); });
} }
amazon_search_list.desc = 'Amazon 搜索结果列表抓取DOM 解析)'; amazon_search_list.desc = 'Amazon 搜索列表:先打开中文首页,搜索框输入类目并搜索,再分页抓取';
amazon_search_list.params = { amazon_search_list.params = {
category_keyword: { category_keyword: { type: 'string', desc: '类目关键词(在首页搜索框输入后点搜索,进入列表再抓)', default: '野餐包' },
type: 'string',
desc: '分类关键词',
default: '野餐包',
},
sort_by: { sort_by: {
type: 'string', type: 'string',
desc: '排序方式featured(精选) / price_asc(价格从低到高) / price_desc(价格从高到低) / review(平均买家评论数) / newest(最新商品) / bestseller(畅销商品)', desc: '排序方式featured / price_asc / price_desc / review / newest / bestseller',
default: 'featured', default: 'featured',
}, },
limit: { limit: { type: 'number', desc: '抓取数量上限(默认 100最大 200', default: 100 },
type: 'number', };
desc: '抓取数量上限(默认 100最大 200',
default: 100, // ---------- 后台:商品详情 / 评论 ----------
/**
 * Validates and normalizes an Amazon product-detail URL.
 *
 * Protocol-relative ("//host/...") and scheme-less ("host/...") inputs are
 * upgraded to https. The host must be an Amazon domain (amazon.com,
 * amazon.de, amazon.co.uk, amazon.com.mx, ... or a subdomain of one); merely
 * containing the text "amazon." (e.g. amazon.evil.com, evil-amazon.com) is
 * not enough.
 *
 * @param {string} u - Product URL supplied by the caller.
 * @returns {string} The serialized, validated URL.
 * @throws {Error} When the URL is missing, is not on an Amazon domain, or has
 *   no /dp/ASIN or /gp/product/ASIN path. `new URL` throws a TypeError for
 *   input that cannot be parsed.
 */
function normalize_product_url(u) {
  let s = u ? String(u).trim() : '';
  if (!s) throw new Error('缺少 product_url');
  if (s.startsWith('//')) s = 'https:' + s;
  if (!/^https?:\/\//i.test(s)) s = 'https://' + s;
  const url_obj = new URL(s);
  // "amazon" must be a whole host label followed by a TLD (com, de, ...) or a
  // co.xx / com.xx pair, anchored at the end of the hostname.
  const amazon_host = /(?:^|\.)amazon\.(?:[a-z]{2,3}|com?\.[a-z]{2})$/i;
  if (!amazon_host.test(url_obj.hostname)) throw new Error('product_url 需为亚马逊域名');
  const path = url_obj.pathname;
  if (!/\/dp\/[A-Z0-9]{10}/i.test(path) && !/\/gp\/product\/[A-Z0-9]{10}/i.test(path)) {
    throw new Error('product_url 需包含 /dp/ASIN 或 /gp/product/ASIN');
  }
  return url_obj.toString();
}
/**
 * Shared driver for the product-detail-page actions: opens the product URL in
 * a tab, runs one injected function there and reports the outcome.
 *
 * @param {string} product_url - Raw product URL; validated with normalize_product_url.
 * @param {Function} injected_fn - Function executed inside the page.
 * @param {Array} inject_args - Arguments for `injected_fn` (defaults to []).
 * @param {string} action_name - Action name attached to every response.
 * @param {Function} [sendResponse] - Optional callback receiving
 *   `{ action, data }`; its optional `log` method also receives the payload.
 * @returns {Promise<{tab_id: *, product_url: string, result: *}>} Rejects with
 *   the validation error (reported with code 10) or with any error raised
 *   while opening / scripting the tab (reported with code 30).
 */
function run_pdp_action(product_url, injected_fn, inject_args, action_name, sendResponse) {
  const can_respond = typeof sendResponse === 'function';
  // Forwards a payload to the caller's callback, and to its logger when present.
  const report = (payload) => {
    if (!can_respond) return;
    sendResponse({ action: action_name, data: payload });
    if (sendResponse.log) sendResponse.log(payload);
  };

  return new Promise(async (resolve, reject) => {
    let url = product_url;
    try {
      url = normalize_product_url(product_url);
    } catch (e) {
      report({ code: 10, status: false, message: e.message, data: null });
      reject(e);
      return;
    }

    const window_bounds = { top: 20, left: 20, width: 1280, height: 900 };
    const tab_task = create_tab_task(url).set_bounds(window_bounds);
    try {
      const tab = await tab_task.open_async();
      await wait_tab_complete(tab.id);
      const raw_list = await tab.execute_script(injected_fn, inject_args || [], 'document_idle');
      // An array result is unwrapped to its first entry.
      const result = Array.isArray(raw_list) ? raw_list[0] : raw_list;
      // The response and the resolved value each get their own object.
      const outcome = () => ({ tab_id: tab.id, product_url: url, result });
      report({ code: 0, status: true, message: 'ok', data: outcome() });
      resolve(outcome());
      // The tab is closed only on the success path.
      tab.remove(0);
    } catch (err) {
      const message = (err && err.message) || String(err);
      report({ code: 30, status: false, message, data: null, documentURI: url });
      reject(err);
    }
  });
}
/**
 * Action: scrape an Amazon product detail page.
 *
 * @param {{product_url: string}} data - Action parameters.
 * @param {Function} [sendResponse] - Optional response callback.
 * @returns {Promise<object>} The promise returned by run_pdp_action.
 */
export function amazon_product_detail(data, sendResponse) {
  const product_url = data && data.product_url;
  const no_inject_args = [];
  return run_pdp_action(
    product_url,
    injected_amazon_product_detail,
    no_inject_args,
    'amazon_product_detail',
    sendResponse,
  );
}
amazon_product_detail.desc = 'Amazon 商品详情标题、价格、品牌、SKU、要点、变体、配送摘要等';
amazon_product_detail.params = {
product_url: {
type: 'string',
desc: '商品详情页完整 URL含 /dp/ASIN',
default: 'https://www.amazon.com/-/zh/dp/B0B56CHMSC',
}, },
}; };
/**
 * Action: scrape the reviews rendered on an Amazon product page.
 *
 * @param {{product_url: string, limit?: number}} data - `limit` is converted
 *   with Number() and defaults to 50 when absent.
 * @param {Function} [sendResponse] - Optional response callback.
 * @returns {Promise<object>} The promise returned by run_pdp_action.
 */
export function amazon_product_reviews(data, sendResponse) {
  let limit = 50;
  if (data && data.limit != null) limit = Number(data.limit);
  const product_url = data && data.product_url;
  return run_pdp_action(
    product_url,
    injected_amazon_product_reviews,
    [{ limit }],
    'amazon_product_reviews',
    sendResponse,
  );
}

// Action metadata: a description plus per-parameter type / description / default.
Object.assign(amazon_product_reviews, {
  desc: 'Amazon 商品页买家评论(详情页 [data-hook=review],条数受页面展示限制)',
  params: {
    product_url: {
      type: 'string',
      desc: '商品详情页完整 URL',
      default: 'https://www.amazon.com/-/zh/dp/B0B56CHMSC',
    },
    limit: {
      type: 'number',
      desc: '最多条数(默认 50上限 100',
      default: 50,
    },
  },
});

View File

@@ -1,8 +1,10 @@
import { amazon_search_list } from '../actions/amazon.js'; import { amazon_search_list, amazon_product_detail, amazon_product_reviews } from '../actions/amazon.js';
const actions = { const actions = {
amazon_search_list, amazon_search_list,
amazon_product_detail,
amazon_product_reviews,
}; };
function list_actions_meta() { function list_actions_meta() {

View File

@@ -1,121 +0,0 @@
// Amazon search-list parsing logic that is injected into the page.
/**
 * Scrapes the Amazon search-result page currently loaded in the tab.
 *
 * @param {{url?: string, category_keyword?: string, sort_by?: string}} params
 *   Values are echoed back in the result; `url` defaults to `location.href`.
 * @returns {{start_url: string, href: string, category_keyword: string,
 *   sort_by: string, total: number, items: object[], next_url: (string|null)}}
 *   One entry per result card, plus the absolute URL of the next page, or
 *   null when there is no enabled "next" link.
 */
export function injected_amazon_search_list(params) {
  const start_url = params && params.url ? String(params.url) : location.href;
  const category_keyword = params && params.category_keyword ? String(params.category_keyword).trim() : '';
  const sort_by = params && params.sort_by ? String(params.sort_by).trim() : '';

  // First decimal number in the text, e.g. "4.6 out of 5 stars" -> 4.6.
  function pick_number(text) {
    if (!text) return null;
    const s = String(text).replace(/[(),]/g, ' ').replace(/\s+/g, ' ').trim();
    const m = s.match(/(\d+(?:\.\d+)?)/);
    return m ? Number(m[1]) : null;
  }

  // Integer count. Abbreviated counts such as "1.2K" or "3M" are expanded
  // first; keeping only the digits would turn "1.2K" into 12.
  function pick_int(text) {
    if (!text) return null;
    const raw = String(text).replace(/\s+/g, ' ').trim();
    const u = raw.toUpperCase().replace(/,/g, '');
    const km = u.match(/([\d.]+)\s*K\b/);
    if (km) return Math.round(parseFloat(km[1]) * 1000);
    const mm = u.match(/([\d.]+)\s*M\b/);
    if (mm) return Math.round(parseFloat(mm[1]) * 1000000);
    const digits = raw.replace(/[^\d]/g, '');
    return digits ? Number(digits) : null;
  }

  function abs_url(href) {
    try {
      return new URL(href, location.origin).toString();
    } catch (_) {
      // Unparseable href: hand it back untouched.
      return href;
    }
  }

  function parse_asin_from_url(url) {
    if (!url || typeof url !== 'string') return null;
    const m = url.match(/\/dp\/([A-Z0-9]{10})/i) || url.match(/\/gp\/product\/([A-Z0-9]{10})/i);
    return m ? m[1].toUpperCase() : null;
  }

  function extract_results() {
    const items = [];
    const nodes = document.querySelectorAll('div.s-main-slot div[data-component-type="s-search-result"]');
    nodes.forEach((el, idx) => {
      const asin = (el.getAttribute('data-asin') || '').trim() || null;

      const title_el = el.querySelector('h2 span') || el.querySelector('h2');
      const title = title_el ? title_el.textContent.trim() : null;

      const a = el.querySelector('a[href*="/dp/"], a[href*="/gp/product/"]');
      const href = a ? a.getAttribute('href') : null;
      const url = href ? abs_url(href) : null;

      const price_el = el.querySelector('span.a-price > span.a-offscreen');
      const price = price_el ? price_el.textContent.trim() : null;

      // Without a dedicated reviews block, search the whole card instead.
      const reviews_block = el.querySelector('div[data-cy="reviews-block"]') || el;

      const rating_text = (() => {
        const t1 = reviews_block.querySelector('span.a-icon-alt');
        if (t1 && t1.textContent) return t1.textContent.trim();
        const t2 = reviews_block.querySelector('span.a-size-small.a-color-base[aria-hidden="true"]');
        if (t2 && t2.textContent) return t2.textContent.trim();
        return null;
      })();
      const rating = (() => {
        const n = pick_number(rating_text);
        return Number.isFinite(n) ? n : null;
      })();

      const review_count_text = (() => {
        const a1 = reviews_block.querySelector('a[href*="#customerReviews"]');
        if (a1 && a1.textContent) return a1.textContent.trim();
        const a2 = reviews_block.querySelector('a[aria-label*="rating"], a[aria-label*="ratings"], a[aria-label*="评级"], a[aria-label*="评价"]');
        if (a2 && a2.getAttribute('aria-label')) return a2.getAttribute('aria-label').trim();
        const s1 = reviews_block.querySelector('span.a-size-mini.puis-normal-weight-text');
        if (s1 && s1.textContent) return s1.textContent.trim();
        return null;
      })();
      const review_count = (() => {
        const n = pick_int(review_count_text);
        return Number.isFinite(n) ? n : null;
      })();

      items.push({
        index: idx + 1,
        asin: asin || parse_asin_from_url(url),
        title,
        url,
        price,
        rating,
        rating_text,
        review_count,
        review_count_text,
      });
    });
    return items;
  }

  function pick_next_url() {
    const a = document.querySelector('a.s-pagination-next');
    if (!a) return null;
    const aria_disabled = (a.getAttribute('aria-disabled') || '').trim().toLowerCase();
    if (aria_disabled === 'true') return null;
    if (a.classList && a.classList.contains('s-pagination-disabled')) return null;
    const href = a.getAttribute('href');
    if (!href) return null;
    return abs_url(href);
  }

  const items = extract_results();
  const next_url = pick_next_url();

  // Return the result only once, so that too many sends do not affect the caller's judgement.
  return {
    start_url,
    href: location.href,
    category_keyword,
    sort_by,
    total: items.length,
    items,
    next_url,
  };
}

View File

@@ -25,6 +25,8 @@
<option value="zhipu_query_position_page">zhipu_query_position_page</option> <option value="zhipu_query_position_page">zhipu_query_position_page</option>
<option value="amazon_top_list">amazon_top_list</option> <option value="amazon_top_list">amazon_top_list</option>
<option value="amazon_search_list">amazon_search_list</option> <option value="amazon_search_list">amazon_search_list</option>
<option value="amazon_product_detail">amazon_product_detail</option>
<option value="amazon_product_reviews">amazon_product_reviews</option>
</select> </select>
<label class="label">参数JSON</label> <label class="label">参数JSON</label>

12
执行计划.md Normal file
View File

@@ -0,0 +1,12 @@
提取产品标题、价格、SKU、品牌、变体、上架时间、物流方式。
抓取售后留言和评论(Reviews)。
---
## 扩展指令(mv2_simple_crx)
| 指令 | 参数 | 说明 |
|------|------|------|
| `amazon_product_detail` | `product_url`(商品详情页完整链接,须含 `/dp/ASIN`) | 抓取标题、价格、品牌、SKU、要点、变体、配送摘要、主图等 |
| `amazon_product_reviews` | `product_url`;可选 `limit`(默认 50,最大 100) | 抓取详情页「买家评论」区域已渲染的评论(`[data-hook=review]`),条数以页面实际展示为准 |