diff --git a/mv2_simple_crx/src/actions/amazon.js b/mv2_simple_crx/src/actions/amazon.js index f3baaa1..64dadd5 100644 --- a/mv2_simple_crx/src/actions/amazon.js +++ b/mv2_simple_crx/src/actions/amazon.js @@ -1,20 +1,395 @@ -// amazon_top_list:Amazon TOP 榜单抓取(Best Sellers / New Releases / Movers & Shakers) +// Amazon:搜索列表 + 商品详情 + 评论(注入函数与 action 同文件,便于维护) import { create_tab_task } from '../libs/tabs.js'; -import { injected_amazon_search_list } from '../injected/amazon_search_list.js'; + +// ---------- 页面注入(仅依赖页面 DOM,勿引用本文件其它符号) ---------- + +export function injected_amazon_homepage_search(params) { + const keyword = params && params.keyword ? String(params.keyword).trim() : ''; + if (!keyword) return { ok: false, error: 'empty_keyword' }; + const input = + document.querySelector('#twotabsearchtextbox') || + document.querySelector('input#nav-search-keywords') || + document.querySelector('input[name="field-keywords"]'); + if (!input) return { ok: false, error: 'no_search_input' }; + input.focus(); + input.value = keyword; + input.dispatchEvent(new Event('input', { bubbles: true })); + input.dispatchEvent(new Event('change', { bubbles: true })); + const btn = + document.querySelector('#nav-search-submit-button') || + document.querySelector('#nav-search-bar-form input[type="submit"]') || + document.querySelector('form[role="search"] input[type="submit"]'); + if (btn) { + btn.click(); + return { ok: true }; + } + const form = input.closest('form'); + if (form) { + form.submit(); + return { ok: true }; + } + return { ok: false, error: 'no_submit' }; +} + +export function injected_amazon_search_list(params) { + const start_url = params && params.url ? String(params.url) : location.href; + const category_keyword = params && params.category_keyword ? String(params.category_keyword).trim() : ''; + const sort_by = params && params.sort_by ? String(params.sort_by).trim() : ''; + + function pick_number(text) { + if (!text) return null; + const s = String(text).replace(/[(),]/g, ' ').replace(/\s+/g, ' ').trim(); + const m = s.match(/(\d+(?:\.\d+)?)/); + return m ? Number(m[1]) : null; + } + + function pick_int(text) { + if (!text) return null; + const raw = String(text).replace(/\s+/g, ' ').trim(); + const u = raw.toUpperCase().replace(/,/g, ''); + const km = u.match(/([\d.]+)\s*K\b/); + if (km) return Math.round(parseFloat(km[1]) * 1000); + const mm = u.match(/([\d.]+)\s*M\b/); + if (mm) return Math.round(parseFloat(mm[1]) * 1000000); + const digits = raw.replace(/[^\d]/g, ''); + return digits ? Number(digits) : null; + } + + function abs_url(href) { + try { + return new URL(href, location.origin).toString(); + } catch (_) { + return href; + } + } + + function parse_asin_from_url(url) { + if (!url || typeof url !== 'string') return null; + const m = url.match(/\/dp\/([A-Z0-9]{10})/i) || url.match(/\/gp\/product\/([A-Z0-9]{10})/i); + return m ? m[1].toUpperCase() : null; + } + + function extract_results() { + const items = []; + const nodes = document.querySelectorAll('div.s-main-slot div[data-component-type="s-search-result"]'); + nodes.forEach((el, idx) => { + const asin = (el.getAttribute('data-asin') || '').trim() || null; + const title_el = el.querySelector('h2 span') || el.querySelector('h2'); + const title = title_el ? title_el.textContent.trim() : null; + const a = el.querySelector('a[href*="/dp/"], a[href*="/gp/product/"]'); + const href = a ? a.getAttribute('href') : null; + const item_url = href ? abs_url(href) : null; + const price_el = el.querySelector('span.a-price > span.a-offscreen'); + const price = price_el ? price_el.textContent.trim() : null; + const reviews_block = el.querySelector('div[data-cy="reviews-block"]') || el; + const rating_text = (() => { + const t1 = reviews_block.querySelector('span.a-icon-alt'); + if (t1 && t1.textContent) return t1.textContent.trim(); + const t2 = reviews_block.querySelector('span.a-size-small.a-color-base[aria-hidden="true"]'); + if (t2 && t2.textContent) return t2.textContent.trim(); + return null; + })(); + const rating = (() => { + const n = pick_number(rating_text); + return Number.isFinite(n) ? n : null; + })(); + const review_count_text = (() => { + const a1 = reviews_block.querySelector('a[href*="#customerReviews"]'); + if (a1 && a1.textContent) return a1.textContent.trim(); + const a2 = reviews_block.querySelector( + 'a[aria-label*="rating"], a[aria-label*="ratings"], a[aria-label*="评级"], a[aria-label*="评价"]', + ); + if (a2 && a2.getAttribute('aria-label')) return a2.getAttribute('aria-label').trim(); + const s1 = reviews_block.querySelector('span.a-size-mini.puis-normal-weight-text'); + if (s1 && s1.textContent) return s1.textContent.trim(); + return null; + })(); + const review_count = (() => { + const n = pick_int(review_count_text); + return Number.isFinite(n) ? n : null; + })(); + items.push({ + index: idx + 1, + asin: asin || parse_asin_from_url(item_url), + title, + url: item_url, + price, + rating, + rating_text, + review_count, + review_count_text, + }); + }); + return items; + } + + function pick_next_url() { + const a = document.querySelector('a.s-pagination-next'); + if (!a) return null; + const aria_disabled = (a.getAttribute('aria-disabled') || '').trim().toLowerCase(); + if (aria_disabled === 'true') return null; + if (a.classList && a.classList.contains('s-pagination-disabled')) return null; + const href = a.getAttribute('href'); + if (!href) return null; + return abs_url(href); + } + + const items = extract_results(); + return { + start_url, + href: location.href, + category_keyword, + sort_by, + total: items.length, + items, + next_url: pick_next_url(), + }; +} + +export function injected_amazon_product_detail() { + const norm = (s) => (s || '').replace(/\s+/g, ' ').trim(); + const asin_match = location.pathname.match(/\/(?:dp|gp\/product)\/([A-Z0-9]{10})/i); + const asin = asin_match ? asin_match[1].toUpperCase() : null; + + const product_info = {}; + function set_info(k, v, max_len) { + k = norm(k); + v = norm(v); + max_len = max_len || 600; + if (!k || !v || k.length > 100) return; + if (v.length > max_len) v = v.slice(0, max_len); + if (!product_info[k] || v.length > product_info[k].length) product_info[k] = v; + } + + const table_roots = + '#productOverview_feature_div tr, #poExpander table tr, #productDetails_detailBullets_sections1 tr, ' + + '#productDetails_techSpec_section_1 tr, table.prodDetTable tr, #productFactsDesktopExpander tr, ' + + '#technicalSpecifications_feature_div table tr, #productDetails_db_sections tr'; + document.querySelectorAll(table_roots).forEach((tr) => { + const tds = tr.querySelectorAll('td'); + const th = tr.querySelector('th'); + const td = tr.querySelector('td'); + if (tds.length >= 2) set_info(tds[0].innerText, tds[1].innerText); + else if (th && td && th !== td) set_info(th.innerText, td.innerText); + }); + + const detail_extra_lines = []; + document.querySelectorAll('#detailBullets_feature_div li, #rpi-attribute-values_feature_div li').forEach((li) => { + const t = li.innerText.replace(/\u200f|\u200e/g, ' ').replace(/\s+/g, ' ').trim(); + const m = t.match(/^(.{1,80}?)\s*[::]\s*(.+)$/); + if (m) set_info(m[1], m[2], 1200); + else if (t.length > 8 && t.length < 800) detail_extra_lines.push(t); + }); + + const title_el = document.querySelector('#productTitle'); + const title = title_el ? norm(title_el.textContent) : null; + const price_el = + document.querySelector('#corePrice_feature_div .a-price .a-offscreen') || + document.querySelector('#tp_price_block_total_price_ww .a-offscreen') || + document.querySelector('#price .a-offscreen') || + document.querySelector('.reinventPricePriceToPayMargin .a-offscreen') || + document.querySelector('.a-price .a-offscreen'); + const price = price_el ? price_el.textContent.trim() : null; + + const brand_el = document.querySelector('#bylineInfo'); + const brand_line = brand_el ? norm(brand_el.textContent) : null; + const brand_store_url = document.querySelector('#bylineInfo a[href]')?.href || null; + + const rating_stars = document.querySelector('#acrPopover')?.getAttribute('title') || + document.querySelector('#averageCustomerReviews .a-icon-alt')?.textContent?.trim() || null; + const review_count_text = document.querySelector('#acrCustomerReviewText')?.textContent?.trim() || null; + + const ac_badge = norm(document.querySelector('#acBadge_feature_div')?.innerText) || null; + const social_proof = norm(document.querySelector('#socialProofingAsinFaceout_feature_div')?.innerText) || null; + const bestseller_hint = norm(document.querySelector('#zeitgeistBadge_feature_div')?.innerText)?.slice(0, 200) || null; + let sustainability_hint = null; + document.querySelectorAll('button, span.a-button-text, a').forEach((el) => { + const tx = norm(el.innerText); + if (!tx || tx.length > 90) return; + if ( + /\d+\s*个.*可持续发展|可持续发展特性/.test(tx) || + /\d+\s+sustainability features?/i.test(tx) + ) { + sustainability_hint = tx; + } + }); + + const bullets = []; + document.querySelectorAll('#feature-bullets ul li span.a-list-item').forEach((el) => { + const t = norm(el.textContent); + if (t) bullets.push(t); + }); + + const variants = {}; + document.querySelectorAll('[id^="variation_"]').forEach((block) => { + const key = block.id.replace(/^variation_/, '') || block.id; + const sel = + block.querySelector('.selection') || + block.querySelector('.a-button-selected .a-button-text') || + block.querySelector('[class*="dropdown"]'); + if (sel) { + const v = norm(sel.textContent); + if (v) variants[key] = v; + } + }); + + let delivery_hint = null; + const del = document.querySelector( + '#deliveryBlockMessage, #mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE', + ); + if (del) delivery_hint = norm(del.innerText).slice(0, 500); + + let sku = null; + Object.keys(product_info).forEach((k) => { + if (/^sku$/i.test(k) || /item model|型号|part number|制造商型号/i.test(k)) sku = product_info[k]; + }); + + const images = []; + const seen_img = new Set(); + function add_img(u) { + if (!u || seen_img.has(u)) return; + if (!/media-amazon|images-amazon|ssl-images/i.test(u)) return; + seen_img.add(u); + images.push(u.split('?')[0]); + } + const land = document.querySelector('#landingImage, #imgBlkFront'); + if (land) { + const dyn = land.getAttribute('data-a-dynamic-image'); + if (dyn) { + try { + const o = JSON.parse(dyn); + Object.keys(o).forEach(add_img); + } catch (_) {} + } + if (land.src) add_img(land.src); + } + document.querySelectorAll('#altImages img, #imageBlock_feature_div img, #ivImages img').forEach((img) => { + add_img(img.src || img.getAttribute('data-src')); + }); + + const main_image = images.length ? images[0] : land?.src || null; + + return { + stage: 'detail', + asin, + title, + price, + brand_line, + brand_store_url, + rating_stars, + review_count_text, + ac_badge, + social_proof, + bestseller_hint, + sustainability_hint, + product_info, + detail_extra_lines, + bullets, + variants, + delivery_hint, + sku, + images, + main_image, + url: location.href, + }; +} + +export function injected_amazon_product_reviews(params) { + const raw = params && params.limit != null ? Number(params.limit) : 50; + const limit = Number.isFinite(raw) ? Math.max(1, Math.min(100, Math.floor(raw))) : 50; + const nodes = document.querySelectorAll('[data-hook="review"]'); + const items = []; + nodes.forEach((r) => { + if (items.length >= limit) return; + const author_el = r.querySelector('.a-profile-name'); + const author = author_el ? author_el.textContent.trim() : null; + const title_el = r.querySelector('[data-hook="review-title"]'); + const title = title_el ? title_el.innerText.replace(/\s+/g, ' ').trim() : null; + const body_el = r.querySelector('[data-hook="review-body"]'); + const body = body_el ? body_el.innerText.replace(/\s+/g, ' ').trim() : null; + const rating_el = r.querySelector('[data-hook="review-star-rating"]'); + const rating_text = rating_el ? rating_el.textContent.trim() : null; + const date_el = r.querySelector('[data-hook="review-date"]'); + const date = date_el ? date_el.textContent.trim() : null; + const cr = r.querySelector('[id^="customer_review-"]'); + const review_id = r.id || (cr && cr.id ? cr.id.replace('customer_review-', '') : null); + items.push({ index: items.length + 1, review_id, author, rating_text, title, date, body }); + }); + return { stage: 'reviews', limit, total: items.length, items, url: location.href }; +} + +// ---------- 后台:搜索列表 ---------- + +const AMAZON_ZH_HOME_URL = 'https://www.amazon.com/-/zh/ref=nav_logo'; + +/** 英文搜索列表 URL 模板(与 language=en_US 一致,仅替换 k) */ +function build_amazon_search_url_en(keyword) { + const u = new URL('https://www.amazon.com/s'); + u.searchParams.set('k', keyword); + u.searchParams.set('language', 'en_US'); + u.searchParams.set('crid', '35M31MY4FQI'); + u.searchParams.set('sprefix', ',aps,398'); + u.searchParams.set('ref', 'nb_sb_ss_recent_1_0_recent'); + return u.toString(); +} + +function is_amazon_search_list_url(tab_url) { + if (!tab_url || typeof tab_url !== 'string') return false; + if (!tab_url.includes('amazon.com')) return false; + if (!/\/s(\?|\/)/.test(tab_url)) return false; + return tab_url.includes('k=') || tab_url.includes('keywords=') || tab_url.includes('field-keywords'); +} + +function wait_until_search_list_url(tab_id, timeout_ms) { + const deadline = Date.now() + (timeout_ms || 45000); + return new Promise((resolve, reject) => { + const tick = () => { + chrome.tabs.get(tab_id, (tab) => { + if (chrome.runtime.lastError) return reject(new Error(chrome.runtime.lastError.message)); + const u = tab && tab.url ? tab.url : ''; + if (is_amazon_search_list_url(u)) return resolve(u); + if (Date.now() >= deadline) return reject(new Error('等待首页搜索跳转到列表页超时')); + setTimeout(tick, 400); + }); + }; + tick(); + }); +} + +function wait_tab_complete(tab_id) { + return new Promise((resolve_wait, reject_wait) => { + chrome.tabs.get(tab_id, (tab0) => { + if (!chrome.runtime.lastError && tab0 && tab0.status === 'complete') { + return resolve_wait(tab0); + } + const on_updated = (updated_tab_id, change_info) => { + if (updated_tab_id !== tab_id) return; + if (change_info.status !== 'complete') return; + chrome.tabs.onUpdated.removeListener(on_updated); + resolve_wait(true); + }; + chrome.tabs.onUpdated.addListener(on_updated); + setTimeout(() => { + chrome.tabs.onUpdated.removeListener(on_updated); + reject_wait(new Error('等待页面加载超时')); + }, 45000); + }); + }); +} export function amazon_search_list(data, sendResponse) { return new Promise(async (resolve, reject) => { - const category_keyword = (data && data.category_keyword) ? String(data.category_keyword).trim() : ''; - const sort_by = (data && data.sort_by) ? String(data.sort_by).trim() : ''; + const category_keyword = data && data.category_keyword ? String(data.category_keyword).trim() : ''; + const sort_by = data && data.sort_by ? String(data.sort_by).trim() : ''; const limit = (() => { const n = data && Object.prototype.hasOwnProperty.call(data, 'limit') ? Number(data.limit) : 100; if (!Number.isFinite(n)) return 100; return Math.max(1, Math.min(200, Math.floor(n))); })(); - - const keyword = category_keyword || '野餐包'; - + const keyword = category_keyword || 'picnic bag'; + const search_url_custom = data && data.search_url ? String(data.search_url).trim() : ''; + const entry = data && data.entry ? String(data.entry).trim() : 'direct'; const sort_map = { featured: 'relevanceblender', review: 'review-rank', @@ -24,65 +399,51 @@ export function amazon_search_list(data, sendResponse) { bestseller: 'exact-aware-popularity-rank', }; const sort_s = Object.prototype.hasOwnProperty.call(sort_map, sort_by) ? sort_map[sort_by] : ''; - - // 内置 URL,只替换 k / s 参数,其它参数保持一致 - const default_url = (() => { - const u = new URL('https://www.amazon.com/s'); - u.searchParams.set('k', keyword); - u.searchParams.set('__mk_zh_CN', '亚马逊网站'); - u.searchParams.set('crid', 'ZKNCI4U8BBAP'); - u.searchParams.set('ref', 'nb_sb_noss'); - if (sort_s) u.searchParams.set('s', sort_s); - else u.searchParams.delete('s'); - return u.toString(); - })(); - - const url = default_url; - - let times = 0; - const send_action = (action, payload) => { if (typeof sendResponse === 'function') { sendResponse({ action, data: payload }); sendResponse.log && sendResponse.log(payload); } }; - - const tab_task = create_tab_task(url) + const tab_task = create_tab_task(AMAZON_ZH_HOME_URL) .set_latest(false) .set_bounds({ top: 20, left: 20, width: 1440, height: 900 }) .set_target('__amazon_search_list'); - + let url = AMAZON_ZH_HOME_URL; try { const tab = await tab_task.open_async(); - - const wait_tab_complete = (tab_id) => new Promise((resolve_wait, reject_wait) => { - const on_updated = (updated_tab_id, change_info, updated_tab) => { - if (updated_tab_id !== tab_id) return; - if (change_info.status !== 'complete') return; - chrome.tabs.onUpdated.removeListener(on_updated); - resolve_wait(updated_tab); - }; - chrome.tabs.onUpdated.addListener(on_updated); - setTimeout(() => { - chrome.tabs.onUpdated.removeListener(on_updated); - reject_wait(new Error('等待页面加载超时')); - }, 45000); - }); - - const unique_map = new Map(); - let next_url = url; - let page = 1; - - while (next_url && unique_map.size < limit) { + await wait_tab_complete(tab.id); + const home_ret = await tab.execute_script(injected_amazon_homepage_search, [{ keyword }], 'document_idle'); + const home_ok = Array.isArray(home_ret) ? home_ret[0] : home_ret; + if (!home_ok || !home_ok.ok) { + throw new Error((home_ok && home_ok.error) || '首页搜索提交失败'); + } + url = await wait_until_search_list_url(tab.id, 45000); + await wait_tab_complete(tab.id); + if (sort_s) { + const u = new URL(url); + u.searchParams.set('s', sort_s); + url = u.toString(); await new Promise((resolve_nav, reject_nav) => { - chrome.tabs.update(tab.id, { url: next_url, active: true }, () => { + chrome.tabs.update(tab.id, { url, active: true }, () => { if (chrome.runtime.lastError) return reject_nav(new Error(chrome.runtime.lastError.message)); resolve_nav(true); }); }); await wait_tab_complete(tab.id); - + } + const unique_map = new Map(); + let next_url = url; + for (let page = 1; page <= 10 && unique_map.size < limit; page += 1) { + if (page > 1) { + await new Promise((resolve_nav, reject_nav) => { + chrome.tabs.update(tab.id, { url: next_url, active: true }, () => { + if (chrome.runtime.lastError) return reject_nav(new Error(chrome.runtime.lastError.message)); + resolve_nav(true); + }); + }); + await wait_tab_complete(tab.id); + } const injected_result_list = await tab.execute_script( injected_amazon_search_list, [{ url: next_url, category_keyword, sort_by }], @@ -90,67 +451,141 @@ export function amazon_search_list(data, sendResponse) { ); const injected_result = Array.isArray(injected_result_list) ? injected_result_list[0] : null; const items = injected_result && Array.isArray(injected_result.items) ? injected_result.items : []; - items.forEach((it) => { - const k = (it && (it.asin || it.url)) ? String(it.asin || it.url) : null; + const k = it && (it.asin || it.url) ? String(it.asin || it.url) : null; if (!k) return; if (!unique_map.has(k)) unique_map.set(k, it); }); - + if (unique_map.size >= limit) break; next_url = injected_result && injected_result.next_url ? String(injected_result.next_url) : null; - page += 1; - if (page > 10) break; // 防止死循环(默认 100 条一般 <= 3 页) + if (!next_url) break; } - - const injected_result = { + const list_result = { stage: 'list', limit, total: unique_map.size, items: Array.from(unique_map.values()).slice(0, limit), }; - const result = { code: 0, status: true, message: 'ok', - data: { tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: injected_result }, + data: { tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: list_result }, }; send_action('amazon_search_list', result); - resolve({ tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: injected_result }); - - // 成功后关闭打开的 tab(同时会关闭 popup window) + resolve({ tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: list_result }); tab.remove(0); } catch (err) { - const result = { + send_action('amazon_search_list', { code: 30, status: false, message: (err && err.message) || String(err), data: null, - documentURI: url, - }; - send_action('amazon_search_list', result); + documentURI: url || AMAZON_ZH_HOME_URL, + }); reject(err); } }); } -amazon_search_list.desc = 'Amazon 搜索结果列表抓取(DOM 解析)'; +amazon_search_list.desc = 'Amazon 搜索列表:先打开中文首页,搜索框输入类目并搜索,再分页抓取'; amazon_search_list.params = { - category_keyword: { - type: 'string', - desc: '分类关键词', - default: '野餐包', - }, + category_keyword: { type: 'string', desc: '类目关键词(在首页搜索框输入后点搜索,进入列表再抓)', default: '野餐包' }, sort_by: { type: 'string', - desc: '排序方式:featured(精选) / price_asc(价格从低到高) / price_desc(价格从高到低) / review(平均买家评论数) / newest(最新商品) / bestseller(畅销商品)', + desc: '排序方式:featured / price_asc / price_desc / review / newest / bestseller', default: 'featured', }, - limit: { - type: 'number', - desc: '抓取数量上限(默认 100,最大 200)', - default: 100, + limit: { type: 'number', desc: '抓取数量上限(默认 100,最大 200)', default: 100 }, +}; + +// ---------- 后台:商品详情 / 评论 ---------- + +function normalize_product_url(u) { + let s = u ? String(u).trim() : ''; + if (!s) throw new Error('缺少 product_url'); + if (s.startsWith('//')) s = 'https:' + s; + if (!/^https?:\/\//i.test(s)) s = 'https://' + s; + const url_obj = new URL(s); + if (!url_obj.hostname.includes('amazon.')) throw new Error('product_url 需为亚马逊域名'); + if (!/\/dp\/[A-Z0-9]{10}/i.test(url_obj.pathname) && !/\/gp\/product\/[A-Z0-9]{10}/i.test(url_obj.pathname)) { + throw new Error('product_url 需包含 /dp/ASIN 或 /gp/product/ASIN'); + } + return url_obj.toString(); +} + +function run_pdp_action(product_url, injected_fn, inject_args, action_name, sendResponse) { + const send_action = (action, payload) => { + if (typeof sendResponse === 'function') { + sendResponse({ action, data: payload }); + sendResponse.log && sendResponse.log(payload); + } + }; + return new Promise(async (resolve, reject) => { + let url = product_url; + try { + url = normalize_product_url(product_url); + } catch (e) { + send_action(action_name, { code: 10, status: false, message: e.message, data: null }); + return reject(e); + } + const tab_task = create_tab_task(url).set_bounds({ top: 20, left: 20, width: 1280, height: 900 }); + try { + const tab = await tab_task.open_async(); + await wait_tab_complete(tab.id); + const raw_list = await tab.execute_script(injected_fn, inject_args || [], 'document_idle'); + const result = Array.isArray(raw_list) ? raw_list[0] : raw_list; + send_action(action_name, { + code: 0, + status: true, + message: 'ok', + data: { tab_id: tab.id, product_url: url, result }, + }); + resolve({ tab_id: tab.id, product_url: url, result }); + tab.remove(0); + } catch (err) { + send_action(action_name, { + code: 30, + status: false, + message: (err && err.message) || String(err), + data: null, + documentURI: url, + }); + reject(err); + } + }); +} + +export function amazon_product_detail(data, sendResponse) { + return run_pdp_action(data && data.product_url, injected_amazon_product_detail, [], 'amazon_product_detail', sendResponse); +} + +amazon_product_detail.desc = 'Amazon 商品详情(标题、价格、品牌、SKU、要点、变体、配送摘要等)'; +amazon_product_detail.params = { + product_url: { + type: 'string', + desc: '商品详情页完整 URL(含 /dp/ASIN)', + default: 'https://www.amazon.com/-/zh/dp/B0B56CHMSC', }, }; +export function amazon_product_reviews(data, sendResponse) { + const limit = data && data.limit != null ? Number(data.limit) : 50; + return run_pdp_action( + data && data.product_url, + injected_amazon_product_reviews, + [{ limit }], + 'amazon_product_reviews', + sendResponse, + ); +} +amazon_product_reviews.desc = 'Amazon 商品页买家评论(详情页 [data-hook=review],条数受页面展示限制)'; +amazon_product_reviews.params = { + product_url: { + type: 'string', + desc: '商品详情页完整 URL', + default: 'https://www.amazon.com/-/zh/dp/B0B56CHMSC', + }, + limit: { type: 'number', desc: '最多条数(默认 50,上限 100)', default: 50 }, +}; diff --git a/mv2_simple_crx/src/background/index.js b/mv2_simple_crx/src/background/index.js index ba5cdb3..212e316 100644 --- a/mv2_simple_crx/src/background/index.js +++ b/mv2_simple_crx/src/background/index.js @@ -1,8 +1,10 @@ -import { amazon_search_list } from '../actions/amazon.js'; +import { amazon_search_list, amazon_product_detail, amazon_product_reviews } from '../actions/amazon.js'; const actions = { amazon_search_list, + amazon_product_detail, + amazon_product_reviews, }; function list_actions_meta() { diff --git a/mv2_simple_crx/src/injected/amazon_search_list.js b/mv2_simple_crx/src/injected/amazon_search_list.js deleted file mode 100644 index a4fcf55..0000000 --- a/mv2_simple_crx/src/injected/amazon_search_list.js +++ /dev/null @@ -1,121 +0,0 @@ -// 注入到页面的 Amazon 搜索列表解析逻辑 - -export function injected_amazon_search_list(params) { - const start_url = params && params.url ? String(params.url) : location.href; - const category_keyword = params && params.category_keyword ? String(params.category_keyword).trim() : ''; - const sort_by = params && params.sort_by ? String(params.sort_by).trim() : ''; - - function pick_number(text) { - if (!text) return null; - const s = String(text).replace(/[(),]/g, ' ').replace(/\s+/g, ' ').trim(); - const m = s.match(/(\d+(?:\.\d+)?)/); - return m ? Number(m[1]) : null; - } - - function pick_int(text) { - if (!text) return null; - const s = String(text).replace(/[^\d]/g, ''); - return s ? Number(s) : null; - } - - function abs_url(href) { - try { - return new URL(href, location.origin).toString(); - } catch (_) { - return href; - } - } - - function parse_asin_from_url(url) { - if (!url || typeof url !== 'string') return null; - const m = url.match(/\/dp\/([A-Z0-9]{10})/i) || url.match(/\/gp\/product\/([A-Z0-9]{10})/i); - return m ? m[1].toUpperCase() : null; - } - - function extract_results() { - const items = []; - const nodes = document.querySelectorAll('div.s-main-slot div[data-component-type="s-search-result"]'); - nodes.forEach((el, idx) => { - const asin = (el.getAttribute('data-asin') || '').trim() || null; - - const title_el = el.querySelector('h2 span') || el.querySelector('h2'); - const title = title_el ? title_el.textContent.trim() : null; - - const a = el.querySelector('a[href*="/dp/"], a[href*="/gp/product/"]'); - const href = a ? a.getAttribute('href') : null; - const url = href ? abs_url(href) : null; - - const price_el = el.querySelector('span.a-price > span.a-offscreen'); - const price = price_el ? price_el.textContent.trim() : null; - - const reviews_block = el.querySelector('div[data-cy="reviews-block"]') || el; - - const rating_text = (() => { - const t1 = reviews_block.querySelector('span.a-icon-alt'); - if (t1 && t1.textContent) return t1.textContent.trim(); - const t2 = reviews_block.querySelector('span.a-size-small.a-color-base[aria-hidden="true"]'); - if (t2 && t2.textContent) return t2.textContent.trim(); - return null; - })(); - - const rating = (() => { - const n = pick_number(rating_text); - return Number.isFinite(n) ? n : null; - })(); - - const review_count_text = (() => { - const a1 = reviews_block.querySelector('a[href*="#customerReviews"]'); - if (a1 && a1.textContent) return a1.textContent.trim(); - const a2 = reviews_block.querySelector('a[aria-label*="rating"], a[aria-label*="ratings"], a[aria-label*="评级"], a[aria-label*="评价"]'); - if (a2 && a2.getAttribute('aria-label')) return a2.getAttribute('aria-label').trim(); - const s1 = reviews_block.querySelector('span.a-size-mini.puis-normal-weight-text'); - if (s1 && s1.textContent) return s1.textContent.trim(); - return null; - })(); - - const review_count = (() => { - const n = pick_int(review_count_text); - return Number.isFinite(n) ? n : null; - })(); - - items.push({ - index: idx + 1, - asin: asin || parse_asin_from_url(url), - title, - url, - price, - rating, - rating_text, - review_count, - review_count_text, - }); - }); - - return items; - } - - function pick_next_url() { - const a = document.querySelector('a.s-pagination-next'); - if (!a) return null; - const aria_disabled = (a.getAttribute('aria-disabled') || '').trim().toLowerCase(); - if (aria_disabled === 'true') return null; - if (a.classList && a.classList.contains('s-pagination-disabled')) return null; - const href = a.getAttribute('href'); - if (!href) return null; - return abs_url(href); - } - - const items = extract_results(); - const next_url = pick_next_url(); - - // 只返回一次结果,避免 send 太多影响判定 - return { - start_url, - href: location.href, - category_keyword, - sort_by, - total: items.length, - items, - next_url, - }; -} diff --git a/mv2_simple_crx/src/ui/index.html b/mv2_simple_crx/src/ui/index.html index 2269a65..ede6de4 100644 --- a/mv2_simple_crx/src/ui/index.html +++ b/mv2_simple_crx/src/ui/index.html @@ -25,6 +25,8 @@ + + diff --git a/执行计划.md b/执行计划.md new file mode 100644 index 0000000..2057b10 --- /dev/null +++ b/执行计划.md @@ -0,0 +1,12 @@ +提取产品标题、价格、SKU、品牌、变体、上架时间、物流方式。 + +抓取售后留言和评论(Reviews) + +--- + +## 扩展指令(mv2_simple_crx) + +| 指令 | 参数 | 说明 | +|------|------|------| +| `amazon_product_detail` | `product_url`(商品详情页完整链接,须含 `/dp/ASIN`) | 抓取标题、价格、品牌、SKU、要点、变体、配送摘要、主图等 | +| `amazon_product_reviews` | `product_url`;可选 `limit`(默认 50,最大 100) | 抓取详情页「买家评论」区域已渲染的评论(`[data-hook=review]`),条数以页面实际展示为准 |