@@ -1,20 +1,395 @@
// amazon_top_list: Amazon TOP 榜单抓取( Best Sellers / New Releases / Movers & Shakers )
// Amazon: 搜索列表 + 商品详情 + 评论(注入函数与 action 同文件,便于维护 )
import { create _tab _task } from '../libs/tabs.js' ;
import { injected _amazon _search _list } from '../injected/amazon_search_list.js' ;
// ---------- 页面注入(仅依赖页面 DOM, 勿引用本文件其它符号) ----------
/**
 * Page-injected helper: types a keyword into the Amazon header search box and
 * submits the search.
 *
 * This function is executed inside the page, so it relies on the page DOM only
 * and must not reference any other symbol of this module.
 *
 * @param {{keyword?: *}} params - `keyword` is stringified and trimmed.
 * @returns {{ok: true} | {ok: false, error: string}} On failure `error` is one of
 *   `empty_keyword`, `no_search_input` or `no_submit`.
 */
export function injected_amazon_homepage_search(params) {
  const keyword = params && params.keyword ? String(params.keyword).trim() : '';
  if (!keyword) return { ok: false, error: 'empty_keyword' };

  // Try the known search-box selectors in order of preference.
  const input =
    document.querySelector('#twotabsearchtextbox') ||
    document.querySelector('input#nav-search-keywords') ||
    document.querySelector('input[name="field-keywords"]');
  if (!input) return { ok: false, error: 'no_search_input' };

  input.focus();
  input.value = keyword;
  // Setting `.value` programmatically fires no events, so dispatch them by hand
  // for any page script listening on the field.
  input.dispatchEvent(new Event('input', { bubbles: true }));
  input.dispatchEvent(new Event('change', { bubbles: true }));

  const btn =
    document.querySelector('#nav-search-submit-button') ||
    document.querySelector('#nav-search-bar-form input[type="submit"]') ||
    document.querySelector('form[role="search"] input[type="submit"]');
  if (btn) {
    btn.click();
    return { ok: true };
  }

  // No submit button found: submit the enclosing form instead.
  const form = input.closest('form');
  if (form) {
    // requestSubmit() fires the `submit` event like a real user submit;
    // submit() bypasses submit handlers, so it is only the fallback.
    if (typeof form.requestSubmit === 'function') form.requestSubmit();
    else form.submit();
    return { ok: true };
  }
  return { ok: false, error: 'no_submit' };
}
/**
 * Page-injected scraper for one Amazon search-result page.
 *
 * All helpers are nested on purpose: the function is executed inside the page
 * and must not reference any other symbol of this module.
 *
 * @param {{url?: string, category_keyword?: string, sort_by?: string}} params
 *   Values echoed back in the result; `url` defaults to the current page URL.
 * @returns {{start_url: string, href: string, category_keyword: string, sort_by: string,
 *   total: number, items: Array<object>, next_url: (string|null)}}
 *   `items` holds one record per result card; `next_url` is the absolute URL of
 *   the next page, or null when there is none or the link is disabled.
 */
export function injected_amazon_search_list(params) {
  const start_url = params && params.url ? String(params.url) : location.href;
  const category_keyword = params && params.category_keyword ? String(params.category_keyword).trim() : '';
  const sort_by = params && params.sort_by ? String(params.sort_by).trim() : '';

  /** First decimal number found in `text` (e.g. "4.5 out of 5 stars" -> 4.5), or null. */
  function pick_number(text) {
    if (!text) return null;
    const cleaned = String(text).replace(/[(),]/g, ' ').replace(/\s+/g, ' ').trim();
    const found = cleaned.match(/(\d+(?:\.\d+)?)/);
    return found ? Number(found[1]) : null;
  }

  /** Integer count from `text`; understands "1.2K" / "3M" suffixes, otherwise joins all digits. */
  function pick_int(text) {
    if (!text) return null;
    const raw = String(text).replace(/\s+/g, ' ').trim();
    const upper = raw.toUpperCase().replace(/,/g, '');
    const thousands = upper.match(/([\d.]+)\s*K\b/);
    if (thousands) return Math.round(parseFloat(thousands[1]) * 1000);
    const millions = upper.match(/([\d.]+)\s*M\b/);
    if (millions) return Math.round(parseFloat(millions[1]) * 1000000);
    const digits = raw.replace(/[^\d]/g, '');
    return digits ? Number(digits) : null;
  }

  /**
   * Absolute form of `href`. Resolved against the full page URL (not just the
   * origin) so relative and query-only hrefs keep the current path.
   */
  function abs_url(href) {
    try {
      return new URL(href, location.href).toString();
    } catch (_) {
      // Unparseable href: hand it back unchanged rather than dropping it.
      return href;
    }
  }

  /** Upper-cased 10-character ASIN taken from a /dp/ or /gp/product/ URL, or null. */
  function parse_asin_from_url(url) {
    if (!url || typeof url !== 'string') return null;
    const found = url.match(/\/dp\/([A-Z0-9]{10})/i) || url.match(/\/gp\/product\/([A-Z0-9]{10})/i);
    return found ? found[1].toUpperCase() : null;
  }

  /** Rating text of a card: star icon alt text first, then the small visible number. */
  function read_rating_text(block) {
    const icon = block.querySelector('span.a-icon-alt');
    if (icon && icon.textContent) return icon.textContent.trim();
    const small = block.querySelector('span.a-size-small.a-color-base[aria-hidden="true"]');
    if (small && small.textContent) return small.textContent.trim();
    return null;
  }

  /** Review-count text of a card: reviews link, then a rating aria-label, then the mini span. */
  function read_review_count_text(block) {
    const link = block.querySelector('a[href*="#customerReviews"]');
    if (link && link.textContent) return link.textContent.trim();
    const labelled = block.querySelector(
      'a[aria-label*="rating"], a[aria-label*="ratings"], a[aria-label*="评级"], a[aria-label*="评价"]',
    );
    if (labelled && labelled.getAttribute('aria-label')) return labelled.getAttribute('aria-label').trim();
    const mini = block.querySelector('span.a-size-mini.puis-normal-weight-text');
    if (mini && mini.textContent) return mini.textContent.trim();
    return null;
  }

  /** Builds one record per result card, in page order (index is 1-based). */
  function extract_results() {
    const cards = document.querySelectorAll('div.s-main-slot div[data-component-type="s-search-result"]');
    return Array.from(cards).map((el, idx) => {
      const asin = (el.getAttribute('data-asin') || '').trim() || null;
      const title_el = el.querySelector('h2 span') || el.querySelector('h2');
      const title = title_el ? title_el.textContent.trim() : null;
      const link = el.querySelector('a[href*="/dp/"], a[href*="/gp/product/"]');
      const href = link ? link.getAttribute('href') : null;
      const item_url = href ? abs_url(href) : null;
      const price_el = el.querySelector('span.a-price > span.a-offscreen');
      const price = price_el ? price_el.textContent.trim() : null;

      // Fall back to the whole card when the dedicated reviews block is absent.
      const reviews_block = el.querySelector('div[data-cy="reviews-block"]') || el;
      const rating_text = read_rating_text(reviews_block);
      const rating_num = pick_number(rating_text);
      const review_count_text = read_review_count_text(reviews_block);
      const review_num = pick_int(review_count_text);

      return {
        index: idx + 1,
        // Prefer the card's data-asin; otherwise derive it from the product URL.
        asin: asin || parse_asin_from_url(item_url),
        title,
        url: item_url,
        price,
        rating: Number.isFinite(rating_num) ? rating_num : null,
        rating_text,
        review_count: Number.isFinite(review_num) ? review_num : null,
        review_count_text,
      };
    });
  }

  /** Absolute URL of the "next page" link, or null when missing or disabled. */
  function pick_next_url() {
    const next = document.querySelector('a.s-pagination-next');
    if (!next) return null;
    const aria_disabled = (next.getAttribute('aria-disabled') || '').trim().toLowerCase();
    if (aria_disabled === 'true') return null;
    if (next.classList && next.classList.contains('s-pagination-disabled')) return null;
    const href = next.getAttribute('href');
    return href ? abs_url(href) : null;
  }

  const items = extract_results();
  return {
    start_url,
    href: location.href,
    category_keyword,
    sort_by,
    total: items.length,
    items,
    next_url: pick_next_url(),
  };
}
/**
 * Page-injected scraper for an Amazon product detail page.
 *
 * Reads everything from the current document and `location`; it is executed
 * inside the page and must not reference any other symbol of this module.
 *
 * @returns {object} A record with `stage: 'detail'` plus asin, title, price,
 *   brand, rating/review texts, badge hints, `product_info` (key/value pairs from
 *   the spec tables and detail bullets), `detail_extra_lines`, `bullets`,
 *   `variants`, `delivery_hint`, `sku`, `images`, `main_image` and `url`.
 *   Fields that cannot be found are null (or empty collections).
 */
export function injected_amazon_product_detail() {
  /** Collapses whitespace runs and trims; null/undefined become ''. */
  const norm = (s) => (s || '').replace(/\s+/g, ' ').trim();

  const asin_match = location.pathname.match(/\/(?:dp|gp\/product)\/([A-Z0-9]{10})/i);
  const asin = asin_match ? asin_match[1].toUpperCase() : null;

  const product_info = {};
  /** Stores k -> v (value truncated to max_len); for a repeated key the longer value wins. */
  function set_info(k, v, max_len) {
    k = norm(k);
    v = norm(v);
    max_len = max_len || 600;
    if (!k || !v || k.length > 100) return;
    if (v.length > max_len) v = v.slice(0, max_len);
    if (!product_info[k] || v.length > product_info[k].length) product_info[k] = v;
  }

  // Spec / overview tables: either two <td> cells or a <th>/<td> pair per row.
  const table_roots =
    '#productOverview_feature_div tr, #poExpander table tr, #productDetails_detailBullets_sections1 tr, ' +
    '#productDetails_techSpec_section_1 tr, table.prodDetTable tr, #productFactsDesktopExpander tr, ' +
    '#technicalSpecifications_feature_div table tr, #productDetails_db_sections tr';
  document.querySelectorAll(table_roots).forEach((tr) => {
    const tds = tr.querySelectorAll('td');
    const th = tr.querySelector('th');
    const td = tr.querySelector('td');
    if (tds.length >= 2) set_info(tds[0].innerText, tds[1].innerText);
    else if (th && td && th !== td) set_info(th.innerText, td.innerText);
  });

  // Detail bullets: "Key : Value" lines go to product_info, other lines are kept verbatim.
  const detail_extra_lines = [];
  document.querySelectorAll('#detailBullets_feature_div li, #rpi-attribute-values_feature_div li').forEach((li) => {
    // Strip the bidi marks (U+200F / U+200E) that surround the separator.
    const t = li.innerText.replace(/\u200f|\u200e/g, ' ').replace(/\s+/g, ' ').trim();
    // Split on the first ASCII or full-width colon only. A plain space must NOT be
    // a separator, otherwise multi-word keys such as "Item Weight" are cut at the
    // first space.
    const m = t.match(/^(.{1,80}?)\s*[:\uFF1A]\s*(.+)$/);
    if (m) set_info(m[1], m[2], 1200);
    else if (t.length > 8 && t.length < 800) detail_extra_lines.push(t);
  });

  const title_el = document.querySelector('#productTitle');
  const title = title_el ? norm(title_el.textContent) : null;

  // Price candidates from most to least specific.
  const price_el =
    document.querySelector('#corePrice_feature_div .a-price .a-offscreen') ||
    document.querySelector('#tp_price_block_total_price_ww .a-offscreen') ||
    document.querySelector('#price .a-offscreen') ||
    document.querySelector('.reinventPricePriceToPayMargin .a-offscreen') ||
    document.querySelector('.a-price .a-offscreen');
  const price = price_el ? price_el.textContent.trim() : null;

  const brand_el = document.querySelector('#bylineInfo');
  const brand_line = brand_el ? norm(brand_el.textContent) : null;
  const brand_store_url = document.querySelector('#bylineInfo a[href]')?.href || null;

  const rating_stars =
    document.querySelector('#acrPopover')?.getAttribute('title') ||
    document.querySelector('#averageCustomerReviews .a-icon-alt')?.textContent?.trim() ||
    null;
  const review_count_text = document.querySelector('#acrCustomerReviewText')?.textContent?.trim() || null;
  const ac_badge = norm(document.querySelector('#acBadge_feature_div')?.innerText) || null;
  const social_proof = norm(document.querySelector('#socialProofingAsinFaceout_feature_div')?.innerText) || null;
  const bestseller_hint = norm(document.querySelector('#zeitgeistBadge_feature_div')?.innerText).slice(0, 200) || null;

  // Short button/link captions mentioning sustainability features; the last match wins.
  let sustainability_hint = null;
  document.querySelectorAll('button, span.a-button-text, a').forEach((el) => {
    const tx = norm(el.innerText);
    if (!tx || tx.length > 90) return;
    if (/\d+\s*个.*可持续发展|可持续发展特性/.test(tx) || /\d+\s+sustainability features?/i.test(tx)) {
      sustainability_hint = tx;
    }
  });

  const bullets = [];
  document.querySelectorAll('#feature-bullets ul li span.a-list-item').forEach((el) => {
    const t = norm(el.textContent);
    if (t) bullets.push(t);
  });

  // Currently selected value of every variation_* block, keyed by the id suffix.
  const variants = {};
  document.querySelectorAll('[id^="variation_"]').forEach((block) => {
    const key = block.id.replace(/^variation_/, '') || block.id;
    const sel =
      block.querySelector('.selection') ||
      block.querySelector('.a-button-selected .a-button-text') ||
      block.querySelector('[class*="dropdown"]');
    if (sel) {
      const v = norm(sel.textContent);
      if (v) variants[key] = v;
    }
  });

  let delivery_hint = null;
  const del = document.querySelector(
    '#deliveryBlockMessage, #mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE',
  );
  if (del) delivery_hint = norm(del.innerText).slice(0, 500);

  // SKU / model number is taken from product_info; the last matching key wins.
  let sku = null;
  Object.keys(product_info).forEach((k) => {
    if (/^sku$/i.test(k) || /item model|型号|part number|制造商型号/i.test(k)) sku = product_info[k];
  });

  const images = [];
  const seen_img = new Set();
  /** Adds an Amazon-hosted image URL once; the query string is dropped before de-duplication. */
  function add_img(u) {
    if (!u) return;
    if (!/media-amazon|images-amazon|ssl-images/i.test(u)) return;
    // De-duplicate on the stored (query-less) form, so the same image reached
    // through different query strings is listed only once.
    const clean = u.split('?')[0];
    if (seen_img.has(clean)) return;
    seen_img.add(clean);
    images.push(clean);
  }
  const land = document.querySelector('#landingImage, #imgBlkFront');
  if (land) {
    const dyn = land.getAttribute('data-a-dynamic-image');
    if (dyn) {
      try {
        // The attribute holds a JSON object whose keys are image URLs.
        Object.keys(JSON.parse(dyn)).forEach(add_img);
      } catch (_) {
        // Malformed JSON: fall back to the plain src below.
      }
    }
    if (land.src) add_img(land.src);
  }
  document.querySelectorAll('#altImages img, #imageBlock_feature_div img, #ivImages img').forEach((img) => {
    add_img(img.src || img.getAttribute('data-src'));
  });
  const main_image = images.length ? images[0] : land?.src || null;

  return {
    stage: 'detail',
    asin,
    title,
    price,
    brand_line,
    brand_store_url,
    rating_stars,
    review_count_text,
    ac_badge,
    social_proof,
    bestseller_hint,
    sustainability_hint,
    product_info,
    detail_extra_lines,
    bullets,
    variants,
    delivery_hint,
    sku,
    images,
    main_image,
    url: location.href,
  };
}
/**
 * Page-injected scraper for the customer reviews rendered on the current page.
 *
 * Executed inside the page; relies on the page DOM only.
 *
 * @param {{limit?: *}} [params] - `limit` is coerced to a number and clamped to
 *   1..100; a missing or non-numeric value falls back to 50.
 * @returns {{stage: 'reviews', limit: number, total: number, items: Array<object>, url: string}}
 *   Each item has `index` (1-based), `review_id`, `author`, `rating_text`,
 *   `title`, `date` and `body`; parts that are not found are null.
 */
export function injected_amazon_product_reviews(params) {
  const requested = params && params.limit != null ? Number(params.limit) : 50;
  const limit = Number.isFinite(requested) ? Math.max(1, Math.min(100, Math.floor(requested))) : 50;

  /** Trimmed textContent of the first match under `root`, or null. */
  const text_of = (root, selector) => {
    const node = root.querySelector(selector);
    return node ? node.textContent.trim() : null;
  };
  /** innerText of the first match under `root` with whitespace collapsed, or null. */
  const flat_text_of = (root, selector) => {
    const node = root.querySelector(selector);
    return node ? node.innerText.replace(/\s+/g, ' ').trim() : null;
  };

  const items = [];
  for (const review of document.querySelectorAll('[data-hook="review"]')) {
    // Stop scanning as soon as the limit is reached.
    if (items.length >= limit) break;
    // The review id is the node's own id, else the nested customer_review-* id without its prefix.
    const inner = review.querySelector('[id^="customer_review-"]');
    const review_id = review.id || (inner && inner.id ? inner.id.replace('customer_review-', '') : null);
    items.push({
      index: items.length + 1,
      review_id,
      author: text_of(review, '.a-profile-name'),
      rating_text: text_of(review, '[data-hook="review-star-rating"]'),
      title: flat_text_of(review, '[data-hook="review-title"]'),
      date: text_of(review, '[data-hook="review-date"]'),
      body: flat_text_of(review, '[data-hook="review-body"]'),
    });
  }
  return { stage: 'reviews', limit, total: items.length, items, url: location.href };
}
// ---------- Background: search list ----------
// Amazon home page under the /-/zh/ (Chinese) path prefix.
const AMAZON_ZH_HOME_URL = 'https://www.amazon.com/-/zh/ref=nav_logo';
/**
 * Builds the English-language Amazon search-list URL from a fixed template in
 * which only the `k` (keyword) parameter varies.
 *
 * @param {*} keyword - Search keyword; null/undefined are treated as an empty
 *   keyword instead of being serialised as the text "null"/"undefined".
 * @returns {string} Absolute URL with `language=en_US` and the template's
 *   fixed `crid`, `sprefix` and `ref` values.
 */
function build_amazon_search_url_en(keyword) {
  const target = new URL('https://www.amazon.com/s');
  const fixed_params = [
    ['k', keyword == null ? '' : String(keyword)],
    ['language', 'en_US'],
    ['crid', '35M31MY4FQI'],
    ['sprefix', ',aps,398'],
    ['ref', 'nb_sb_ss_recent_1_0_recent'],
  ];
  for (const [name, value] of fixed_params) target.searchParams.set(name, value);
  return target.toString();
}
/**
 * Tells whether a tab URL is an Amazon search-result list page.
 *
 * The URL is parsed and each component is checked separately, so a foreign host
 * that merely mentions "amazon.com" somewhere, or a query parameter whose name
 * only ends in "k" (e.g. `rnk=`), no longer counts as a match.
 *
 * @param {*} tab_url - Candidate URL; anything that is not a non-empty,
 *   parseable string yields false.
 * @returns {boolean} True when the host is amazon.com (any subdomain, optional
 *   two-letter country suffix), the path contains an `s` segment, and the query
 *   carries `k`, `keywords` or `field-keywords`.
 */
function is_amazon_search_list_url(tab_url) {
  if (!tab_url || typeof tab_url !== 'string') return false;
  let parsed;
  try {
    parsed = new URL(tab_url);
  } catch (_) {
    // Not a parseable absolute URL, so it cannot be a search list.
    return false;
  }
  if (!/(^|\.)amazon\.com(\.[a-z]{2})?$/i.test(parsed.hostname)) return false;
  // Matches /s, /s/ref=..., and localized prefixes such as /-/zh/s.
  if (!/(^|\/)s(\/|$)/.test(parsed.pathname)) return false;
  return ['k', 'keywords', 'field-keywords'].some((name) => parsed.searchParams.has(name));
}
/**
 * Polls a tab until its URL is an Amazon search-list URL.
 *
 * @param {number} tab_id - Id passed to `chrome.tabs.get`.
 * @param {number} [timeout_ms] - Overall deadline; a falsy value means 45000 ms.
 * @param {number} [poll_ms=400] - Delay between two polls.
 * @returns {Promise<string>} Resolves with the matching tab URL. Rejects with the
 *   `chrome.runtime.lastError` message if the tab cannot be read, or with a
 *   timeout error once the deadline has passed.
 */
function wait_until_search_list_url(tab_id, timeout_ms, poll_ms = 400) {
  const deadline = Date.now() + (timeout_ms || 45000);
  return new Promise((resolve, reject) => {
    const poll = () => {
      chrome.tabs.get(tab_id, (tab) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message));
          return;
        }
        const current = tab && tab.url ? tab.url : '';
        if (is_amazon_search_list_url(current)) {
          resolve(current);
          return;
        }
        // The URL is checked before the deadline, so a match on the final poll still wins.
        if (Date.now() >= deadline) {
          reject(new Error('等待首页搜索跳转到列表页超时'));
          return;
        }
        setTimeout(poll, poll_ms);
      });
    };
    poll();
  });
}
/**
 * Waits until a tab reports `status === 'complete'`.
 *
 * @param {number} tab_id - Id of the tab to watch.
 * @param {number} [timeout_ms=45000] - How long to wait for the `complete` update.
 * @returns {Promise<object|boolean>} Resolves with the tab object (immediately
 *   when the tab is already complete), or `true` if the update event carried no
 *   tab. Rejects with the `chrome.runtime.lastError` message when the tab cannot
 *   be read, or with a timeout error.
 */
function wait_tab_complete(tab_id, timeout_ms = 45000) {
  return new Promise((resolve_wait, reject_wait) => {
    chrome.tabs.get(tab_id, (tab0) => {
      // An unreadable tab (e.g. already closed) will never complete: fail now
      // instead of waiting out the whole timeout.
      if (chrome.runtime.lastError) {
        reject_wait(new Error(chrome.runtime.lastError.message));
        return;
      }
      if (tab0 && tab0.status === 'complete') {
        resolve_wait(tab0);
        return;
      }

      let timer = null;
      const on_updated = (updated_tab_id, change_info, updated_tab) => {
        if (updated_tab_id !== tab_id || change_info.status !== 'complete') return;
        // Release both the timer and the listener so nothing lingers after success.
        clearTimeout(timer);
        chrome.tabs.onUpdated.removeListener(on_updated);
        resolve_wait(updated_tab || true);
      };
      chrome.tabs.onUpdated.addListener(on_updated);
      timer = setTimeout(() => {
        chrome.tabs.onUpdated.removeListener(on_updated);
        reject_wait(new Error('等待页面加载超时'));
      }, timeout_ms);
    });
  });
}
export function amazon _search _list ( data , sendResponse ) {
return new Promise ( async ( resolve , reject ) => {
const category _keyword = ( data && data . category _keyword ) ? String ( data . category _keyword ) . trim ( ) : '' ;
const sort _by = ( data && data . sort _by ) ? String ( data . sort _by ) . trim ( ) : '' ;
const category _keyword = data && data . category _keyword ? String ( data . category _keyword ) . trim ( ) : '' ;
const sort _by = data && data . sort _by ? String ( data . sort _by ) . trim ( ) : '' ;
const limit = ( ( ) => {
const n = data && Object . prototype . hasOwnProperty . call ( data , 'limit' ) ? Number ( data . limit ) : 100 ;
if ( ! Number . isFinite ( n ) ) return 100 ;
return Math . max ( 1 , Math . min ( 200 , Math . floor ( n ) ) ) ;
} ) ( ) ;
const keyword = c ategory _keyword || '野餐包 ' ;
const keyword = category _keyword || 'picnic bag' ;
const search _url _custom = d ata && data . search _url ? String ( data . search _url ) . trim ( ) : '' ;
const entry = data && data . entry ? String ( data . entry ) . trim ( ) : 'direct' ;
const sort _map = {
featured : 'relevanceblender' ,
review : 'review-rank' ,
@@ -24,65 +399,51 @@ export function amazon_search_list(data, sendResponse) {
bestseller : 'exact-aware-popularity-rank' ,
} ;
const sort _s = Object . prototype . hasOwnProperty . call ( sort _map , sort _by ) ? sort _map [ sort _by ] : '' ;
// 内置 URL, 只替换 k / s 参数,其它参数保持一致
const default _url = ( ( ) => {
const u = new URL ( 'https://www.amazon.com/s' ) ;
u . searchParams . set ( 'k' , keyword ) ;
u . searchParams . set ( '__mk_zh_CN' , '亚马逊网站' ) ;
u . searchParams . set ( 'crid' , 'ZKNCI4U8BBAP' ) ;
u . searchParams . set ( 'ref' , 'nb_sb_noss' ) ;
if ( sort _s ) u . searchParams . set ( 's' , sort _s ) ;
else u . searchParams . delete ( 's' ) ;
return u . toString ( ) ;
} ) ( ) ;
const url = default _url ;
let times = 0 ;
const send _action = ( action , payload ) => {
if ( typeof sendResponse === 'function' ) {
sendResponse ( { action , data : payload } ) ;
sendResponse . log && sendResponse . log ( payload ) ;
}
} ;
const tab _task = create _tab _task ( url )
const tab _task = create _tab _task ( AMAZON _ZH _HOME _URL )
. set _latest ( false )
. set _bounds ( { top : 20 , left : 20 , width : 1440 , height : 900 } )
. set _target ( '__amazon_search_list' ) ;
let url = AMAZON _ZH _HOME _URL ;
try {
const tab = await tab _task . open _async ( ) ;
const wait _ tab_complete = ( tab _id ) => new Promise ( ( resolve _wait , reject _wait ) => {
const on _updated = ( updated _tab _id , change _info , updated _tab ) => {
if ( updated _tab _id !== tab _id ) return ;
if ( change _info . status !== 'complete' ) return ;
chrome . tabs . onUpdated . removeListener ( on _updated ) ;
resolve _wait ( updated _tab ) ;
} ;
chrome . tabs . onUpdated . addListener ( on _updated ) ;
setTimeout ( ( ) => {
chrome . tabs . onUpdated . removeListener ( on _updated ) ;
reject _wait ( new Error ( '等待页面加载超时' ) ) ;
} , 45000 ) ;
} ) ;
const unique _map = new Map ( ) ;
let next _url = url ;
let page = 1 ;
while ( next _url && unique _map . size < limit ) {
await wait _tab _complete ( tab . id ) ;
const home _ret = a wait tab . execute _script ( injected _amazon _homepage _search , [ { keyword } ] , 'document_idle' ) ;
const home _ok = Array . isArray ( home _ret ) ? home _ret [ 0 ] : home _ret ;
if ( ! home _ok || ! home _ok . ok ) {
throw new Error ( ( home _ok && home _ok . error ) || '首页搜索提交失败' ) ;
}
url = await wait _until _search _list _url ( tab . id , 45000 ) ;
await wait _tab _complete ( tab . id ) ;
if ( sort _s ) {
const u = new URL ( url ) ;
u . searchParams . set ( 's' , sort _s ) ;
url = u . toString ( ) ;
await new Promise ( ( resolve _nav , reject _nav ) => {
chrome . tabs . update ( tab . id , { url : next _url , active : true } , ( ) => {
chrome . tabs . update ( tab . id , { url , active : true } , ( ) => {
if ( chrome . runtime . lastError ) return reject _nav ( new Error ( chrome . runtime . lastError . message ) ) ;
resolve _nav ( true ) ;
} ) ;
} ) ;
await wait _tab _complete ( tab . id ) ;
}
const unique _map = new Map ( ) ;
let next _url = url ;
for ( let page = 1 ; page <= 10 && unique _map . size < limit ; page += 1 ) {
if ( page > 1 ) {
await new Promise ( ( resolve _nav , reject _nav ) => {
chrome . tabs . update ( tab . id , { url : next _url , active : true } , ( ) => {
if ( chrome . runtime . lastError ) return reject _nav ( new Error ( chrome . runtime . lastError . message ) ) ;
resolve _nav ( true ) ;
} ) ;
} ) ;
await wait _tab _complete ( tab . id ) ;
}
const injected _result _list = await tab . execute _script (
injected _amazon _search _list ,
[ { url : next _url , category _keyword , sort _by } ] ,
@@ -90,67 +451,141 @@ export function amazon_search_list(data, sendResponse) {
) ;
const injected _result = Array . isArray ( injected _result _list ) ? injected _result _list [ 0 ] : null ;
const items = injected _result && Array . isArray ( injected _result . items ) ? injected _result . items : [ ] ;
items . forEach ( ( it ) => {
const k = ( it && ( it . asin || it . url ) ) ? String ( it . asin || it . url ) : null ;
const k = it && ( it . asin || it . url ) ? String ( it . asin || it . url ) : null ;
if ( ! k ) return ;
if ( ! unique _map . has ( k ) ) unique _map . set ( k , it ) ;
} ) ;
if ( unique _map . size >= limit ) break ;
next _url = injected _result && injected _result . next _url ? String ( injected _result . next _url ) : null ;
page += 1 ;
if ( page > 10 ) break ; // 防止死循环(默认 100 条一般 <= 3 页)
if ( ! next _url ) break ;
}
const injected _result = {
const list _result = {
stage : 'list' ,
limit ,
total : unique _map . size ,
items : Array . from ( unique _map . values ( ) ) . slice ( 0 , limit ) ,
} ;
const result = {
code : 0 ,
status : true ,
message : 'ok' ,
data : { tab _id : tab . id , url , category _keyword , sort _by : sort _by || 'featured' , limit , result : injected _result } ,
data : { tab _id : tab . id , url , category _keyword , sort _by : sort _by || 'featured' , limit , result : list _result } ,
} ;
send _action ( 'amazon_search_list' , result ) ;
resolve ( { tab _id : tab . id , url , category _keyword , sort _by : sort _by || 'featured' , limit , result : injected _result } ) ;
// 成功后关闭打开的 tab( 同时会关闭 popup window)
resolve ( { tab _id : tab . id , url , category _keyword , sort _by : sort _by || 'featured' , limit , result : list _result } ) ;
tab . remove ( 0 ) ;
} catch ( err ) {
const result = {
send _action ( 'amazon_search_list' , {
code : 30 ,
status : false ,
message : ( err && err . message ) || String ( err ) ,
data : null ,
documentURI : url ,
} ;
send _action ( 'amazon_search_list' , result ) ;
documentURI : url || AMAZON _ZH _HOME _URL ,
} ) ;
reject ( err ) ;
}
} ) ;
}
// Description and parameter schema attached to the amazon_search_list action
// (presumably read by the action registry/UI — confirm against the consumer).
amazon_search_list.desc = 'Amazon 搜索列表:先打开中文首页,搜索框输入类目并搜索,再分页抓取 ';
amazon_search_list.params = {
  category_keyword: { type: 'string', desc: '类目关键词(在首页搜索框输入后点搜索,进入列表再抓)', default: '野餐包' },
  sort_by: {
    type: 'string',
    desc: '排序方式: featured / price_asc / price_desc / review / newest / bestseller',
    default: 'featured',
  },
  limit: { type: 'number', desc: '抓取数量上限(默认 100, 最大 200) ', default: 100 },
};
// ---------- 后台:商品详情 / 评论 ----------
/**
 * Validates and normalises a product detail URL.
 *
 * Protocol-relative (`//host/...`) and scheme-less inputs get an `https` scheme.
 * The host must be an Amazon domain — `amazon.com`, `amazon.<cc>`,
 * `amazon.co.<cc>` or `amazon.com.<cc>`, with any subdomain — checked on the
 * parsed hostname so look-alikes such as `amazon.evil.com` or `notamazon.com`
 * are rejected.
 *
 * @param {*} u - Raw URL; stringified and trimmed.
 * @returns {string} The normalised absolute URL.
 * @throws {Error} When the value is empty, is not on an Amazon domain, or its
 *   path lacks `/dp/ASIN` or `/gp/product/ASIN`.
 * @throws {TypeError} From `new URL` when the value cannot be parsed.
 */
function normalize_product_url(u) {
  let candidate = u ? String(u).trim() : '';
  if (!candidate) throw new Error('缺少 product_url');
  if (candidate.startsWith('//')) candidate = `https:${candidate}`;
  if (!/^https?:\/\//i.test(candidate)) candidate = `https://${candidate}`;

  const url_obj = new URL(candidate);
  const is_amazon_host = /(^|\.)amazon\.(com|[a-z]{2}|com?\.[a-z]{2})$/i.test(url_obj.hostname);
  if (!is_amazon_host) throw new Error('product_url 需为亚马逊域名');

  const has_asin_path =
    /\/dp\/[A-Z0-9]{10}/i.test(url_obj.pathname) || /\/gp\/product\/[A-Z0-9]{10}/i.test(url_obj.pathname);
  if (!has_asin_path) throw new Error('product_url 需包含 /dp/ASIN 或 /gp/product/ASIN');

  return url_obj.toString();
}
/**
 * Shared driver for product-detail-page actions: opens the product URL in a
 * tab, waits for it to load, runs an injected function in it and reports the
 * outcome.
 *
 * Written as an `async` function rather than `new Promise(async ...)`, so an
 * exception thrown anywhere (including while opening the tab or while reporting
 * a failure) rejects the returned promise instead of leaving it pending.
 *
 * @param {*} product_url - Raw product URL; validated by `normalize_product_url`.
 * @param {Function} injected_fn - Function executed in the page.
 * @param {Array} [inject_args] - Arguments for `injected_fn` (defaults to none).
 * @param {string} action_name - Action name used in the reported messages.
 * @param {Function} [sendResponse] - Optional reporter; called with
 *   `{ action, data }`, and its optional `.log(payload)` is called as well.
 * @returns {Promise<{tab_id: *, product_url: string, result: *}>} Resolves with
 *   the first script result. Rejects with the validation error (reported with
 *   code 10) or with any error raised while driving the tab (reported with code 30).
 */
async function run_pdp_action(product_url, injected_fn, inject_args, action_name, sendResponse) {
  const send_action = (action, payload) => {
    if (typeof sendResponse === 'function') {
      sendResponse({ action, data: payload });
      sendResponse.log && sendResponse.log(payload);
    }
  };

  let url = product_url;
  try {
    url = normalize_product_url(product_url);
  } catch (e) {
    send_action(action_name, { code: 10, status: false, message: e.message, data: null });
    throw e;
  }

  let tab;
  let outcome;
  try {
    tab = await create_tab_task(url).set_bounds({ top: 20, left: 20, width: 1280, height: 900 }).open_async();
    await wait_tab_complete(tab.id);
    const raw_list = await tab.execute_script(injected_fn, inject_args || [], 'document_idle');
    // execute_script may hand back a list of results; only the first one is used.
    const result = Array.isArray(raw_list) ? raw_list[0] : raw_list;
    send_action(action_name, {
      code: 0,
      status: true,
      message: 'ok',
      data: { tab_id: tab.id, product_url: url, result },
    });
    outcome = { tab_id: tab.id, product_url: url, result };
  } catch (err) {
    send_action(action_name, {
      code: 30,
      status: false,
      message: (err && err.message) || String(err),
      data: null,
      documentURI: url,
    });
    throw err;
  }

  // The tab is closed only after success; on failure it is left open, as before.
  try {
    tab.remove(0);
  } catch (_) {
    // Closing is best-effort: the result has already been reported, so a
    // failure here must not turn the resolved outcome into a rejection.
  }
  return outcome;
}
/**
 * Action: scrape an Amazon product detail page.
 *
 * @param {{product_url?: string}} data - Must carry the product page URL.
 * @param {Function} [sendResponse] - Optional reporter forwarded to `run_pdp_action`.
 * @returns {Promise<object>} Whatever `run_pdp_action` resolves with for the
 *   `injected_amazon_product_detail` script.
 */
export function amazon_product_detail(data, sendResponse) {
  const product_url = data && data.product_url;
  return run_pdp_action(product_url, injected_amazon_product_detail, [], 'amazon_product_detail', sendResponse);
}
// Description and parameter schema attached to the action function.
amazon_product_detail.desc = 'Amazon 商品详情( 标题、价格、品牌、SKU、要点、变体、配送摘要等) ';
amazon_product_detail.params = {
  product_url: {
    type: 'string',
    desc: '商品详情页完整 URL( 含 /dp/ASIN) ',
    default: 'https://www.amazon.com/-/zh/dp/B0B56CHMSC',
  },
};
/**
 * Action: scrape the customer reviews shown on an Amazon product detail page.
 *
 * @param {{product_url?: string, limit?: *}} data - Product page URL and an
 *   optional maximum number of reviews (50 when absent; the injected script
 *   clamps the value).
 * @param {Function} [sendResponse] - Optional reporter forwarded to `run_pdp_action`.
 * @returns {Promise<object>} Whatever `run_pdp_action` resolves with for the
 *   `injected_amazon_product_reviews` script.
 */
export function amazon_product_reviews(data, sendResponse) {
  const limit = data && data.limit != null ? Number(data.limit) : 50;
  const product_url = data && data.product_url;
  return run_pdp_action(
    product_url,
    injected_amazon_product_reviews,
    [{ limit }],
    'amazon_product_reviews',
    sendResponse,
  );
}
// Description and parameter schema attached to the action function.
amazon_product_reviews.desc = 'Amazon 商品页买家评论(详情页 [data-hook=review],条数受页面展示限制)';
amazon_product_reviews.params = {
  product_url: {
    type: 'string',
    desc: '商品详情页完整 URL',
    default: 'https://www.amazon.com/-/zh/dp/B0B56CHMSC',
  },
  limit: { type: 'number', desc: '最多条数(默认 50, 上限 100) ', default: 50 },
};