1
This commit is contained in:
@@ -58,7 +58,9 @@ export function get_app_config() {
|
||||
chrome_executable_path: (get_env('CHROME_EXECUTABLE_PATH') || '').trim() || path.resolve(__dirname, '../../chrome-win/chrome.exe'),
|
||||
log_invoke_action: get_bool('LOG_INVOKE_ACTION', true),
|
||||
auto_close_browser: get_bool('AUTO_CLOSE_BROWSER', true),
|
||||
enable_stealth: get_bool('ENABLE_STEALTH', true)
|
||||
enable_stealth: get_bool('ENABLE_STEALTH', true),
|
||||
log_sql: get_bool('LOG_SQL', false),
|
||||
log_sql_benchmark: get_bool('LOG_SQL_BENCHMARK', false)
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -10,7 +10,18 @@ export function get_sequelize_options() {
|
||||
password: cfg.mysql.password,
|
||||
database: cfg.mysql.database,
|
||||
dialect: 'mysql',
|
||||
logging: false,
|
||||
benchmark: cfg.crawler.log_sql_benchmark === true,
|
||||
logging: cfg.crawler.log_sql === true
|
||||
? (sql, timing_ms) => {
|
||||
if (cfg.crawler.log_sql_benchmark === true && typeof timing_ms === 'number') {
|
||||
// eslint-disable-next-line no-console
|
||||
console.log('[sql]', { timing_ms, sql });
|
||||
return;
|
||||
}
|
||||
// eslint-disable-next-line no-console
|
||||
console.log('[sql]', sql);
|
||||
}
|
||||
: false,
|
||||
define: {
|
||||
underscored: true,
|
||||
timestamps: true,
|
||||
|
||||
@@ -6,26 +6,24 @@ export function define_amazon_product(sequelize) {
|
||||
{
|
||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||
asin: { type: DataTypes.STRING(32), allowNull: false },
|
||||
url: { type: DataTypes.TEXT, allowNull: false },
|
||||
title: { type: DataTypes.TEXT, allowNull: true },
|
||||
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||
title: { type: DataTypes.STRING(1024), allowNull: true },
|
||||
price: { type: DataTypes.STRING(64), allowNull: true },
|
||||
sku: { type: DataTypes.STRING(256), allowNull: true },
|
||||
sku_color: { type: DataTypes.STRING(128), allowNull: true },
|
||||
sku_size: { type: DataTypes.STRING(128), allowNull: true },
|
||||
brand_line: { type: DataTypes.TEXT, allowNull: true },
|
||||
brand_store_url: { type: DataTypes.TEXT, allowNull: true },
|
||||
sku_json: { type: DataTypes.JSON, allowNull: true, comment: 'sku 结构化 JSON,如 {color:[], size:[]}' },
|
||||
brand_line: { type: DataTypes.STRING(512), allowNull: true },
|
||||
brand_store_url: { type: DataTypes.STRING(2048), allowNull: true },
|
||||
ac_badge: { type: DataTypes.STRING(128), allowNull: true },
|
||||
bestseller_hint: { type: DataTypes.TEXT, allowNull: true },
|
||||
delivery_hint: { type: DataTypes.TEXT, allowNull: true },
|
||||
social_proof: { type: DataTypes.TEXT, allowNull: true },
|
||||
sustainability_hint: { type: DataTypes.TEXT, allowNull: true },
|
||||
bestseller_hint: { type: DataTypes.STRING(512), allowNull: true },
|
||||
delivery_hint: { type: DataTypes.STRING(512), allowNull: true },
|
||||
social_proof: { type: DataTypes.STRING(256), allowNull: true },
|
||||
sustainability_hint: { type: DataTypes.STRING(256), allowNull: true },
|
||||
rating_stars: { type: DataTypes.STRING(64), allowNull: true },
|
||||
review_count_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||
main_image: { type: DataTypes.TEXT, allowNull: true },
|
||||
images_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
bullets_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
product_info_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
detail_extra_lines_json: { type: DataTypes.TEXT('long'), allowNull: true }
|
||||
main_image: { type: DataTypes.STRING(2048), allowNull: true },
|
||||
images_json: { type: DataTypes.JSON, allowNull: true },
|
||||
bullets_json: { type: DataTypes.JSON, allowNull: true },
|
||||
product_info_json: { type: DataTypes.JSON, allowNull: true },
|
||||
detail_extra_lines_json: { type: DataTypes.JSON, allowNull: true }
|
||||
},
|
||||
{
|
||||
tableName: 'amazon_product',
|
||||
|
||||
@@ -6,10 +6,10 @@ export function define_amazon_review(sequelize) {
|
||||
{
|
||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||
asin: { type: DataTypes.STRING(32), allowNull: true },
|
||||
url: { type: DataTypes.TEXT, allowNull: false },
|
||||
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||
review_id: { type: DataTypes.STRING(64), allowNull: false },
|
||||
author: { type: DataTypes.STRING(256), allowNull: true },
|
||||
title: { type: DataTypes.TEXT, allowNull: true },
|
||||
title: { type: DataTypes.STRING(512), allowNull: true },
|
||||
body: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||
review_date: { type: DataTypes.STRING(128), allowNull: true },
|
||||
|
||||
@@ -6,8 +6,8 @@ export function define_amazon_search_item(sequelize) {
|
||||
{
|
||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||
asin: { type: DataTypes.STRING(32), allowNull: false },
|
||||
url: { type: DataTypes.TEXT, allowNull: false },
|
||||
title: { type: DataTypes.TEXT, allowNull: true },
|
||||
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||
title: { type: DataTypes.STRING(1024), allowNull: true },
|
||||
price: { type: DataTypes.STRING(64), allowNull: true },
|
||||
rating: { type: DataTypes.FLOAT, allowNull: true },
|
||||
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { execute_action_and_record } from '../../task_executor.js';
|
||||
import { map_limit, sleep_ms } from '../flow_utils.js';
|
||||
import { sleep_ms } from '../flow_utils.js';
|
||||
import { amazon_product, amazon_search_item, amazon_review } from '../../../models/index.js';
|
||||
import { safe_json_stringify } from '../../json_utils.js';
|
||||
import { close_browser } from '../../puppeteer/puppeteer_runner.js';
|
||||
@@ -51,14 +51,15 @@ async function persist_detail(detail_res_raw) {
|
||||
return;
|
||||
}
|
||||
|
||||
const sku_is_object = detail_res && detail_res.sku && typeof detail_res.sku === 'object' && !Array.isArray(detail_res.sku);
|
||||
|
||||
await amazon_product.upsert({
|
||||
asin,
|
||||
url: detail_res.url || '',
|
||||
title: detail_res.title || null,
|
||||
price: detail_res.price || null,
|
||||
sku: detail_res.sku || null,
|
||||
sku_color: detail_res.sku_color || null,
|
||||
sku_size: detail_res.sku_size || null,
|
||||
sku: sku_is_object ? null : (detail_res.sku || null),
|
||||
sku_json: sku_is_object ? detail_res.sku : null,
|
||||
brand_line: detail_res.brand_line || null,
|
||||
brand_store_url: detail_res.brand_store_url || null,
|
||||
ac_badge: detail_res.ac_badge || null,
|
||||
@@ -69,10 +70,10 @@ async function persist_detail(detail_res_raw) {
|
||||
rating_stars: detail_res.rating_stars || null,
|
||||
review_count_text: detail_res.review_count_text || null,
|
||||
main_image: detail_res.main_image || null,
|
||||
images_json: safe_json_stringify(detail_res.images || []),
|
||||
bullets_json: safe_json_stringify(detail_res.bullets || []),
|
||||
product_info_json: safe_json_stringify(detail_res.product_info || {}),
|
||||
detail_extra_lines_json: safe_json_stringify(detail_res.detail_extra_lines || [])
|
||||
images_json: Array.isArray(detail_res.images) ? detail_res.images : null,
|
||||
bullets_json: Array.isArray(detail_res.bullets) ? detail_res.bullets : null,
|
||||
product_info_json: detail_res.product_info && typeof detail_res.product_info === 'object' ? detail_res.product_info : null,
|
||||
detail_extra_lines_json: Array.isArray(detail_res.detail_extra_lines) ? detail_res.detail_extra_lines : null
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -1,26 +1,3 @@
|
||||
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds; handed to `setTimeout` unchanged.
 * @returns {Promise<void>} Settles with no value once the timer has fired.
 */
export async function sleep_ms(ms) {
  const timer_done = new Promise((wake) => {
    setTimeout(wake, ms);
  });
  await timer_done;
}
|
||||
|
||||
/**
 * Run `worker` over every element of `items` and collect the results in
 * input order, optionally capping how many workers are in flight at once.
 *
 * A fixed number of runner loops is started; each runner repeatedly claims
 * the next unprocessed index and awaits the worker for it. Claiming an index
 * happens synchronously (no `await` between the read and the increment), so
 * two runners never process the same element.
 *
 * @param {Array} items - Values to process. Anything that is not an array is
 *   treated as an empty list.
 * @param {function(*, number): *} worker - Called as `worker(item, index)`;
 *   may return a plain value or a promise.
 * @param {number} [limit=Infinity] - Maximum number of workers running
 *   concurrently. Any value that is not a positive integer (including the
 *   default) means "no cap": one runner per item, which is what callers
 *   passing only two arguments have always received.
 * @returns {Promise<Array>} Results positioned at the same index as their
 *   input element. Rejects with the first worker error (via `Promise.all`).
 */
export async function map_limit(items, worker, limit = Infinity) {
  const list = Array.isArray(items) ? items : [];
  const results = new Array(list.length);

  // Never start more runners than there are items to hand out.
  const runner_count = Number.isInteger(limit) && limit > 0
    ? Math.min(limit, list.length)
    : list.length;

  let next_index = 0;

  async function run_one() {
    while (next_index < list.length) {
      // Claim the slot before awaiting so sibling runners skip past it.
      const current = next_index;
      next_index += 1;
      results[current] = await worker(list[current], current);
    }
  }

  await Promise.all(Array.from({ length: runner_count }, () => run_one()));
  return results;
}
|
||||
|
||||
@@ -31,11 +31,9 @@ async function run_cron_task(task) {
|
||||
export async function start_all_cron_tasks() {
|
||||
for (const task of cron_task_list) {
|
||||
// const job = cron.schedule(task.cron_expression, async () => {
|
||||
try {
|
||||
|
||||
await run_cron_task(task);
|
||||
} catch (err) {
|
||||
// action 内部已记录 crawl_run_record;flow 内部 action 也会记录
|
||||
}
|
||||
|
||||
// });
|
||||
|
||||
// cron_jobs.push(job);
|
||||
|
||||
Reference in New Issue
Block a user