1
This commit is contained in:
@@ -58,7 +58,9 @@ export function get_app_config() {
|
|||||||
chrome_executable_path: (get_env('CHROME_EXECUTABLE_PATH') || '').trim() || path.resolve(__dirname, '../../chrome-win/chrome.exe'),
|
chrome_executable_path: (get_env('CHROME_EXECUTABLE_PATH') || '').trim() || path.resolve(__dirname, '../../chrome-win/chrome.exe'),
|
||||||
log_invoke_action: get_bool('LOG_INVOKE_ACTION', true),
|
log_invoke_action: get_bool('LOG_INVOKE_ACTION', true),
|
||||||
auto_close_browser: get_bool('AUTO_CLOSE_BROWSER', true),
|
auto_close_browser: get_bool('AUTO_CLOSE_BROWSER', true),
|
||||||
enable_stealth: get_bool('ENABLE_STEALTH', true)
|
enable_stealth: get_bool('ENABLE_STEALTH', true),
|
||||||
|
log_sql: get_bool('LOG_SQL', false),
|
||||||
|
log_sql_benchmark: get_bool('LOG_SQL_BENCHMARK', false)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,18 @@ export function get_sequelize_options() {
|
|||||||
password: cfg.mysql.password,
|
password: cfg.mysql.password,
|
||||||
database: cfg.mysql.database,
|
database: cfg.mysql.database,
|
||||||
dialect: 'mysql',
|
dialect: 'mysql',
|
||||||
logging: false,
|
benchmark: cfg.crawler.log_sql_benchmark === true,
|
||||||
|
logging: cfg.crawler.log_sql === true
|
||||||
|
? (sql, timing_ms) => {
|
||||||
|
if (cfg.crawler.log_sql_benchmark === true && typeof timing_ms === 'number') {
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.log('[sql]', { timing_ms, sql });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.log('[sql]', sql);
|
||||||
|
}
|
||||||
|
: false,
|
||||||
define: {
|
define: {
|
||||||
underscored: true,
|
underscored: true,
|
||||||
timestamps: true,
|
timestamps: true,
|
||||||
|
|||||||
@@ -6,26 +6,24 @@ export function define_amazon_product(sequelize) {
|
|||||||
{
|
{
|
||||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||||
asin: { type: DataTypes.STRING(32), allowNull: false },
|
asin: { type: DataTypes.STRING(32), allowNull: false },
|
||||||
url: { type: DataTypes.TEXT, allowNull: false },
|
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||||
title: { type: DataTypes.TEXT, allowNull: true },
|
title: { type: DataTypes.STRING(1024), allowNull: true },
|
||||||
price: { type: DataTypes.STRING(64), allowNull: true },
|
price: { type: DataTypes.STRING(64), allowNull: true },
|
||||||
sku: { type: DataTypes.STRING(256), allowNull: true },
|
sku_json: { type: DataTypes.JSON, allowNull: true, comment: 'sku 结构化 JSON,如 {color:[], size:[]}' },
|
||||||
sku_color: { type: DataTypes.STRING(128), allowNull: true },
|
brand_line: { type: DataTypes.STRING(512), allowNull: true },
|
||||||
sku_size: { type: DataTypes.STRING(128), allowNull: true },
|
brand_store_url: { type: DataTypes.STRING(2048), allowNull: true },
|
||||||
brand_line: { type: DataTypes.TEXT, allowNull: true },
|
|
||||||
brand_store_url: { type: DataTypes.TEXT, allowNull: true },
|
|
||||||
ac_badge: { type: DataTypes.STRING(128), allowNull: true },
|
ac_badge: { type: DataTypes.STRING(128), allowNull: true },
|
||||||
bestseller_hint: { type: DataTypes.TEXT, allowNull: true },
|
bestseller_hint: { type: DataTypes.STRING(512), allowNull: true },
|
||||||
delivery_hint: { type: DataTypes.TEXT, allowNull: true },
|
delivery_hint: { type: DataTypes.STRING(512), allowNull: true },
|
||||||
social_proof: { type: DataTypes.TEXT, allowNull: true },
|
social_proof: { type: DataTypes.STRING(256), allowNull: true },
|
||||||
sustainability_hint: { type: DataTypes.TEXT, allowNull: true },
|
sustainability_hint: { type: DataTypes.STRING(256), allowNull: true },
|
||||||
rating_stars: { type: DataTypes.STRING(64), allowNull: true },
|
rating_stars: { type: DataTypes.STRING(64), allowNull: true },
|
||||||
review_count_text: { type: DataTypes.STRING(64), allowNull: true },
|
review_count_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||||
main_image: { type: DataTypes.TEXT, allowNull: true },
|
main_image: { type: DataTypes.STRING(2048), allowNull: true },
|
||||||
images_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
images_json: { type: DataTypes.JSON, allowNull: true },
|
||||||
bullets_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
bullets_json: { type: DataTypes.JSON, allowNull: true },
|
||||||
product_info_json: { type: DataTypes.TEXT('long'), allowNull: true },
|
product_info_json: { type: DataTypes.JSON, allowNull: true },
|
||||||
detail_extra_lines_json: { type: DataTypes.TEXT('long'), allowNull: true }
|
detail_extra_lines_json: { type: DataTypes.JSON, allowNull: true }
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
tableName: 'amazon_product',
|
tableName: 'amazon_product',
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ export function define_amazon_review(sequelize) {
|
|||||||
{
|
{
|
||||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||||
asin: { type: DataTypes.STRING(32), allowNull: true },
|
asin: { type: DataTypes.STRING(32), allowNull: true },
|
||||||
url: { type: DataTypes.TEXT, allowNull: false },
|
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||||
review_id: { type: DataTypes.STRING(64), allowNull: false },
|
review_id: { type: DataTypes.STRING(64), allowNull: false },
|
||||||
author: { type: DataTypes.STRING(256), allowNull: true },
|
author: { type: DataTypes.STRING(256), allowNull: true },
|
||||||
title: { type: DataTypes.TEXT, allowNull: true },
|
title: { type: DataTypes.STRING(512), allowNull: true },
|
||||||
body: { type: DataTypes.TEXT('long'), allowNull: true },
|
body: { type: DataTypes.TEXT('long'), allowNull: true },
|
||||||
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||||
review_date: { type: DataTypes.STRING(128), allowNull: true },
|
review_date: { type: DataTypes.STRING(128), allowNull: true },
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ export function define_amazon_search_item(sequelize) {
|
|||||||
{
|
{
|
||||||
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
|
||||||
asin: { type: DataTypes.STRING(32), allowNull: false },
|
asin: { type: DataTypes.STRING(32), allowNull: false },
|
||||||
url: { type: DataTypes.TEXT, allowNull: false },
|
url: { type: DataTypes.STRING(2048), allowNull: false },
|
||||||
title: { type: DataTypes.TEXT, allowNull: true },
|
title: { type: DataTypes.STRING(1024), allowNull: true },
|
||||||
price: { type: DataTypes.STRING(64), allowNull: true },
|
price: { type: DataTypes.STRING(64), allowNull: true },
|
||||||
rating: { type: DataTypes.FLOAT, allowNull: true },
|
rating: { type: DataTypes.FLOAT, allowNull: true },
|
||||||
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
rating_text: { type: DataTypes.STRING(64), allowNull: true },
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { execute_action_and_record } from '../../task_executor.js';
|
import { execute_action_and_record } from '../../task_executor.js';
|
||||||
import { map_limit, sleep_ms } from '../flow_utils.js';
|
import { sleep_ms } from '../flow_utils.js';
|
||||||
import { amazon_product, amazon_search_item, amazon_review } from '../../../models/index.js';
|
import { amazon_product, amazon_search_item, amazon_review } from '../../../models/index.js';
|
||||||
import { safe_json_stringify } from '../../json_utils.js';
|
import { safe_json_stringify } from '../../json_utils.js';
|
||||||
import { close_browser } from '../../puppeteer/puppeteer_runner.js';
|
import { close_browser } from '../../puppeteer/puppeteer_runner.js';
|
||||||
@@ -51,14 +51,15 @@ async function persist_detail(detail_res_raw) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const sku_is_object = detail_res && detail_res.sku && typeof detail_res.sku === 'object' && !Array.isArray(detail_res.sku);
|
||||||
|
|
||||||
await amazon_product.upsert({
|
await amazon_product.upsert({
|
||||||
asin,
|
asin,
|
||||||
url: detail_res.url || '',
|
url: detail_res.url || '',
|
||||||
title: detail_res.title || null,
|
title: detail_res.title || null,
|
||||||
price: detail_res.price || null,
|
price: detail_res.price || null,
|
||||||
sku: detail_res.sku || null,
|
sku: sku_is_object ? null : (detail_res.sku || null),
|
||||||
sku_color: detail_res.sku_color || null,
|
sku_json: sku_is_object ? detail_res.sku : null,
|
||||||
sku_size: detail_res.sku_size || null,
|
|
||||||
brand_line: detail_res.brand_line || null,
|
brand_line: detail_res.brand_line || null,
|
||||||
brand_store_url: detail_res.brand_store_url || null,
|
brand_store_url: detail_res.brand_store_url || null,
|
||||||
ac_badge: detail_res.ac_badge || null,
|
ac_badge: detail_res.ac_badge || null,
|
||||||
@@ -69,10 +70,10 @@ async function persist_detail(detail_res_raw) {
|
|||||||
rating_stars: detail_res.rating_stars || null,
|
rating_stars: detail_res.rating_stars || null,
|
||||||
review_count_text: detail_res.review_count_text || null,
|
review_count_text: detail_res.review_count_text || null,
|
||||||
main_image: detail_res.main_image || null,
|
main_image: detail_res.main_image || null,
|
||||||
images_json: safe_json_stringify(detail_res.images || []),
|
images_json: Array.isArray(detail_res.images) ? detail_res.images : null,
|
||||||
bullets_json: safe_json_stringify(detail_res.bullets || []),
|
bullets_json: Array.isArray(detail_res.bullets) ? detail_res.bullets : null,
|
||||||
product_info_json: safe_json_stringify(detail_res.product_info || {}),
|
product_info_json: detail_res.product_info && typeof detail_res.product_info === 'object' ? detail_res.product_info : null,
|
||||||
detail_extra_lines_json: safe_json_stringify(detail_res.detail_extra_lines || [])
|
detail_extra_lines_json: Array.isArray(detail_res.detail_extra_lines) ? detail_res.detail_extra_lines : null
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,26 +1,3 @@
|
|||||||
export async function sleep_ms(ms) {
|
export async function sleep_ms(ms) {
|
||||||
await new Promise((resolve) => setTimeout(resolve, ms));
|
await new Promise((resolve) => setTimeout(resolve, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function map_limit(items, worker) {
|
|
||||||
const list = Array.isArray(items) ? items : [];
|
|
||||||
|
|
||||||
const res = new Array(list.length);
|
|
||||||
let idx = 0;
|
|
||||||
|
|
||||||
async function run_one() {
|
|
||||||
while (idx < list.length) {
|
|
||||||
const cur = idx;
|
|
||||||
idx += 1;
|
|
||||||
res[cur] = await worker(list[cur], cur);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const runners = [];
|
|
||||||
for (let i = 0; i < list.length; i += 1) {
|
|
||||||
runners.push(run_one());
|
|
||||||
}
|
|
||||||
|
|
||||||
await Promise.all(runners);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -31,11 +31,9 @@ async function run_cron_task(task) {
|
|||||||
export async function start_all_cron_tasks() {
|
export async function start_all_cron_tasks() {
|
||||||
for (const task of cron_task_list) {
|
for (const task of cron_task_list) {
|
||||||
// const job = cron.schedule(task.cron_expression, async () => {
|
// const job = cron.schedule(task.cron_expression, async () => {
|
||||||
try {
|
|
||||||
await run_cron_task(task);
|
await run_cron_task(task);
|
||||||
} catch (err) {
|
|
||||||
// action 内部已记录 crawl_run_record;flow 内部 action 也会记录
|
|
||||||
}
|
|
||||||
// });
|
// });
|
||||||
|
|
||||||
// cron_jobs.push(job);
|
// cron_jobs.push(job);
|
||||||
|
|||||||
Reference in New Issue
Block a user