Compare commits
2 Commits
54341f0a0b
...
37e39d35b8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
37e39d35b8 | ||
|
|
5b671d320b |
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
server/node_modules/*
|
||||||
|
server/puppeteer_profile/*
|
||||||
|
.env
|
||||||
|
chrome-win/*
|
||||||
@@ -86,7 +86,7 @@ export function injected_amazon_switch_language(params) {
|
|||||||
|
|
||||||
export function injected_amazon_search_list(params) {
|
export function injected_amazon_search_list(params) {
|
||||||
const start_url = params && params.url ? String(params.url) : location.href;
|
const start_url = params && params.url ? String(params.url) : location.href;
|
||||||
const category_keyword = params && params.category_keyword ? String(params.category_keyword).trim() : '';
|
const category_keyword = params && params.category_keyword ? String(params.category_keyword).trim() : '';
|
||||||
const sort_by = params && params.sort_by ? String(params.sort_by).trim() : '';
|
const sort_by = params && params.sort_by ? String(params.sort_by).trim() : '';
|
||||||
|
|
||||||
function pick_number(text) {
|
function pick_number(text) {
|
||||||
@@ -106,15 +106,15 @@ export function injected_amazon_search_list(params) {
|
|||||||
if (mm) return Math.round(parseFloat(mm[1]) * 1000000);
|
if (mm) return Math.round(parseFloat(mm[1]) * 1000000);
|
||||||
const digits = raw.replace(/[^\d]/g, '');
|
const digits = raw.replace(/[^\d]/g, '');
|
||||||
return digits ? Number(digits) : null;
|
return digits ? Number(digits) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function abs_url(href) {
|
function abs_url(href) {
|
||||||
try {
|
try {
|
||||||
return new URL(href, location.origin).toString();
|
return new URL(href, location.origin).toString();
|
||||||
} catch (_) {
|
} catch (_) {
|
||||||
return href;
|
return href;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function parse_asin_from_url(url) {
|
function parse_asin_from_url(url) {
|
||||||
if (!url || typeof url !== 'string') return null;
|
if (!url || typeof url !== 'string') return null;
|
||||||
@@ -161,20 +161,20 @@ export function injected_amazon_search_list(params) {
|
|||||||
const n = pick_int(review_count_text);
|
const n = pick_int(review_count_text);
|
||||||
return Number.isFinite(n) ? n : null;
|
return Number.isFinite(n) ? n : null;
|
||||||
})();
|
})();
|
||||||
items.push({
|
items.push({
|
||||||
index: idx + 1,
|
index: idx + 1,
|
||||||
asin: asin || parse_asin_from_url(item_url),
|
asin: asin || parse_asin_from_url(item_url),
|
||||||
title,
|
title,
|
||||||
url: item_url,
|
url: item_url,
|
||||||
price,
|
price,
|
||||||
rating,
|
rating,
|
||||||
rating_text,
|
rating_text,
|
||||||
review_count,
|
review_count,
|
||||||
review_count_text,
|
review_count_text,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
return items;
|
return items;
|
||||||
}
|
}
|
||||||
|
|
||||||
function pick_next_url() {
|
function pick_next_url() {
|
||||||
const a = document.querySelector('a.s-pagination-next');
|
const a = document.querySelector('a.s-pagination-next');
|
||||||
@@ -191,7 +191,7 @@ export function injected_amazon_search_list(params) {
|
|||||||
return {
|
return {
|
||||||
start_url,
|
start_url,
|
||||||
href: location.href,
|
href: location.href,
|
||||||
category_keyword,
|
category_keyword,
|
||||||
sort_by,
|
sort_by,
|
||||||
total: items.length,
|
total: items.length,
|
||||||
items,
|
items,
|
||||||
@@ -273,18 +273,90 @@ export function injected_amazon_product_detail() {
|
|||||||
if (t) bullets.push(t);
|
if (t) bullets.push(t);
|
||||||
});
|
});
|
||||||
|
|
||||||
const variants = {};
|
/** 变体 id 后缀是否为颜色 / 尺寸(仅提取这两项,不收集其它维度) */
|
||||||
document.querySelectorAll('[id^="variation_"]').forEach((block) => {
|
function is_sku_color_key(k) {
|
||||||
const key = block.id.replace(/^variation_/, '') || block.id;
|
const x = String(k).toLowerCase();
|
||||||
const sel =
|
return x === 'color' || x === 'color_name' || x.endsWith('_color_name');
|
||||||
block.querySelector('.selection') ||
|
}
|
||||||
block.querySelector('.a-button-selected .a-button-text') ||
|
function is_sku_size_key(k) {
|
||||||
block.querySelector('[class*="dropdown"]');
|
const x = String(k).toLowerCase();
|
||||||
if (sel) {
|
return x === 'size' || x === 'size_name' || x.endsWith('_size_name');
|
||||||
const v = norm(sel.textContent);
|
}
|
||||||
if (v) variants[key] = v;
|
|
||||||
|
/** 变体维度:颜色 / 尺寸 各为选项列表 */
|
||||||
|
const sku = { color: [], size: [] };
|
||||||
|
|
||||||
|
const twister_plus_root = document.querySelector('#twister-plus-desktop-twister-container');
|
||||||
|
|
||||||
|
if (twister_plus_root) {
|
||||||
|
const color_row = twister_plus_root.querySelector('#inline-twister-row-color_name');
|
||||||
|
if (color_row) {
|
||||||
|
const seen_c = new Set();
|
||||||
|
color_row.querySelectorAll('li').forEach((li) => {
|
||||||
|
const img = li.querySelector('img[alt]');
|
||||||
|
if (!img) return;
|
||||||
|
const v = norm(img.getAttribute('alt'));
|
||||||
|
if (v && !seen_c.has(v)) {
|
||||||
|
seen_c.add(v);
|
||||||
|
sku.color.push(v);
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
});
|
if (!sku.color.length) {
|
||||||
|
const dim = twister_plus_root.querySelector('#inline-twister-expanded-dimension-text-color_name');
|
||||||
|
const v = dim && norm(dim.textContent);
|
||||||
|
if (v) sku.color.push(v);
|
||||||
|
}
|
||||||
|
const size_row = twister_plus_root.querySelector('#inline-twister-row-size_name');
|
||||||
|
if (size_row) {
|
||||||
|
const seen_s = new Set();
|
||||||
|
size_row.querySelectorAll('li').forEach((li) => {
|
||||||
|
const el = li.querySelector('.swatch-title-text-display, .swatch-title-text-single-line');
|
||||||
|
const v = el ? norm(el.textContent) : null;
|
||||||
|
if (v && !seen_s.has(v)) {
|
||||||
|
seen_s.add(v);
|
||||||
|
sku.size.push(v);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (!sku.size.length) {
|
||||||
|
const dim = twister_plus_root.querySelector('#inline-twister-expanded-dimension-text-size_name');
|
||||||
|
const v = dim && norm(dim.textContent);
|
||||||
|
if (v) sku.size.push(v);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let cur_color = null;
|
||||||
|
let cur_size = null;
|
||||||
|
document.querySelectorAll('[id^="variation_"]').forEach((block) => {
|
||||||
|
const key = block.id.replace(/^variation_/, '') || block.id;
|
||||||
|
if (!is_sku_color_key(key) && !is_sku_size_key(key)) return;
|
||||||
|
const sel =
|
||||||
|
block.querySelector('.selection') ||
|
||||||
|
block.querySelector('.a-button-selected .a-button-text') ||
|
||||||
|
block.querySelector('[class*="dropdown"]');
|
||||||
|
if (!sel) return;
|
||||||
|
const v = norm(sel.textContent);
|
||||||
|
if (!v) return;
|
||||||
|
if (is_sku_color_key(key) && !cur_color) cur_color = v;
|
||||||
|
if (is_sku_size_key(key) && !cur_size) cur_size = v;
|
||||||
|
});
|
||||||
|
document.querySelectorAll('div.inline-twister-row[id^="inline-twister-row-"]').forEach((row) => {
|
||||||
|
const id = row.id || '';
|
||||||
|
const key = id.replace(/^inline-twister-row-/, '') || id;
|
||||||
|
if (!is_sku_color_key(key) && !is_sku_size_key(key)) return;
|
||||||
|
const selected =
|
||||||
|
row.querySelector('.a-button-selected .swatch-title-text-display') ||
|
||||||
|
row.querySelector('.a-button-selected .a-button-text') ||
|
||||||
|
row.querySelector('.a-button-selected');
|
||||||
|
if (!selected) return;
|
||||||
|
const v = norm(selected.textContent);
|
||||||
|
if (!v) return;
|
||||||
|
if (is_sku_color_key(key) && !cur_color) cur_color = v;
|
||||||
|
if (is_sku_size_key(key) && !cur_size) cur_size = v;
|
||||||
|
});
|
||||||
|
if (cur_color) sku.color.push(cur_color);
|
||||||
|
if (cur_size) sku.size.push(cur_size);
|
||||||
|
}
|
||||||
|
|
||||||
let delivery_hint = null;
|
let delivery_hint = null;
|
||||||
const del = document.querySelector(
|
const del = document.querySelector(
|
||||||
@@ -292,11 +364,6 @@ export function injected_amazon_product_detail() {
|
|||||||
);
|
);
|
||||||
if (del) delivery_hint = norm(del.innerText).slice(0, 500);
|
if (del) delivery_hint = norm(del.innerText).slice(0, 500);
|
||||||
|
|
||||||
let sku = null;
|
|
||||||
Object.keys(product_info).forEach((k) => {
|
|
||||||
if (/^sku$/i.test(k) || /item model|型号|part number|制造商型号/i.test(k)) sku = product_info[k];
|
|
||||||
});
|
|
||||||
|
|
||||||
const images = [];
|
const images = [];
|
||||||
const seen_img = new Set();
|
const seen_img = new Set();
|
||||||
function add_img(u) {
|
function add_img(u) {
|
||||||
@@ -338,7 +405,6 @@ export function injected_amazon_product_detail() {
|
|||||||
product_info,
|
product_info,
|
||||||
detail_extra_lines,
|
detail_extra_lines,
|
||||||
bullets,
|
bullets,
|
||||||
variants,
|
|
||||||
delivery_hint,
|
delivery_hint,
|
||||||
sku,
|
sku,
|
||||||
images,
|
images,
|
||||||
@@ -492,8 +558,8 @@ export function amazon_search_list(data, sendResponse) {
|
|||||||
chrome.tabs.update(tab.id, { url: next_url, active: true }, () => {
|
chrome.tabs.update(tab.id, { url: next_url, active: true }, () => {
|
||||||
if (chrome.runtime.lastError) return reject_nav(new Error(chrome.runtime.lastError.message));
|
if (chrome.runtime.lastError) return reject_nav(new Error(chrome.runtime.lastError.message));
|
||||||
resolve_nav(true);
|
resolve_nav(true);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
await wait_tab_complete(tab.id);
|
await wait_tab_complete(tab.id);
|
||||||
}
|
}
|
||||||
const injected_result_list = await tab.execute_script(
|
const injected_result_list = await tab.execute_script(
|
||||||
@@ -518,10 +584,10 @@ export function amazon_search_list(data, sendResponse) {
|
|||||||
total: unique_map.size,
|
total: unique_map.size,
|
||||||
items: Array.from(unique_map.values()).slice(0, limit),
|
items: Array.from(unique_map.values()).slice(0, limit),
|
||||||
};
|
};
|
||||||
const result = {
|
const result = {
|
||||||
code: 0,
|
code: 0,
|
||||||
status: true,
|
status: true,
|
||||||
message: 'ok',
|
message: 'ok',
|
||||||
data: { tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: list_result },
|
data: { tab_id: tab.id, url, category_keyword, sort_by: sort_by || 'featured', limit, result: list_result },
|
||||||
};
|
};
|
||||||
send_action('amazon_search_list', result);
|
send_action('amazon_search_list', result);
|
||||||
@@ -610,7 +676,7 @@ export function amazon_set_language(data, sendResponse) {
|
|||||||
data: null,
|
data: null,
|
||||||
documentURI: AMAZON_HOME_FOR_LANG,
|
documentURI: AMAZON_HOME_FOR_LANG,
|
||||||
});
|
});
|
||||||
reject(err);
|
reject(err);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -686,7 +752,8 @@ export function amazon_product_detail(data, sendResponse) {
|
|||||||
return run_pdp_action(data && data.product_url, injected_amazon_product_detail, [], 'amazon_product_detail', sendResponse);
|
return run_pdp_action(data && data.product_url, injected_amazon_product_detail, [], 'amazon_product_detail', sendResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
amazon_product_detail.desc = 'Amazon 商品详情(标题、价格、品牌、SKU、要点、变体、配送摘要等)';
|
amazon_product_detail.desc =
|
||||||
|
'Amazon 商品详情(标题、价格、品牌、sku{color[],size[]}、要点、配送摘要等)';
|
||||||
amazon_product_detail.params = {
|
amazon_product_detail.params = {
|
||||||
product_url: {
|
product_url: {
|
||||||
type: 'string',
|
type: 'string',
|
||||||
|
|||||||
11
mv2_simple_crx/src/bridge/bridge.html
Normal file
11
mv2_simple_crx/src/bridge/bridge.html
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="zh-CN">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>server_bridge</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<!-- Puppeteer 打开此页后 evaluate 调用 window.server_bridge_invoke,与 background 同协议 -->
|
||||||
|
<script src="bridge.js"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
32
mv2_simple_crx/src/bridge/bridge.js
Normal file
32
mv2_simple_crx/src/bridge/bridge.js
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
/**
 * Bridge page script.
 *
 * The server-side Puppeteer process opens the bridge page and calls
 * `window.server_bridge_invoke` through `page.evaluate`; the call is forwarded
 * to the extension background page with `chrome.runtime.sendMessage`, i.e. the
 * same protocol the extension UI uses.
 *
 * If heavy computation is ever needed inside this page it could be delegated
 * to a Web Worker via postMessage; talking to the background page directly is
 * sufficient for sending commands and receiving results.
 */
(function () {
  /**
   * Send one action to the background page and settle with its reply.
   *
   * @param {string} action - Name of the background action to run.
   * @param {object} [data] - Payload for the action; defaults to `{}`.
   * @returns {Promise<*>} Resolves with `res.data` when the reply has a truthy
   *   `ok`; rejects when `action` is missing, when Chrome reports
   *   `runtime.lastError`, when no reply arrives, or when `ok` is falsy.
   */
  function server_bridge_invoke(action, data) {
    return new Promise(function (resolve, reject) {
      if (!action) {
        reject(new Error('缺少 action'));
        return;
      }

      chrome.runtime.sendMessage({ action: action, data: data || {} }, function (res) {
        // lastError must be read inside the callback, otherwise Chrome
        // reports it as an unchecked error.
        var err = chrome.runtime.lastError;
        if (err) {
          reject(new Error(err.message));
          return;
        }

        if (!res) {
          reject(new Error('background 无响应'));
          return;
        }

        if (res.ok) {
          resolve(res.data);
        } else {
          reject(new Error(res.error || 'action 失败'));
        }
      });
    });
  }

  // Expose the entry point so code evaluated in the page can reach it.
  window.server_bridge_invoke = server_bridge_invoke;
})();
|
||||||
15
server/.env.example
Normal file
15
server/.env.example
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# MySQL
|
||||||
|
MYSQL_HOST=127.0.0.1
|
||||||
|
MYSQL_PORT=3306
|
||||||
|
MYSQL_USER=root
|
||||||
|
MYSQL_PASSWORD=
|
||||||
|
MYSQL_DATABASE=ecom_crawl
|
||||||
|
|
||||||
|
# 扩展目录(未打包,含 manifest.json)
|
||||||
|
CRX_SRC_PATH=d:/项目/电商抓取项目/mv2_simple_crx/src
|
||||||
|
|
||||||
|
SERVER_PORT=38080
|
||||||
|
ACTION_TIMEOUT_MS=300000
|
||||||
|
PUPPETEER_HEADLESS=false
|
||||||
|
# 可选:指定浏览器路径(不填默认用 ../chrome-win/chrome.exe)
|
||||||
|
CHROME_EXECUTABLE_PATH=
|
||||||
40
server/README.md
Normal file
40
server/README.md
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
## server(Koa + Sequelize + MySQL)
|
||||||
|
|
||||||
|
### 功能
|
||||||
|
- `POST /api/crawl/run_action`:服务端调用扩展 action,返回结果,并按 `stage` 自动入库
|
||||||
|
- **定时任务写死配置**:`config/cron_tasks.js`(不走数据库)
|
||||||
|
|
||||||
|
### 运行
|
||||||
|
1. 安装依赖
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd server
|
||||||
|
npm install
|
||||||
|
```
|
||||||
|
|
||||||
|
2. 配置环境变量
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env   # Windows cmd: copy .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
3. 初始化/更新表结构
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node scripts/db_sync.js
|
||||||
|
```
|
||||||
|
|
||||||
|
4. 启动
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node app.js
|
||||||
|
```
|
||||||
|
|
||||||
|
### 定时任务
|
||||||
|
编辑 `config/cron_tasks.js` 的 `cron_task_list`,重启服务即可生效。
|
||||||
|
|
||||||
|
### 落库表
|
||||||
|
- `crawl_run_record`:所有 action 调用的原始请求/响应
|
||||||
|
- `amazon_product`:`stage=detail` 详情
|
||||||
|
- `amazon_search_item`:`stage=list` 列表 item
|
||||||
|
- `amazon_review`:`stage=reviews` 评论
|
||||||
29
server/app.js
Normal file
29
server/app.js
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
import dotenv from 'dotenv';
|
||||||
|
import Koa from 'koa';
|
||||||
|
import body_parser from 'koa-bodyparser';
|
||||||
|
|
||||||
|
import { sequelize } from './models/index.js';
|
||||||
|
import { crawl_router } from './routes/crawl.js';
|
||||||
|
import { start_all_cron_tasks } from './services/schedule_loader.js';
|
||||||
|
|
||||||
|
// Load .env into process.env. ES module imports are evaluated before this
// line runs, so any module that reads process.env at import time has to load
// dotenv itself (config/database.js does).
dotenv.config();

const app = new Koa();

// Parse JSON request bodies; crawl payloads can be large, hence the 10mb cap.
app.use(body_parser({ jsonLimit: '10mb' }));

app.use(crawl_router.routes()).use(crawl_router.allowedMethods());

// Fallback for every request that no route handled.
app.use(async (ctx) => {
  ctx.status = 404;
  ctx.body = { ok: false, error: 'not_found' };
});

const port = Number(process.env.SERVER_PORT || 38080);

// Fail fast when the database is unreachable, then make sure the tables exist
// before cron tasks or HTTP requests can touch them.
await sequelize.authenticate();
await sequelize.sync();
start_all_cron_tasks();

// Log from the listen callback so the message is only printed once the socket
// is actually bound (a failed bind no longer produces a misleading log line).
app.listen(port, () => {
  // eslint-disable-next-line no-console
  console.log(`server listening on ${port}`);
});
|
||||||
14
server/config/cron_tasks.js
Normal file
14
server/config/cron_tasks.js
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/**
 * Hard-coded scheduled task configuration (deliberately not stored in the
 * database). `cron_expression` follows the node-cron syntax.
 *
 * Each entry is an object shaped like the commented example below.
 */
export const cron_task_list = [
  // Example: run the search-list crawl every 6 hours.
  // {
  //   name: 'amazon_search_list_every_6h',
  //   cron_expression: '0 */6 * * *',
  //   action_name: 'amazon_search_list',
  //   action_payload: { keyword: '午餐包', limit: 100 }
  // }
];
|
||||||
22
server/config/database.js
Normal file
22
server/config/database.js
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
import dotenv from 'dotenv';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
/**
 * Build the Sequelize connection options from the MYSQL_* environment
 * variables, falling back to local-development defaults.
 *
 * @returns {object} Options containing `database`, `username`, `password`
 *   plus the remaining Sequelize constructor options.
 */
export function get_sequelize_options() {
  const default_port = 3306;
  const parsed_port = Number(process.env.MYSQL_PORT || default_port);
  // A malformed MYSQL_PORT would otherwise reach the driver as NaN.
  const port =
    Number.isInteger(parsed_port) && parsed_port > 0 && parsed_port < 65536 ? parsed_port : default_port;

  return {
    host: process.env.MYSQL_HOST || '127.0.0.1',
    port,
    username: process.env.MYSQL_USER || 'root',
    password: process.env.MYSQL_PASSWORD || '',
    database: process.env.MYSQL_DATABASE || 'ecom_crawl',
    dialect: 'mysql',
    logging: false,
    define: {
      // snake_case columns and timestamp names for every model.
      underscored: true,
      timestamps: true,
      createdAt: 'created_at',
      updatedAt: 'updated_at'
    },
    timezone: '+08:00'
  };
}
|
||||||
39
server/models/amazon_product.js
Normal file
39
server/models/amazon_product.js
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
import { DataTypes } from 'sequelize';
|
||||||
|
|
||||||
|
/**
 * Register the `amazon_product` model: one row per ASIN holding the scraped
 * product-detail fields. List-like values are stored as JSON text in the
 * `*_json` columns.
 *
 * @param {object} sequelize - Sequelize instance to define the model on.
 * @returns {object} The defined model.
 */
export function define_amazon_product(sequelize) {
  return sequelize.define(
    'amazon_product',
    {
      id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
      asin: { type: DataTypes.STRING(32), allowNull: false },
      url: { type: DataTypes.TEXT, allowNull: false },
      title: { type: DataTypes.TEXT, allowNull: true },
      price: { type: DataTypes.STRING(64), allowNull: true },
      sku: { type: DataTypes.STRING(256), allowNull: true },
      sku_color: { type: DataTypes.STRING(128), allowNull: true },
      sku_size: { type: DataTypes.STRING(128), allowNull: true },
      brand_line: { type: DataTypes.TEXT, allowNull: true },
      brand_store_url: { type: DataTypes.TEXT, allowNull: true },
      ac_badge: { type: DataTypes.STRING(128), allowNull: true },
      bestseller_hint: { type: DataTypes.TEXT, allowNull: true },
      delivery_hint: { type: DataTypes.TEXT, allowNull: true },
      social_proof: { type: DataTypes.TEXT, allowNull: true },
      sustainability_hint: { type: DataTypes.TEXT, allowNull: true },
      rating_stars: { type: DataTypes.STRING(64), allowNull: true },
      review_count_text: { type: DataTypes.STRING(64), allowNull: true },
      main_image: { type: DataTypes.TEXT, allowNull: true },
      images_json: { type: DataTypes.TEXT('long'), allowNull: true },
      bullets_json: { type: DataTypes.TEXT('long'), allowNull: true },
      product_info_json: { type: DataTypes.TEXT('long'), allowNull: true },
      detail_extra_lines_json: { type: DataTypes.TEXT('long'), allowNull: true }
    },
    {
      tableName: 'amazon_product',
      indexes: [
        // The unique ASIN index is what lets an upsert update an existing row.
        { unique: true, fields: ['asin'] },
        { fields: ['created_at'] },
        { fields: ['updated_at'] }
      ]
    }
  );
}
|
||||||
31
server/models/amazon_review.js
Normal file
31
server/models/amazon_review.js
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import { DataTypes } from 'sequelize';
|
||||||
|
|
||||||
|
/**
 * Register the `amazon_review` model: one row per review, unique on
 * `review_id`, tagged with the batch (`batch_key`) that captured it.
 *
 * @param {object} sequelize - Sequelize instance to define the model on.
 * @returns {object} The defined model.
 */
export function define_amazon_review(sequelize) {
  return sequelize.define(
    'amazon_review',
    {
      id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
      asin: { type: DataTypes.STRING(32), allowNull: true },
      url: { type: DataTypes.TEXT, allowNull: false },
      review_id: { type: DataTypes.STRING(64), allowNull: false },
      author: { type: DataTypes.STRING(256), allowNull: true },
      title: { type: DataTypes.TEXT, allowNull: true },
      body: { type: DataTypes.TEXT('long'), allowNull: true },
      rating_text: { type: DataTypes.STRING(64), allowNull: true },
      review_date: { type: DataTypes.STRING(128), allowNull: true },
      review_index: { type: DataTypes.INTEGER, allowNull: true },
      batch_key: { type: DataTypes.STRING(64), allowNull: false },
      batch_total: { type: DataTypes.INTEGER, allowNull: true },
      batch_limit: { type: DataTypes.INTEGER, allowNull: true }
    },
    {
      tableName: 'amazon_review',
      indexes: [
        { unique: true, fields: ['review_id'] },
        { fields: ['asin'] },
        { fields: ['batch_key'] },
        { fields: ['created_at'] }
      ]
    }
  );
}
|
||||||
30
server/models/amazon_search_item.js
Normal file
30
server/models/amazon_search_item.js
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import { DataTypes } from 'sequelize';
|
||||||
|
|
||||||
|
/**
 * Register the `amazon_search_item` model: one row per item seen in a search
 * list crawl. Rows are not unique per ASIN; each crawl batch adds its own rows
 * identified by `batch_key`.
 *
 * @param {object} sequelize - Sequelize instance to define the model on.
 * @returns {object} The defined model.
 */
export function define_amazon_search_item(sequelize) {
  return sequelize.define(
    'amazon_search_item',
    {
      id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
      asin: { type: DataTypes.STRING(32), allowNull: false },
      url: { type: DataTypes.TEXT, allowNull: false },
      title: { type: DataTypes.TEXT, allowNull: true },
      price: { type: DataTypes.STRING(64), allowNull: true },
      rating: { type: DataTypes.FLOAT, allowNull: true },
      rating_text: { type: DataTypes.STRING(64), allowNull: true },
      review_count: { type: DataTypes.INTEGER, allowNull: true },
      review_count_text: { type: DataTypes.STRING(64), allowNull: true },
      // Column comment: the item's `index` field within the list.
      rank_index: { type: DataTypes.INTEGER, allowNull: true, comment: '列表中的 index 字段' },
      // Column comment: key shared by all rows of one list crawl.
      batch_key: { type: DataTypes.STRING(64), allowNull: false, comment: '一次列表抓取的批次 key' },
      batch_total: { type: DataTypes.INTEGER, allowNull: true },
      batch_limit: { type: DataTypes.INTEGER, allowNull: true }
    },
    {
      tableName: 'amazon_search_item',
      indexes: [{ fields: ['asin'] }, { fields: ['batch_key'] }, { fields: ['created_at'] }]
    }
  );
}
|
||||||
25
server/models/crawl_run_record.js
Normal file
25
server/models/crawl_run_record.js
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import { DataTypes } from 'sequelize';
|
||||||
|
|
||||||
|
/**
 * Register the `crawl_run_record` model: an audit row per action invocation
 * with the request payload, the outcome and where the call came from.
 *
 * @param {object} sequelize - Sequelize instance to define the model on.
 * @returns {object} The defined model.
 */
export function define_crawl_run_record(sequelize) {
  return sequelize.define(
    'crawl_run_record',
    {
      id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
      action_name: { type: DataTypes.STRING(128), allowNull: false },
      // Column comment: JSON request body.
      request_payload: { type: DataTypes.TEXT, allowNull: true, comment: 'JSON 请求体' },
      ok: { type: DataTypes.BOOLEAN, allowNull: false },
      // Column comment: JSON result.
      result_payload: { type: DataTypes.TEXT('long'), allowNull: true, comment: 'JSON 结果' },
      error_message: { type: DataTypes.TEXT, allowNull: true },
      source: {
        type: DataTypes.STRING(32),
        allowNull: false,
        defaultValue: 'api',
        comment: 'api | cron'
      }
    },
    {
      tableName: 'crawl_run_record',
      indexes: [{ fields: ['action_name'] }, { fields: ['created_at'] }]
    }
  );
}
|
||||||
17
server/models/index.js
Normal file
17
server/models/index.js
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
import { Sequelize } from 'sequelize';
|
||||||
|
import { get_sequelize_options } from '../config/database.js';
|
||||||
|
import { define_crawl_run_record } from './crawl_run_record.js';
|
||||||
|
import { define_amazon_product } from './amazon_product.js';
|
||||||
|
import { define_amazon_search_item } from './amazon_search_item.js';
|
||||||
|
import { define_amazon_review } from './amazon_review.js';
|
||||||
|
|
||||||
|
// Split the credentials out of the option bag: the Sequelize constructor takes
// database / username / password positionally and everything else as options.
const sequelize_options = get_sequelize_options();
const { database, username, password, ...rest } = sequelize_options;

/** Shared Sequelize connection used by every model in this package. */
export const sequelize = new Sequelize(database, username, password, rest);

/** Audit log of action invocations. */
export const crawl_run_record = define_crawl_run_record(sequelize);

/** Scraped Amazon data: product details, search-list items and reviews. */
export const amazon_product = define_amazon_product(sequelize);
export const amazon_search_item = define_amazon_search_item(sequelize);
export const amazon_review = define_amazon_review(sequelize);
|
||||||
1
server/models/schedule_task.js
Normal file
1
server/models/schedule_task.js
Normal file
@@ -0,0 +1 @@
|
|||||||
|
// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js)
|
||||||
2461
server/package-lock.json
generated
Normal file
2461
server/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
20
server/package.json
Normal file
20
server/package.json
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"name": "ecom_crawl_server",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"start": "node app.js",
|
||||||
|
"db_sync": "node scripts/db_sync.js"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@koa/router": "^12.0.1",
|
||||||
|
"dotenv": "^16.4.5",
|
||||||
|
"koa": "^2.15.3",
|
||||||
|
"koa-bodyparser": "^4.4.1",
|
||||||
|
"mysql2": "^3.11.0",
|
||||||
|
"node-cron": "^3.0.3",
|
||||||
|
"puppeteer": "^23.4.1",
|
||||||
|
"sequelize": "^6.37.3"
|
||||||
|
}
|
||||||
|
}
|
||||||
26
server/routes/crawl.js
Normal file
26
server/routes/crawl.js
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
import Router from '@koa/router';
|
||||||
|
import { execute_action_and_record } from '../services/task_executor.js';
|
||||||
|
|
||||||
|
/** Koa router exposing the crawl HTTP API. */
export const crawl_router = new Router();

/**
 * POST /api/crawl/run_action
 *
 * Body: `{ action_name: string, action_payload?: object }`.
 * Runs the action through `execute_action_and_record` with source `'api'` and
 * answers `{ ok: true, data }`; validation problems answer 400 and executor
 * failures answer 500, both as `{ ok: false, error }`.
 */
crawl_router.post('/api/crawl/run_action', async (ctx) => {
  const { action_name, action_payload } = ctx.request.body || {};

  if (!action_name) {
    ctx.status = 400;
    ctx.body = { ok: false, error: '缺少 action_name' };
    return;
  }

  // The body comes from an untrusted client: reject non-string names here
  // instead of letting them fail somewhere inside the executor as a 500.
  if (typeof action_name !== 'string') {
    ctx.status = 400;
    ctx.body = { ok: false, error: 'action_name 必须是字符串' };
    return;
  }

  try {
    const data = await execute_action_and_record({
      action_name,
      action_payload: action_payload || {},
      source: 'api'
    });
    ctx.body = { ok: true, data };
  } catch (err) {
    ctx.status = 500;
    ctx.body = { ok: false, error: (err && err.message) || String(err) };
  }
});
|
||||||
1
server/routes/schedule_task.js
Normal file
1
server/routes/schedule_task.js
Normal file
@@ -0,0 +1 @@
|
|||||||
|
// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js)
|
||||||
6
server/scripts/db_sync.js
Normal file
6
server/scripts/db_sync.js
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
import { sequelize } from '../models/index.js';
|
||||||
|
|
||||||
|
// One-off schema migration: alter existing tables to match the model
// definitions, then release the connection so the process can exit.
try {
  await sequelize.sync({ alter: true });
  // eslint-disable-next-line no-console
  console.log('sync ok');
} finally {
  // Close the pool even when the sync fails; the original error still
  // propagates and makes the script exit non-zero.
  await sequelize.close();
}
|
||||||
110
server/services/amazon_persist.js
Normal file
110
server/services/amazon_persist.js
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
import {
|
||||||
|
amazon_product,
|
||||||
|
amazon_search_item,
|
||||||
|
amazon_review
|
||||||
|
} from '../models/index.js';
|
||||||
|
import { safe_json_stringify } from './json_utils.js';
|
||||||
|
|
||||||
|
/**
 * Build a key identifying one persisted batch: `<prefix>_<epoch ms>_<6 digits>`.
 *
 * @param {string} prefix - Batch kind, e.g. 'list' or 'reviews'.
 * @returns {string} The batch key.
 */
function build_batch_key(prefix) {
  // Slicing Math.random().toString() can yield fewer than six characters
  // (0.5 -> "5") or non-digits for tiny values ("1e-7"); generate an integer
  // and zero-pad it so the suffix always has the same shape.
  const suffix = Math.floor(Math.random() * 1e6)
    .toString()
    .padStart(6, '0');
  return `${prefix}_${Date.now()}_${suffix}`;
}
|
||||||
|
|
||||||
|
/**
 * Extract the ASIN from an Amazon URL.
 *
 * Recognizes product pages (`/dp/<asin>`, `/gp/product/<asin>`,
 * `/gp/aw/d/<asin>`) and review pages (`/product-reviews/<asin>`), so the
 * review stage can still resolve an ASIN when it is given a review-page URL.
 *
 * @param {*} url - URL (absolute or relative); non-strings are stringified.
 * @returns {string|null} Upper-cased ASIN, or null when none is found.
 */
function pick_asin_from_url(url) {
  if (!url) return null;
  const m = String(url).match(/\/(?:dp|gp\/product|gp\/aw\/d|product-reviews)\/([A-Z0-9]{8,16})/i);
  return m && m[1] ? m[1].toUpperCase() : null;
}
|
||||||
|
|
||||||
|
/** Join a list of variant labels into one column value (null when empty). */
function join_sku_options(options) {
  if (!Array.isArray(options)) return null;
  const labels = options
    .filter((option) => option !== null && option !== undefined)
    .map((option) => String(option).trim())
    .filter(Boolean);
  if (!labels.length) return null;
  // sku_color / sku_size are STRING(128) columns; cut the joined text so a
  // product with many variants cannot make the whole upsert fail.
  return labels.join(', ').slice(0, 128);
}

/**
 * Map the `sku` part of a detail result onto the sku / sku_color / sku_size
 * columns. The extension's detail extractor returns
 * `sku = { color: string[], size: string[] }`; passing that object to the
 * STRING `sku` column is expected to fail validation, so the lists are stored
 * in the two dedicated columns instead. A scalar `sku` is stored as-is.
 */
function build_sku_columns(result) {
  const raw_sku = result.sku;
  if (raw_sku && typeof raw_sku === 'object') {
    return {
      sku: null,
      sku_color: result.sku_color || join_sku_options(raw_sku.color),
      sku_size: result.sku_size || join_sku_options(raw_sku.size)
    };
  }
  return {
    sku: raw_sku || null,
    sku_color: result.sku_color || null,
    sku_size: result.sku_size || null
  };
}

/** Upsert one product-detail result, keyed by ASIN. */
async function persist_detail_result(result) {
  const asin = result.asin || pick_asin_from_url(result.url);
  if (!asin) return;

  await amazon_product.upsert({
    asin,
    url: result.url || '',
    title: result.title || null,
    price: result.price || null,
    ...build_sku_columns(result),
    brand_line: result.brand_line || null,
    brand_store_url: result.brand_store_url || null,
    ac_badge: result.ac_badge || null,
    bestseller_hint: result.bestseller_hint || null,
    delivery_hint: result.delivery_hint || null,
    social_proof: result.social_proof || null,
    sustainability_hint: result.sustainability_hint || null,
    rating_stars: result.rating_stars || null,
    review_count_text: result.review_count_text || null,
    main_image: result.main_image || null,
    images_json: safe_json_stringify(result.images || []),
    bullets_json: safe_json_stringify(result.bullets || []),
    product_info_json: safe_json_stringify(result.product_info || {}),
    detail_extra_lines_json: safe_json_stringify(result.detail_extra_lines || [])
  });
}

/** Insert every usable item of a search-list result under one batch key. */
async function persist_list_result(result) {
  const batch_key = build_batch_key('list');
  const items = Array.isArray(result.items) ? result.items : [];

  for (const it of items) {
    const asin = it.asin || pick_asin_from_url(it.url);
    // asin and url are NOT NULL columns; skip items that lack either.
    if (!asin || !it.url) continue;

    await amazon_search_item.create({
      asin,
      url: it.url,
      title: it.title || null,
      price: it.price || null,
      rating: typeof it.rating === 'number' ? it.rating : null,
      rating_text: it.rating_text || null,
      review_count: typeof it.review_count === 'number' ? it.review_count : null,
      review_count_text: it.review_count_text || null,
      rank_index: typeof it.index === 'number' ? it.index : null,
      batch_key,
      batch_total: typeof result.total === 'number' ? result.total : null,
      batch_limit: typeof result.limit === 'number' ? result.limit : null
    });
  }
}

/** Upsert every review of a reviews result, keyed by review_id. */
async function persist_reviews_result(result) {
  const batch_key = build_batch_key('reviews');
  const page_asin = pick_asin_from_url(result.url);
  const items = Array.isArray(result.items) ? result.items : [];

  for (const it of items) {
    const review_id = it.review_id;
    if (!review_id) continue;

    await amazon_review.upsert({
      // Prefer the ASIN of the page; fall back to the review's own URL.
      asin: page_asin || pick_asin_from_url(it.url) || null,
      url: result.url || '',
      review_id,
      author: it.author || null,
      title: it.title || null,
      body: it.body || null,
      rating_text: it.rating_text || null,
      review_date: it.date || null,
      review_index: typeof it.index === 'number' ? it.index : null,
      batch_key,
      batch_total: typeof result.total === 'number' ? result.total : null,
      batch_limit: typeof result.limit === 'number' ? result.limit : null
    });
  }
}

/**
 * Persist an action result according to its `stage`:
 * `detail` -> amazon_product (upsert), `list` -> amazon_search_item (insert),
 * `reviews` -> amazon_review (upsert). Results without a stage, or with an
 * unknown stage, are ignored.
 *
 * @param {object} result - Action result carrying a `stage` field.
 * @returns {Promise<void>}
 */
export async function persist_amazon_result(result) {
  if (!result || !result.stage) {
    return;
  }

  if (result.stage === 'detail') {
    await persist_detail_result(result);
    return;
  }

  if (result.stage === 'list') {
    await persist_list_result(result);
    return;
  }

  if (result.stage === 'reviews') {
    await persist_reviews_result(result);
  }
}
|
||||||
1
server/services/cron_manager.js
Normal file
1
server/services/cron_manager.js
Normal file
@@ -0,0 +1 @@
|
|||||||
|
// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js)
|
||||||
18
server/services/json_utils.js
Normal file
18
server/services/json_utils.js
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
/**
 * Serialize a value to JSON without letting serialization errors escape.
 *
 * @param {*} value - Anything accepted by JSON.stringify.
 * @returns {string|undefined} The JSON text. When JSON.stringify throws
 *   (e.g. circular structure), a JSON object describing the failure is
 *   returned instead. Values JSON.stringify maps to `undefined` are passed
 *   through unchanged.
 */
export function safe_json_stringify(value) {
  let serialized;
  try {
    serialized = JSON.stringify(value);
  } catch (failure) {
    // Fall back to a small, always-serializable error envelope.
    serialized = JSON.stringify({ error: 'json_stringify_failed', message: String(failure) });
  }
  return serialized;
}
|
||||||
|
|
||||||
|
/**
 * Parse JSON text, returning null instead of throwing.
 *
 * @param {string|null|undefined} text - JSON text to parse.
 * @returns {*} The parsed value, or null when `text` is null, undefined,
 *   the empty string, or not valid JSON.
 */
export function safe_json_parse(text) {
  const is_blank = text === null || text === undefined || text === '';
  if (!is_blank) {
    try {
      return JSON.parse(text);
    } catch (parse_failure) {
      // Invalid JSON is reported as null, same as blank input.
    }
  }
  return null;
}
|
||||||
158
server/services/puppeteer_runner.js
Normal file
158
server/services/puppeteer_runner.js
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
import dotenv from 'dotenv';
|
||||||
|
import fs from 'node:fs';
|
||||||
|
import path from 'node:path';
|
||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
let browser_singleton = null;
|
||||||
|
|
||||||
|
/**
 * Read the per-action timeout from the ACTION_TIMEOUT_MS environment variable.
 *
 * The value ends up as a setTimeout delay, so anything that is not a finite
 * positive number (unset, empty, non-numeric, zero, negative) would make the
 * timer fire almost immediately; such values fall back to the default.
 *
 * @returns {number} Timeout in milliseconds (default 300000).
 */
function get_action_timeout_ms() {
  const default_timeout_ms = 300000;
  const raw_value = process.env.ACTION_TIMEOUT_MS;
  if (raw_value === undefined || raw_value === '') {
    return default_timeout_ms;
  }

  const parsed_ms = Number(raw_value);
  if (!Number.isFinite(parsed_ms) || parsed_ms <= 0) {
    return default_timeout_ms;
  }
  return parsed_ms;
}
|
||||||
|
|
||||||
|
/**
 * Return the extension source directory configured via CRX_SRC_PATH.
 *
 * @returns {string} The raw value of the CRX_SRC_PATH environment variable.
 * @throws {Error} When CRX_SRC_PATH is unset or empty.
 */
function get_crx_src_path() {
  const { CRX_SRC_PATH: configured_path } = process.env;
  if (configured_path) {
    return configured_path;
  }
  throw new Error('缺少环境变量 CRX_SRC_PATH');
}
|
||||||
|
|
||||||
|
/**
 * Find the first extension id among a list of browser targets.
 *
 * @param {Iterable<{url: function(): string}>} targets - Objects exposing a
 *   `url()` method.
 * @returns {string|null} The host part of the first
 *   `chrome-extension://<id>/...` URL encountered, or null when none match.
 */
function get_extension_id_from_targets(targets) {
  const id_pattern = /^chrome-extension:\/\/([^/]+)\//;

  for (const candidate of targets) {
    const target_url = candidate.url();
    if (target_url && target_url.startsWith('chrome-extension://')) {
      const [, extension_id] = target_url.match(id_pattern) || [];
      if (extension_id) {
        return extension_id;
      }
    }
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
 * Resolve the id of a loaded extension, waiting for one to appear if needed.
 *
 * @param {object} browser - Object exposing `targets()` and
 *   `waitForTarget(predicate, options)`.
 * @param {number} timeout_ms - Passed as the `timeout` option of waitForTarget.
 * @returns {Promise<string|null>} The extension id, or null when no
 *   `chrome-extension://` target shows up (waitForTarget rejections are
 *   treated as "not found").
 */
async function wait_for_extension_id(browser, timeout_ms) {
  // Fast path: an extension target is already present.
  const already_loaded = get_extension_id_from_targets(browser.targets());
  if (already_loaded) {
    return already_loaded;
  }

  const is_extension_target = (candidate) => {
    const candidate_url = candidate.url();
    return typeof candidate_url === 'string' && candidate_url.startsWith('chrome-extension://');
  };

  const found_target = await browser
    .waitForTarget(is_extension_target, { timeout: timeout_ms })
    .catch(() => null);

  return found_target ? get_extension_id_from_targets([found_target]) : null;
}
|
||||||
|
|
||||||
|
/**
 * Determine which Chrome binary to launch.
 *
 * @returns {string} Absolute path: the resolved CHROME_EXECUTABLE_PATH when
 *   that environment variable is set, otherwise `../chrome-win/chrome.exe`
 *   resolved against the current working directory.
 */
function get_chrome_executable_path() {
  // The environment variable wins so the Chrome version can be switched easily.
  const { CHROME_EXECUTABLE_PATH: override_path } = process.env;

  // Default: chrome-win/chrome.exe in the project root. The process cwd is
  // normally server/, hence the step up one level.
  return override_path
    ? path.resolve(override_path)
    : path.resolve(process.cwd(), '../chrome-win/chrome.exe');
}
|
||||||
|
|
||||||
|
// Launch currently in progress, shared so that concurrent callers do not each
// start their own Chrome against the same user-data-dir.
let browser_launch_promise = null;

/**
 * Validate the environment and launch Chrome with the extension loaded.
 *
 * @returns {Promise<object>} The launched Puppeteer browser.
 * @throws {Error} When the Chrome executable or the extension manifest.json
 *   does not exist, or when CRX_SRC_PATH is missing.
 */
async function launch_browser_with_extension() {
  const chrome_executable_path = get_chrome_executable_path();
  if (!fs.existsSync(chrome_executable_path)) {
    throw new Error(`Chrome 不存在: ${chrome_executable_path}`);
  }

  const raw_extension_path = path.resolve(get_crx_src_path());
  const manifest_path = path.resolve(raw_extension_path, 'manifest.json');
  if (!fs.existsSync(manifest_path)) {
    throw new Error(`扩展 manifest.json 不存在: ${manifest_path}`);
  }

  // Chrome's extension flags are given forward slashes even on Windows.
  const extension_path = raw_extension_path.replace(/\\/g, '/');
  const headless = String(process.env.PUPPETEER_HEADLESS || 'false') === 'true';
  const user_data_dir = path.resolve(process.cwd(), 'puppeteer_profile');

  const browser = await puppeteer.launch({
    executablePath: chrome_executable_path,
    headless,
    args: [
      `--user-data-dir=${user_data_dir}`,
      '--enable-extensions',
      `--disable-extensions-except=${extension_path}`,
      `--load-extension=${extension_path}`,
      '--no-default-browser-check',
      '--disable-popup-blocking',
      '--disable-dev-shm-usage'
    ]
  });

  // Drop the cached handle once Chrome exits or crashes so the next caller
  // relaunches instead of receiving a dead browser.
  browser.once('disconnected', () => {
    if (browser_singleton === browser) {
      browser_singleton = null;
    }
  });

  browser_singleton = browser;
  return browser;
}

/**
 * Return the shared Puppeteer browser, launching it on first use.
 *
 * Concurrent calls made while a launch is in flight all await that same
 * launch. A failed launch is not cached, so a later call retries.
 *
 * @returns {Promise<object>} The shared Puppeteer browser instance.
 * @throws {Error} When the Chrome executable or the extension manifest.json
 *   does not exist, or when CRX_SRC_PATH is missing.
 */
export async function get_or_create_browser() {
  if (browser_singleton) {
    return browser_singleton;
  }

  if (!browser_launch_promise) {
    browser_launch_promise = launch_browser_with_extension().finally(() => {
      browser_launch_promise = null;
    });
  }

  return browser_launch_promise;
}
|
||||||
|
|
||||||
|
/**
 * Run one extension action through the extension's bridge page.
 *
 * Opens a fresh page, navigates to `bridge/bridge.html` inside the extension,
 * calls `window.server_bridge_invoke(action, payload)` there, and returns
 * whatever it resolves to. The page is always closed afterwards, including
 * when navigation fails or the action throws or times out.
 *
 * @param {string} action_name - Action identifier forwarded to the bridge.
 * @param {object} [action_payload] - Payload forwarded to the bridge
 *   (defaults to an empty object).
 * @returns {Promise<*>} The value produced by the bridge call.
 * @throws {Error} When no extension target is found, the bridge function is
 *   not present on the page, the action rejects, or the timeout elapses
 *   ('action_timeout').
 */
export async function invoke_extension_action(action_name, action_payload) {
  const browser = await get_or_create_browser();
  const page = await browser.newPage();

  try {
    await page.goto('about:blank');

    // Best effort: opening chrome://extensions first nudges extension
    // initialization (in some environments the extension target does not
    // appear right away). Failure here is not fatal.
    try {
      await page.goto('chrome://extensions/', { waitUntil: 'domcontentloaded' });
    } catch (err) {
      // ignore
    }

    const extension_id = await wait_for_extension_id(browser, 15000);
    if (!extension_id) {
      throw new Error(
        '未找到扩展 extension_id:Chrome 未加载扩展(常见原因:MV2 被禁用/企业策略未生效/CRX_SRC_PATH 不正确/使用了 headless)'
      );
    }

    const bridge_url = `chrome-extension://${extension_id}/bridge/bridge.html`;
    await page.goto(bridge_url, { waitUntil: 'domcontentloaded' });

    const timeout_ms = get_action_timeout_ms();

    // The callback below is serialized and executed inside the page, so it
    // must be self-contained and cannot reference anything from this module.
    return await page.evaluate(
      async (action, payload, timeout) => {
        function with_timeout(promise, timeout_ms_inner) {
          return new Promise((resolve, reject) => {
            const timer = setTimeout(() => reject(new Error('action_timeout')), timeout_ms_inner);
            promise
              .then((v) => {
                clearTimeout(timer);
                resolve(v);
              })
              .catch((e) => {
                clearTimeout(timer);
                reject(e);
              });
          });
        }

        if (!window.server_bridge_invoke) {
          throw new Error('bridge 未注入 window.server_bridge_invoke');
        }

        return await with_timeout(window.server_bridge_invoke(action, payload), timeout);
      },
      action_name,
      action_payload || {},
      timeout_ms
    );
  } finally {
    // Always release the tab. A failure to close (e.g. the browser already
    // went away) must not replace the action's own result or error.
    await page.close().catch(() => {});
  }
}
|
||||||
30
server/services/schedule_loader.js
Normal file
30
server/services/schedule_loader.js
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import cron from 'node-cron';
|
||||||
|
import { cron_task_list } from '../config/cron_tasks.js';
|
||||||
|
import { execute_action_and_record } from './task_executor.js';
|
||||||
|
|
||||||
|
const cron_jobs = [];
|
||||||
|
|
||||||
|
/**
 * Schedule every task listed in `cron_task_list`.
 *
 * Each tick runs the task's action through `execute_action_and_record` with
 * source 'cron'. Calling this again while jobs are already registered is a
 * no-op, so tasks are never scheduled twice; call `stop_all_cron_tasks`
 * first to reschedule.
 */
export function start_all_cron_tasks() {
  if (cron_jobs.length > 0) {
    return;
  }

  for (const task of cron_task_list) {
    const job = cron.schedule(task.cron_expression, async () => {
      try {
        await execute_action_and_record({
          action_name: task.action_name,
          action_payload: task.action_payload || {},
          source: 'cron'
        });
      } catch (err) {
        // The run is expected to be recorded in crawl_run_record, but log it
        // as well so a failure is still visible if that write did not happen.
        console.error(`[cron] action "${task.action_name}" failed:`, err);
      }
    });

    cron_jobs.push(job);
  }
}
|
||||||
|
|
||||||
|
/**
 * Stop every registered cron job and empty the job registry.
 */
export function stop_all_cron_tasks() {
  cron_jobs.forEach((scheduled_job) => {
    scheduled_job.stop();
  });

  // Clear in place so the module-level array identity is preserved.
  cron_jobs.splice(0, cron_jobs.length);
}
|
||||||
39
server/services/task_executor.js
Normal file
39
server/services/task_executor.js
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
import { crawl_run_record } from '../models/index.js';
|
||||||
|
import { safe_json_stringify } from './json_utils.js';
|
||||||
|
import { invoke_extension_action } from './puppeteer_runner.js';
|
||||||
|
import { persist_amazon_result } from './amazon_persist.js';
|
||||||
|
|
||||||
|
/**
 * Execute an extension action, persist its result, and write a run record.
 *
 * A `crawl_run_record` row is written for every invocation, successful or
 * not. If the action (or result persistence) fails, that error is what the
 * caller receives; a failure to write the run record is then only logged so
 * it cannot hide the real cause. If the action succeeded but the run record
 * cannot be written, the record error is thrown.
 *
 * @param {object} params
 * @param {string} params.action_name - Action to invoke.
 * @param {object} [params.action_payload] - Payload for the action
 *   (defaults to an empty object).
 * @param {string} [params.source] - Stored on the run record (e.g. 'cron').
 * @returns {Promise<*>} The action result.
 * @throws {*} Whatever the action or persistence step threw, or the
 *   run-record write error when everything else succeeded.
 */
export async function execute_action_and_record(params) {
  const { action_name, action_payload, source } = params;

  const request_payload = safe_json_stringify(action_payload || {});

  let ok = false;
  let result_payload = null;
  let error_message = null;
  let result;
  let action_failed = false;
  let action_error;

  try {
    result = await invoke_extension_action(action_name, action_payload || {});

    ok = true;
    result_payload = safe_json_stringify(result);

    // Persist by stage automatically (independent of the raw run record).
    await persist_amazon_result(result);
  } catch (err) {
    action_failed = true;
    action_error = err;
    ok = false;
    error_message = (err && err.message) || String(err);
  }

  try {
    await crawl_run_record.create({
      action_name,
      request_payload,
      ok,
      result_payload,
      error_message,
      source
    });
  } catch (record_err) {
    if (!action_failed) {
      throw record_err;
    }
    // Keep the action error as the one that propagates; just report this one.
    console.error(`[task_executor] failed to write crawl_run_record for "${action_name}":`, record_err);
  }

  if (action_failed) {
    throw action_error;
  }
  return result;
}
|
||||||
Reference in New Issue
Block a user