This commit is contained in:
张成
2026-03-18 15:25:34 +08:00
parent 5b671d320b
commit 37e39d35b8
17 changed files with 368 additions and 167 deletions

View File

@@ -0,0 +1,39 @@
import { DataTypes } from 'sequelize';
/**
 * Register the `amazon_product` model on the given Sequelize instance.
 *
 * Apart from the auto-increment `id`, every column is a STRING or TEXT type.
 * Only `asin` and `url` are mandatory; `asin` additionally carries a unique index.
 * The `*_json` columns are long-text columns (presumably serialized JSON — confirm
 * against the code that writes them).
 *
 * NOTE(review): the index list names `created_at` / `updated_at`, which are not
 * declared here. This presumably relies on timestamp columns with underscored
 * naming being configured elsewhere — confirm.
 *
 * @param {object} sequelize - Object exposing `define(name, attributes, options)`.
 * @returns {*} The value returned by `sequelize.define`.
 */
export function define_amazon_product(sequelize) {
  // Small builders so each column reads as "name: nullability(type)".
  const required = (type) => ({ type, allowNull: false });
  const optional = (type) => ({ type, allowNull: true });

  const attributes = {
    id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
    asin: required(DataTypes.STRING(32)),
    url: required(DataTypes.TEXT),
    title: optional(DataTypes.TEXT),
    price: optional(DataTypes.STRING(64)),
    sku: optional(DataTypes.STRING(256)),
    sku_color: optional(DataTypes.STRING(128)),
    sku_size: optional(DataTypes.STRING(128)),
    brand_line: optional(DataTypes.TEXT),
    brand_store_url: optional(DataTypes.TEXT),
    ac_badge: optional(DataTypes.STRING(128)),
    bestseller_hint: optional(DataTypes.TEXT),
    delivery_hint: optional(DataTypes.TEXT),
    social_proof: optional(DataTypes.TEXT),
    sustainability_hint: optional(DataTypes.TEXT),
    rating_stars: optional(DataTypes.STRING(64)),
    review_count_text: optional(DataTypes.STRING(64)),
    main_image: optional(DataTypes.TEXT),
    images_json: optional(DataTypes.TEXT('long')),
    bullets_json: optional(DataTypes.TEXT('long')),
    product_info_json: optional(DataTypes.TEXT('long')),
    detail_extra_lines_json: optional(DataTypes.TEXT('long')),
  };

  const unique_asin_index = { unique: true, fields: ['asin'] };
  const timestamp_indexes = ['created_at', 'updated_at'].map((column) => ({ fields: [column] }));

  const options = {
    tableName: 'amazon_product',
    indexes: [unique_asin_index, ...timestamp_indexes],
  };

  return sequelize.define('amazon_product', attributes, options);
}

View File

@@ -0,0 +1,31 @@
import { DataTypes } from 'sequelize';
/**
 * Register the `amazon_review` model on the given Sequelize instance.
 *
 * `url`, `review_id` and `batch_key` are mandatory; all other columns are nullable.
 * `review_id` carries a unique index, while `asin`, `batch_key` and `created_at`
 * get plain indexes.
 *
 * NOTE(review): `created_at` is indexed but not declared here. This presumably
 * relies on timestamp columns with underscored naming configured elsewhere — confirm.
 *
 * @param {object} sequelize - Object exposing `define(name, attributes, options)`.
 * @returns {*} The value returned by `sequelize.define`.
 */
export function define_amazon_review(sequelize) {
  // Small builders so each column reads as "name: nullability(type)".
  const required = (type) => ({ type, allowNull: false });
  const optional = (type) => ({ type, allowNull: true });

  const attributes = {
    id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
    asin: optional(DataTypes.STRING(32)),
    url: required(DataTypes.TEXT),
    review_id: required(DataTypes.STRING(64)),
    author: optional(DataTypes.STRING(256)),
    title: optional(DataTypes.TEXT),
    body: optional(DataTypes.TEXT('long')),
    rating_text: optional(DataTypes.STRING(64)),
    review_date: optional(DataTypes.STRING(128)),
    review_index: optional(DataTypes.INTEGER),
    batch_key: required(DataTypes.STRING(64)),
    batch_total: optional(DataTypes.INTEGER),
    batch_limit: optional(DataTypes.INTEGER),
  };

  const plain_indexes = ['asin', 'batch_key', 'created_at'].map((column) => ({ fields: [column] }));

  const options = {
    tableName: 'amazon_review',
    indexes: [{ unique: true, fields: ['review_id'] }, ...plain_indexes],
  };

  return sequelize.define('amazon_review', attributes, options);
}

View File

@@ -0,0 +1,30 @@
import { DataTypes } from 'sequelize';
/**
 * Register the `amazon_search_item` model on the given Sequelize instance.
 *
 * `asin`, `url` and `batch_key` are mandatory; all other columns are nullable.
 * Unlike the unique `review_id` index on the review model, `asin` is only
 * plainly indexed here, so the same ASIN may appear in several rows.
 *
 * NOTE(review): `created_at` is indexed but not declared here. This presumably
 * relies on timestamp columns with underscored naming configured elsewhere — confirm.
 *
 * @param {object} sequelize - Object exposing `define(name, attributes, options)`.
 * @returns {*} The value returned by `sequelize.define`.
 */
export function define_amazon_search_item(sequelize) {
  // Small builders so each column reads as "name: nullability(type)".
  const required = (type) => ({ type, allowNull: false });
  const optional = (type) => ({ type, allowNull: true });

  const attributes = {
    id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
    asin: required(DataTypes.STRING(32)),
    url: required(DataTypes.TEXT),
    title: optional(DataTypes.TEXT),
    price: optional(DataTypes.STRING(64)),
    rating: optional(DataTypes.FLOAT),
    rating_text: optional(DataTypes.STRING(64)),
    review_count: optional(DataTypes.INTEGER),
    review_count_text: optional(DataTypes.STRING(64)),
    // Column comment text: "the `index` field from the list".
    rank_index: { ...optional(DataTypes.INTEGER), comment: '列表中的 index 字段' },
    // Column comment text: "batch key of one list crawl".
    batch_key: { ...required(DataTypes.STRING(64)), comment: '一次列表抓取的批次 key' },
    batch_total: optional(DataTypes.INTEGER),
    batch_limit: optional(DataTypes.INTEGER),
  };

  const options = {
    tableName: 'amazon_search_item',
    indexes: ['asin', 'batch_key', 'created_at'].map((column) => ({ fields: [column] })),
  };

  return sequelize.define('amazon_search_item', attributes, options);
}

View File

@@ -15,8 +15,7 @@ export function define_crawl_run_record(sequelize) {
allowNull: false,
defaultValue: 'api',
comment: 'api | cron'
},
schedule_task_id: { type: DataTypes.BIGINT.UNSIGNED, allowNull: true }
}
},
{
tableName: 'crawl_run_record',

View File

@@ -1,7 +1,9 @@
import { Sequelize } from 'sequelize';
import { get_sequelize_options } from '../config/database.js';
import { define_crawl_run_record } from './crawl_run_record.js';
import { define_schedule_task } from './schedule_task.js';
import { define_amazon_product } from './amazon_product.js';
import { define_amazon_search_item } from './amazon_search_item.js';
import { define_amazon_review } from './amazon_review.js';
const sequelize_options = get_sequelize_options();
const { database, username, password, ...rest } = sequelize_options;
@@ -9,7 +11,7 @@ const { database, username, password, ...rest } = sequelize_options;
// Shared Sequelize connection, built from the database/username/password and
// remaining options destructured from get_sequelize_options().
export const sequelize = new Sequelize(database, username, password, rest);
// Register the crawl-run and schedule-task models on the shared connection.
export const crawl_run_record = define_crawl_run_record(sequelize);
export const schedule_task = define_schedule_task(sequelize);
// One schedule_task has many crawl_run_record rows (alias `records`), and each
// crawl_run_record belongs to one schedule_task (alias `schedule_task`); both
// sides join on `schedule_task_id`.
schedule_task.hasMany(crawl_run_record, { foreignKey: 'schedule_task_id', as: 'records' });
crawl_run_record.belongsTo(schedule_task, { foreignKey: 'schedule_task_id', as: 'schedule_task' });
// Register the Amazon product, search-item and review models on the same connection.
export const amazon_product = define_amazon_product(sequelize);
export const amazon_search_item = define_amazon_search_item(sequelize);
export const amazon_review = define_amazon_review(sequelize);

View File

@@ -1,19 +1 @@
import { DataTypes } from 'sequelize';
/**
 * Register the `schedule_task` model on the given Sequelize instance.
 *
 * All columns except `payload_json` and `last_run_at` are mandatory, and
 * `enabled` defaults to `true`. No indexes are declared beyond the primary key.
 *
 * NOTE(review): the comment following this definition marks the model as
 * deprecated in favour of hard-coded cron tasks (config/cron_tasks.js) —
 * confirm whether it is still registered anywhere before relying on it.
 *
 * @param {object} sequelize - Object exposing `define(name, attributes, options)`.
 * @returns {*} The value returned by `sequelize.define`.
 */
export function define_schedule_task(sequelize) {
  // Small builders so each column reads as "name: nullability(type)".
  const required = (type) => ({ type, allowNull: false });
  const optional = (type) => ({ type, allowNull: true });

  const attributes = {
    id: { type: DataTypes.BIGINT.UNSIGNED, primaryKey: true, autoIncrement: true },
    name: required(DataTypes.STRING(128)),
    cron_expression: required(DataTypes.STRING(64)),
    action_name: required(DataTypes.STRING(128)),
    payload_json: optional(DataTypes.TEXT),
    enabled: { ...required(DataTypes.BOOLEAN), defaultValue: true },
    last_run_at: optional(DataTypes.DATE),
  };

  const options = { tableName: 'schedule_task' };

  return sequelize.define('schedule_task', attributes, options);
}
// Deprecated: per requirements, scheduled tasks are now hard-coded (see config/cron_tasks.js).