This commit is contained in:
张成
2026-03-18 15:46:57 +08:00
parent 37e39d35b8
commit 3d3b9b5dfa
12 changed files with 175 additions and 63 deletions

18
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,18 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "同步数据库",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\server\\scripts\\db_sync.js"
}
]
}

View File

@@ -1,15 +0,0 @@
# MySQL
MYSQL_HOST=127.0.0.1
MYSQL_PORT=3306
MYSQL_USER=root
MYSQL_PASSWORD=
MYSQL_DATABASE=ecom_crawl
# 扩展目录(未打包,含 manifest.json)
CRX_SRC_PATH=d:/项目/电商抓取项目/mv2_simple_crx/src
SERVER_PORT=38080
ACTION_TIMEOUT_MS=300000
PUPPETEER_HEADLESS=false
# 可选:指定浏览器路径(不填默认用 ../chrome-win/chrome.exe)
CHROME_EXECUTABLE_PATH=

View File

@@ -1,4 +1,4 @@
import dotenv from 'dotenv';
import { get_app_config } from './config/app_config.js';
import Koa from 'koa';
import body_parser from 'koa-bodyparser';
@@ -6,8 +6,6 @@ import { sequelize } from './models/index.js';
import { crawl_router } from './routes/crawl.js';
import { start_all_cron_tasks } from './services/schedule_loader.js';
dotenv.config();
const app = new Koa();
app.use(body_parser({ jsonLimit: '10mb' }));
@@ -18,7 +16,8 @@ app.use(async (ctx) => {
ctx.body = { ok: false, error: 'not_found' };
});
const port = Number(process.env.SERVER_PORT || 38080);
const cfg = get_app_config();
const port = cfg.server.port;
await sequelize.authenticate();
await sequelize.sync();

View File

@@ -0,0 +1,63 @@
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { get_env } from './env.js';
/**
 * Read a required configuration value.
 *
 * @param {string} key - Name of the entry to look up through get_env.
 * @returns {*} The value returned by get_env, unchanged.
 * @throws {Error} When get_env yields undefined, null or an empty string.
 */
function must_get(key) {
  const value = get_env(key);
  const is_missing = value === undefined || value === null || value === '';
  if (!is_missing) {
    return value;
  }
  throw new Error(`缺少配置 ${key}`);
}
/**
 * Read an optional boolean configuration value.
 *
 * @param {string} key - Name of the entry to look up through get_env.
 * @param {*} default_value - Returned as-is when the entry is undefined, null or ''.
 * @returns {*} default_value when the entry is unset; otherwise true only if the
 *   value, converted to a string and lower-cased, equals 'true'.
 */
function get_bool(key, default_value) {
  const raw = get_env(key);
  const is_unset = raw === undefined || raw === null || raw === '';
  return is_unset ? default_value : String(raw).toLowerCase() === 'true';
}
/**
 * Read an optional integer configuration value.
 *
 * @param {string} key - Name of the entry to look up through get_env.
 * @param {*} default_value - Returned as-is when the entry is undefined, null or ''.
 * @returns {*} default_value when the entry is unset, otherwise the parsed integer.
 * @throws {Error} When the value is not numeric, or is numeric but not an
 *   integer (for example '3306.5' or 'Infinity').
 */
function get_int(key, default_value) {
  const raw = get_env(key);
  if (raw === undefined || raw === null || raw === '') {
    return default_value;
  }
  const parsed = Number(raw);
  if (Number.isNaN(parsed)) {
    throw new Error(`配置 ${key} 必须是数字`);
  }
  // Ports and millisecond timeouts must be whole, finite numbers; Number()
  // alone would let fractional values and Infinity through.
  if (!Number.isInteger(parsed)) {
    throw new Error(`配置 ${key} 必须是整数`);
  }
  return parsed;
}
// Memoized configuration object; populated by the first get_app_config() call.
let cached = null;

/**
 * Build (once) and return the application configuration.
 *
 * The result is cached, so every caller receives the same object instance.
 *
 * @returns {{
 *   mysql: { host: *, port: *, user: *, password: *, database: * },
 *   server: { port: * },
 *   crawler: {
 *     crx_src_path: *,
 *     action_timeout_ms: *,
 *     puppeteer_headless: *,
 *     chrome_executable_path: string
 *   }
 * }} The configuration object.
 * @throws {Error} When a required entry (MYSQL_HOST, MYSQL_USER,
 *   MYSQL_DATABASE, CRX_SRC_PATH) is missing or a numeric entry is invalid.
 */
export function get_app_config() {
  if (cached) {
    return cached;
  }

  const config_dir = path.dirname(fileURLToPath(import.meta.url));

  cached = {
    mysql: {
      host: must_get('MYSQL_HOST'),
      port: get_int('MYSQL_PORT', 3306),
      user: must_get('MYSQL_USER'),
      // An empty password is a valid MySQL setup (e.g. a local root account),
      // so it must not go through must_get, which rejects ''.
      password: get_env('MYSQL_PASSWORD') ?? '',
      database: must_get('MYSQL_DATABASE')
    },
    server: {
      port: get_int('SERVER_PORT', 38080)
    },
    crawler: {
      crx_src_path: must_get('CRX_SRC_PATH'),
      action_timeout_ms: get_int('ACTION_TIMEOUT_MS', 300000),
      puppeteer_headless: get_bool('PUPPETEER_HEADLESS', false),
      // Blank or whitespace-only values fall back to a chrome.exe path resolved
      // relative to this module's directory.
      chrome_executable_path:
        (get_env('CHROME_EXECUTABLE_PATH') || '').trim() ||
        path.resolve(config_dir, '../../chrome-win/chrome.exe')
    }
  };
  return cached;
}

View File

@@ -1,14 +1,14 @@
import dotenv from 'dotenv';
dotenv.config();
import { get_app_config } from './app_config.js';
export function get_sequelize_options() {
const cfg = get_app_config();
return {
host: process.env.MYSQL_HOST || '127.0.0.1',
port: Number(process.env.MYSQL_PORT || 3306),
username: process.env.MYSQL_USER || 'root',
password: process.env.MYSQL_PASSWORD || '',
database: process.env.MYSQL_DATABASE || 'ecom_crawl',
host: cfg.mysql.host,
port: cfg.mysql.port,
username: cfg.mysql.user,
password: cfg.mysql.password,
database: cfg.mysql.database,
dialect: 'mysql',
logging: false,
define: {

74
server/config/env.js Normal file
View File

@@ -0,0 +1,74 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Set to true by load_env() once the .env file has been read and parsed.
let loaded = false;
// Key/value pairs parsed from the .env file; replaced wholesale by load_env().
let env_map = {};
/**
 * Strip one pair of matching surrounding quotes from a value.
 *
 * @param {*} value - Converted to a string before inspection.
 * @returns {string} The text without its first and last character when it both
 *   starts and ends with the same quote character (double quotes are checked
 *   before single quotes); otherwise the text unchanged.
 */
function unquote(value) {
  const text = String(value);
  for (const quote of ['"', "'"]) {
    if (text.startsWith(quote) && text.endsWith(quote)) {
      return text.slice(1, -1);
    }
  }
  return text;
}
/**
 * Turn the already-trimmed right-hand side of a `KEY=value` line into its value.
 *
 * @param {string} raw_value - Text after the first '=', trimmed.
 * @returns {string} The value with surrounding quotes and any inline comment removed.
 */
function parse_env_value(raw_value) {
  const quote = raw_value[0];
  if (quote === '"' || quote === "'") {
    // Find the closing quote first, so a '#' inside the quotes is kept even
    // when an inline comment follows the quoted value.
    const close = raw_value.indexOf(quote, 1);
    if (close > 0) {
      const rest = raw_value.slice(close + 1).trim();
      if (rest === '' || rest.startsWith('#')) {
        return raw_value.slice(1, close);
      }
    }
  }

  // Unquoted value, or quotes that do not form one clean leading pair:
  // strip an inline comment only when the whole value is not wrapped in quotes.
  let value = raw_value;
  const wrapped =
    (value.startsWith('"') && value.endsWith('"')) ||
    (value.startsWith("'") && value.endsWith("'"));
  if (!wrapped) {
    const sharp = value.indexOf('#');
    if (sharp >= 0) {
      value = value.slice(0, sharp).trim();
    }
  }
  return unquote(value);
}

/**
 * Parse the text of a .env file into a plain key/value object.
 *
 * Blank lines, lines starting with '#', and lines without a key before '='
 * are skipped. Keys and values are trimmed; later keys overwrite earlier ones.
 *
 * @param {*} text - File contents; converted to a string and split on LF or CRLF.
 * @returns {Object<string, string>} Parsed entries.
 */
function parse_env_text(text) {
  const out = {};
  for (const raw_line of String(text).split(/\r?\n/)) {
    const line = raw_line.trim();
    if (!line || line.startsWith('#')) continue;

    const idx = line.indexOf('=');
    // idx === -1: no '=' at all; idx === 0: empty key. Both are ignored.
    if (idx <= 0) continue;

    const key = line.slice(0, idx).trim();
    out[key] = parse_env_value(line.slice(idx + 1).trim());
  }
  return out;
}
/**
 * Read and parse the `.env` file located one directory above this module.
 *
 * Does nothing when the file has already been loaded. On success the parsed
 * entries replace env_map and the loaded flag is set.
 *
 * @returns {void}
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
export function load_env() {
  if (!loaded) {
    const module_dir = path.dirname(fileURLToPath(import.meta.url));
    const env_file = path.resolve(module_dir, '../.env');
    const contents = fs.readFileSync(env_file, 'utf8');
    env_map = parse_env_text(contents);
    loaded = true;
  }
}
/**
 * Look up a single entry parsed from the .env file.
 *
 * Triggers load_env() first when nothing has been loaded yet.
 *
 * @param {string} key - Entry name.
 * @returns {*} The stored value, or undefined when env_map has no such key.
 */
export function get_env(key) {
  if (!loaded) load_env();
  const { [key]: value } = env_map;
  return value;
}
/**
 * Return a shallow copy of every entry parsed from the .env file.
 *
 * Triggers load_env() first when nothing has been loaded yet. Mutating the
 * returned object does not affect the internal map.
 *
 * @returns {Object} A new object holding the same own enumerable entries as env_map.
 */
export function get_all_env() {
  if (!loaded) load_env();
  const snapshot = { ...env_map };
  return snapshot;
}
// Load the .env file as soon as this module is imported, so an unreadable
// file fails at import time rather than on the first get_env() call.
load_env();

View File

@@ -1 +0,0 @@
// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js)

View File

@@ -9,7 +9,6 @@
"version": "1.0.0",
"dependencies": {
"@koa/router": "^12.0.1",
"dotenv": "^16.4.5",
"koa": "^2.15.3",
"koa-bodyparser": "^4.4.1",
"mysql2": "^3.11.0",
@@ -650,18 +649,6 @@
"integrity": "sha512-XxtPuC3PGakY6PD7dG66/o8KwJ/LkH2/EKe19Dcw58w53dv4/vSQEkn/SzuyhHE2q4zPgCkxQBxus3VV4ql+Pg==",
"license": "BSD-3-Clause"
},
"node_modules/dotenv": {
"version": "16.6.1",
"resolved": "https://registry.npmmirror.com/dotenv/-/dotenv-16.6.1.tgz",
"integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/dottie": {
"version": "2.0.7",
"resolved": "https://registry.npmmirror.com/dottie/-/dottie-2.0.7.tgz",

View File

@@ -9,7 +9,6 @@
},
"dependencies": {
"@koa/router": "^12.0.1",
"dotenv": "^16.4.5",
"koa": "^2.15.3",
"koa-bodyparser": "^4.4.1",
"mysql2": "^3.11.0",

View File

@@ -1 +0,0 @@
// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js)

View File

@@ -1 +0,0 @@
// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js)

View File

@@ -1,22 +1,18 @@
import dotenv from 'dotenv';
import fs from 'node:fs';
import path from 'node:path';
import puppeteer from 'puppeteer';
dotenv.config();
import { get_app_config } from '../config/app_config.js';
let browser_singleton = null;
/**
 * Timeout for a single crawler action.
 *
 * Reads the value from the shared application config rather than from
 * process.env, so there is a single source of truth for settings.
 *
 * @returns {*} The `crawler.action_timeout_ms` entry of the application config.
 */
function get_action_timeout_ms() {
  return get_app_config().crawler.action_timeout_ms;
}
/**
 * Directory of the unpacked browser extension.
 *
 * Reads the value from the shared application config rather than from
 * process.env; presence checks are left to get_app_config.
 *
 * @returns {*} The `crawler.crx_src_path` entry of the application config.
 */
function get_crx_src_path() {
  return get_app_config().crawler.crx_src_path;
}
function get_extension_id_from_targets(targets) {
@@ -52,15 +48,8 @@ async function wait_for_extension_id(browser, timeout_ms) {
}
/**
 * Absolute path of the Chrome executable to launch.
 *
 * The configured path (including any default) comes from the shared
 * application config; this function only normalizes it with path.resolve, so
 * a relative configured path is resolved against the process working directory.
 *
 * @returns {string} Absolute path to the browser executable.
 */
function get_chrome_executable_path() {
  return path.resolve(get_app_config().crawler.chrome_executable_path);
}
export async function get_or_create_browser() {
@@ -79,8 +68,9 @@ export async function get_or_create_browser() {
throw new Error(`扩展 manifest.json 不存在: ${manifest_path}`);
}
const cfg = get_app_config();
const extension_path = raw_extension_path.replace(/\\/g, '/');
const headless = String(process.env.PUPPETEER_HEADLESS || 'false') === 'true';
const headless = cfg.crawler.puppeteer_headless;
const user_data_dir = path.resolve(process.cwd(), 'puppeteer_profile');
browser_singleton = await puppeteer.launch({