diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..6d729cb --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,18 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { + "type": "node", + "request": "launch", + "name": "同步数据库", + "skipFiles": [ + "/**" + ], + "program": "${workspaceFolder}\\server\\scripts\\db_sync.js" + } + ] +} \ No newline at end of file diff --git a/server/.env.example b/server/.env.example deleted file mode 100644 index e305bfe..0000000 --- a/server/.env.example +++ /dev/null @@ -1,15 +0,0 @@ -# MySQL -MYSQL_HOST=127.0.0.1 -MYSQL_PORT=3306 -MYSQL_USER=root -MYSQL_PASSWORD= -MYSQL_DATABASE=ecom_crawl - -# 扩展目录(未打包,含 manifest.json) -CRX_SRC_PATH=d:/项目/电商抓取项目/mv2_simple_crx/src - -SERVER_PORT=38080 -ACTION_TIMEOUT_MS=300000 -PUPPETEER_HEADLESS=false -# 可选:指定浏览器路径(不填默认用 ../chrome-win/chrome.exe) -CHROME_EXECUTABLE_PATH= diff --git a/server/app.js b/server/app.js index 25b7f80..1ffb267 100644 --- a/server/app.js +++ b/server/app.js @@ -1,4 +1,4 @@ -import dotenv from 'dotenv'; +import { get_app_config } from './config/app_config.js'; import Koa from 'koa'; import body_parser from 'koa-bodyparser'; @@ -6,8 +6,6 @@ import { sequelize } from './models/index.js'; import { crawl_router } from './routes/crawl.js'; import { start_all_cron_tasks } from './services/schedule_loader.js'; -dotenv.config(); - const app = new Koa(); app.use(body_parser({ jsonLimit: '10mb' })); @@ -18,7 +16,8 @@ app.use(async (ctx) => { ctx.body = { ok: false, error: 'not_found' }; }); -const port = Number(process.env.SERVER_PORT || 38080); +const cfg = get_app_config(); +const port = cfg.server.port; await sequelize.authenticate(); await sequelize.sync(); diff --git a/server/config/app_config.js b/server/config/app_config.js new file mode 100644 index 0000000..a7cf102 --- /dev/null +++ b/server/config/app_config.js @@ -0,0 +1,63 @@ +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { get_env } from './env.js'; + +function must_get(key) { + const v = get_env(key); + if (v === undefined || v === null || v === '') { + throw new Error(`缺少配置 ${key}`); + } + return v; +} + +function get_bool(key, default_value) { + const v = get_env(key); + if (v === undefined || v === null || v === '') { + return default_value; + } + return String(v).toLowerCase() === 'true'; +} + +function get_int(key, default_value) { + const v = get_env(key); + if (v === undefined || v === null || v === '') { + return default_value; + } + const n = Number(v); + if (Number.isNaN(n)) { + throw new Error(`配置 ${key} 必须是数字`); + } + return n; +} + +let cached = null; + +export function get_app_config() { + if (cached) { + return cached; + } + + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + + cached = { + mysql: { + host: must_get('MYSQL_HOST'), + port: get_int('MYSQL_PORT', 3306), + user: must_get('MYSQL_USER'), + password: must_get('MYSQL_PASSWORD'), + database: must_get('MYSQL_DATABASE') + }, + server: { + port: get_int('SERVER_PORT', 38080) + }, + crawler: { + crx_src_path: must_get('CRX_SRC_PATH'), + action_timeout_ms: get_int('ACTION_TIMEOUT_MS', 300000), + puppeteer_headless: get_bool('PUPPETEER_HEADLESS', false), + chrome_executable_path: (get_env('CHROME_EXECUTABLE_PATH') || '').trim() || path.resolve(__dirname, '../../chrome-win/chrome.exe') + } + }; + + return cached; +} diff --git a/server/config/database.js b/server/config/database.js index b1322b5..2edf05f 100644 --- a/server/config/database.js +++ b/server/config/database.js @@ -1,14 +1,14 @@ -import dotenv from 'dotenv'; - -dotenv.config(); +import { get_app_config } from './app_config.js'; export function get_sequelize_options() { + const cfg = get_app_config(); + return { - host: process.env.MYSQL_HOST || '127.0.0.1', - port: Number(process.env.MYSQL_PORT || 3306), - username: process.env.MYSQL_USER || 'root', - password: process.env.MYSQL_PASSWORD || '', - database: process.env.MYSQL_DATABASE || 'ecom_crawl', + host: cfg.mysql.host, + port: cfg.mysql.port, + username: cfg.mysql.user, + password: cfg.mysql.password, + database: cfg.mysql.database, dialect: 'mysql', logging: false, define: { diff --git a/server/config/env.js b/server/config/env.js new file mode 100644 index 0000000..c7ab09d --- /dev/null +++ b/server/config/env.js @@ -0,0 +1,74 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +let loaded = false; +let env_map = {}; + +function unquote(value) { + const v = String(value); + if ((v.startsWith('"') && v.endsWith('"')) || (v.startsWith("'") && v.endsWith("'"))) { + return v.slice(1, -1); + } + return v; +} + +function parse_env_text(text) { + const out = {}; + const lines = String(text).split(/\r?\n/); + + for (const raw_line of lines) { + const line = raw_line.trim(); + if (!line) continue; + if (line.startsWith('#')) continue; + + const idx = line.indexOf('='); + if (idx <= 0) continue; + + const key = line.slice(0, idx).trim(); + let value = line.slice(idx + 1).trim(); + + // 去掉行尾注释:仅在未被引号包裹时生效 + const quoted = (value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'")); + if (!quoted) { + const sharp = value.indexOf('#'); + if (sharp >= 0) { + value = value.slice(0, sharp).trim(); + } + } + + out[key] = unquote(value); + } + + return out; +} + +export function load_env() { + if (loaded) { + return; + } + + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const env_path = path.resolve(__dirname, '../.env'); + + const text = fs.readFileSync(env_path, 'utf8'); + env_map = parse_env_text(text); + loaded = true; +} + +export function get_env(key) { + if (!loaded) { + load_env(); + } + return env_map[key]; +} + +export function get_all_env() { + if (!loaded) { + load_env(); + } + return { ...env_map }; +} + +load_env(); diff --git a/server/models/schedule_task.js b/server/models/schedule_task.js deleted file mode 100644 index 0266733..0000000 --- a/server/models/schedule_task.js +++ /dev/null @@ -1 +0,0 @@ -// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js) diff --git a/server/package-lock.json b/server/package-lock.json index e863dcf..fd8d24d 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -9,7 +9,6 @@ "version": "1.0.0", "dependencies": { "@koa/router": "^12.0.1", - "dotenv": "^16.4.5", "koa": "^2.15.3", "koa-bodyparser": "^4.4.1", "mysql2": "^3.11.0", @@ -650,18 +649,6 @@ "integrity": "sha512-XxtPuC3PGakY6PD7dG66/o8KwJ/LkH2/EKe19Dcw58w53dv4/vSQEkn/SzuyhHE2q4zPgCkxQBxus3VV4ql+Pg==", "license": "BSD-3-Clause" }, - "node_modules/dotenv": { - "version": "16.6.1", - "resolved": "https://registry.npmmirror.com/dotenv/-/dotenv-16.6.1.tgz", - "integrity": "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://dotenvx.com" - } - }, "node_modules/dottie": { "version": "2.0.7", "resolved": "https://registry.npmmirror.com/dottie/-/dottie-2.0.7.tgz", diff --git a/server/package.json b/server/package.json index 834df29..222dc40 100644 --- a/server/package.json +++ b/server/package.json @@ -9,7 +9,6 @@ }, "dependencies": { "@koa/router": "^12.0.1", - "dotenv": "^16.4.5", "koa": "^2.15.3", "koa-bodyparser": "^4.4.1", "mysql2": "^3.11.0", diff --git a/server/routes/schedule_task.js b/server/routes/schedule_task.js deleted file mode 100644 index 0266733..0000000 --- a/server/routes/schedule_task.js +++ /dev/null @@ -1 +0,0 @@ -// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js) diff --git a/server/services/cron_manager.js b/server/services/cron_manager.js deleted file mode 100644 index 0266733..0000000 --- a/server/services/cron_manager.js +++ /dev/null @@ -1 +0,0 @@ -// 已废弃:按需求改为写死定时任务(见 config/cron_tasks.js) diff --git a/server/services/puppeteer_runner.js b/server/services/puppeteer_runner.js index 617fad6..5d9b223 100644 --- a/server/services/puppeteer_runner.js +++ b/server/services/puppeteer_runner.js @@ -1,22 +1,18 @@ -import dotenv from 'dotenv'; import fs from 'node:fs'; import path from 'node:path'; import puppeteer from 'puppeteer'; - -dotenv.config(); +import { get_app_config } from '../config/app_config.js'; let browser_singleton = null; function get_action_timeout_ms() { - return Number(process.env.ACTION_TIMEOUT_MS || 300000); + const cfg = get_app_config(); + return cfg.crawler.action_timeout_ms; } function get_crx_src_path() { - const crx_src_path = process.env.CRX_SRC_PATH; - if (!crx_src_path) { - throw new Error('缺少环境变量 CRX_SRC_PATH'); - } - return crx_src_path; + const cfg = get_app_config(); + return cfg.crawler.crx_src_path; } function get_extension_id_from_targets(targets) { @@ -52,15 +48,8 @@ async function wait_for_extension_id(browser, timeout_ms) { } function get_chrome_executable_path() { - // 优先环境变量,方便你后续切换版本 - const from_env = process.env.CHROME_EXECUTABLE_PATH; - if (from_env) { - return path.resolve(from_env); - } - - // 默认使用项目根目录的 chrome-win/chrome.exe - // 当前进程 cwd 通常是 server/,所以回到上一级 - return path.resolve(process.cwd(), '../chrome-win/chrome.exe'); + const cfg = get_app_config(); + return path.resolve(cfg.crawler.chrome_executable_path); } export async function get_or_create_browser() { @@ -79,8 +68,9 @@ export async function get_or_create_browser() { throw new Error(`扩展 manifest.json 不存在: ${manifest_path}`); } + const cfg = get_app_config(); const extension_path = raw_extension_path.replace(/\\/g, '/'); - const headless = String(process.env.PUPPETEER_HEADLESS || 'false') === 'true'; + const headless = cfg.crawler.puppeteer_headless; const user_data_dir = path.resolve(process.cwd(), 'puppeteer_profile'); browser_singleton = await puppeteer.launch({