Files
mv2_simple_crx/server/services/puppeteer/puppeteer_runner.js
张成 7b42ee8ef5 1
2026-03-18 16:40:09 +08:00

202 lines
5.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import fs from 'node:fs';
import path from 'node:path';
import puppeteer from 'puppeteer';
import { get_app_config } from '../../config/app_config.js';
import { apply_page_stealth_defaults, get_stealth_puppeteer } from './puppeteer_stealth.js';
// Module-wide cache of the launched browser. It is null until
// get_or_create_browser() assigns the launched instance, and it is set back to
// null when invoke_extension_action() closes the browser.
let browser_singleton = null;
/**
 * Looks up the action timeout in the application configuration.
 *
 * @returns {*} Whatever `crawler.action_timeout_ms` holds in the config
 *   returned by `get_app_config()`; the value is passed through unmodified.
 */
function get_action_timeout_ms() {
  const { crawler } = get_app_config();
  return crawler.action_timeout_ms;
}
/**
 * Looks up the extension source directory in the application configuration.
 *
 * @returns {*} Whatever `crawler.crx_src_path` holds in the config returned by
 *   `get_app_config()`; no normalization is applied here.
 */
function get_crx_src_path() {
  return get_app_config().crawler.crx_src_path;
}
/**
 * Finds the first extension id among a collection of browser targets.
 *
 * A target contributes an id only when its URL has the form
 * `chrome-extension://<id>/...` (the slash after the id is required).
 *
 * @param {Iterable<{ url: () => (string|undefined) }>} targets Objects exposing a `url()` method.
 * @returns {string|null} The id taken from the first matching URL, or null when none matches.
 */
function get_extension_id_from_targets(targets) {
  const extension_url_pattern = /^chrome-extension:\/\/([^/]+)\//;
  for (const candidate of targets) {
    const candidate_url = candidate.url();
    // Targets without a URL, or with a non-extension URL, are skipped.
    if (candidate_url && candidate_url.startsWith('chrome-extension://')) {
      const found = candidate_url.match(extension_url_pattern);
      const extension_id = found ? found[1] : null;
      if (extension_id) {
        return extension_id;
      }
    }
  }
  return null;
}
/**
 * Resolves the id of a loaded extension, waiting for its target if necessary.
 *
 * The targets the browser already reports are checked first. If none of them
 * yields an id, the function waits for a target whose URL starts with
 * `chrome-extension://`. Any rejection of that wait is converted to null.
 *
 * @param {object} browser Object exposing `targets()` and `waitForTarget(predicate, options)`.
 * @param {number} timeout_ms Passed to `waitForTarget` as its `timeout` option.
 * @returns {Promise<string|null>} The extension id, or null when no usable target was found.
 */
async function wait_for_extension_id(browser, timeout_ms) {
  const already_present_id = get_extension_id_from_targets(browser.targets());
  if (already_present_id) {
    return already_present_id;
  }

  const is_extension_target = (candidate) => {
    const candidate_url = candidate.url();
    return typeof candidate_url === 'string' && candidate_url.startsWith('chrome-extension://');
  };

  const pending_target = browser.waitForTarget(is_extension_target, { timeout: timeout_ms });
  // A rejected wait is treated the same as "no target appeared".
  const extension_target = await pending_target.catch(() => null);

  return extension_target ? get_extension_id_from_targets([extension_target]) : null;
}
/**
 * Returns the configured Chrome executable location as an absolute path.
 *
 * @returns {string} `crawler.chrome_executable_path` from the app config,
 *   passed through `path.resolve`.
 */
function get_chrome_executable_path() {
  const { chrome_executable_path } = get_app_config().crawler;
  return path.resolve(chrome_executable_path);
}
/**
 * Launch that is currently in progress, shared by every caller that arrives
 * before it settles. Null when no launch is pending.
 */
let browser_launch_promise = null;

/**
 * Validates the configured paths, launches Chrome with the unpacked extension
 * loaded, and stores the result in `browser_singleton`.
 *
 * @returns {Promise<object>} The launched browser.
 * @throws {Error} When the Chrome executable or the extension's manifest.json is missing.
 */
async function launch_browser_with_extension() {
  const chrome_executable_path = get_chrome_executable_path();
  if (!fs.existsSync(chrome_executable_path)) {
    throw new Error(`Chrome 不存在: ${chrome_executable_path}`);
  }

  const raw_extension_path = path.resolve(get_crx_src_path());
  const manifest_path = path.resolve(raw_extension_path, 'manifest.json');
  if (!fs.existsSync(manifest_path)) {
    throw new Error(`扩展 manifest.json 不存在: ${manifest_path}`);
  }

  const cfg = get_app_config();
  // Backslashes are converted to forward slashes before the path is put into
  // the command-line switches below.
  const extension_path = raw_extension_path.replace(/\\/g, '/');
  const headless = cfg.crawler.puppeteer_headless;
  const pptr = cfg.crawler.enable_stealth ? get_stealth_puppeteer(puppeteer) : puppeteer;

  const browser = await pptr.launch({
    executablePath: chrome_executable_path,
    headless,
    args: [
      '--enable-extensions',
      `--disable-extensions-except=${extension_path}`,
      `--load-extension=${extension_path}`,
      '--no-default-browser-check',
      '--disable-popup-blocking',
      '--disable-dev-shm-usage',
      '--disable-features=ExtensionManifestV2Disabled,ExtensionManifestV2Unsupported',
      '--enable-features=AllowLegacyMV2Extensions'
    ]
  });

  browser_singleton = browser;
  // If Chrome crashes or is closed, forget the cached instance so the next
  // caller launches a fresh one instead of receiving a dead browser.
  browser.once('disconnected', () => {
    if (browser_singleton === browser) {
      browser_singleton = null;
    }
  });
  return browser;
}

/**
 * Returns the shared browser, launching it on first use.
 *
 * Callers that arrive while a launch is still in progress wait for that same
 * launch instead of starting another Chrome process.
 *
 * @returns {Promise<object>} The shared browser instance.
 * @throws {Error} When the Chrome executable or the extension's manifest.json is missing,
 *   or when the launch itself fails.
 */
export async function get_or_create_browser() {
  if (browser_singleton) {
    return browser_singleton;
  }
  if (!browser_launch_promise) {
    browser_launch_promise = launch_browser_with_extension().finally(() => {
      // Success or failure, the next cache miss must start a new launch.
      browser_launch_promise = null;
    });
  }
  return browser_launch_promise;
}
/** How long to wait for the extension's target to show up, in milliseconds. */
const EXTENSION_TARGET_TIMEOUT_MS = 15000;

/**
 * Number of invocations currently using each browser. Used so that, with
 * `auto_close_browser` enabled, only the last invocation still running on a
 * browser closes it.
 */
const active_invocations_by_browser = new WeakMap();

/**
 * Page-side half of an invocation. This function is handed to `page.evaluate`,
 * so it runs inside the bridge page and must not reference anything from this
 * module's scope.
 *
 * @param {string} action Action name forwarded to `window.server_bridge_invoke`.
 * @param {object} payload Payload forwarded to `window.server_bridge_invoke`.
 * @param {number} timeout Milliseconds after which the call is rejected with `action_timeout`.
 * @returns {Promise<*>} Whatever the bridge call settles with.
 * @throws {Error} When the bridge function is missing or the timeout elapses first.
 */
async function run_bridge_action_in_page(action, payload, timeout) {
  if (!window.server_bridge_invoke) {
    throw new Error('bridge 未注入 window.server_bridge_invoke');
  }
  const pending = window.server_bridge_invoke(action, payload);
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error('action_timeout')), timeout);
  });
  try {
    // Promise.race also accepts a plain (non-promise) bridge return value.
    return await Promise.race([pending, deadline]);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Runs one extension action through the extension's bridge page and returns its result.
 *
 * A fresh page is opened in the shared browser, navigated to
 * `chrome-extension://<id>/bridge/bridge.html`, and `window.server_bridge_invoke`
 * is called there with the action name and payload. The page is always closed
 * afterwards. When `crawler.auto_close_browser` is set, the browser is closed
 * too, but only once no other invocation is still using it.
 *
 * @param {string} action_name Name of the action passed to the bridge.
 * @param {object} [action_payload] Payload passed to the bridge; a falsy value is replaced by `{}`.
 * @returns {Promise<*>} The value returned by the bridge call.
 * @throws {Error} When the extension id cannot be determined, the bridge function is
 *   missing, the action times out (`action_timeout`), or any browser step fails.
 */
export async function invoke_extension_action(action_name, action_payload) {
  const cfg = get_app_config();
  const browser = await get_or_create_browser();
  const started_at = Date.now();
  const log_enabled = cfg.crawler.log_invoke_action;

  // Register this invocation before the first await below so an overlapping
  // invocation that finishes earlier sees the browser as still in use.
  active_invocations_by_browser.set(browser, (active_invocations_by_browser.get(browser) ?? 0) + 1);

  if (log_enabled) {
    // eslint-disable-next-line no-console
    console.log('[invoke_extension_action] start', {
      action_name,
      has_payload: !!action_payload,
      keys: action_payload && typeof action_payload === 'object' ? Object.keys(action_payload).slice(0, 20) : []
    });
  }

  let page = null;
  try {
    page = await browser.newPage();
    if (cfg.crawler.enable_stealth) {
      await apply_page_stealth_defaults(page);
    }
    await page.goto('about:blank');

    // Try opening chrome://extensions first to trigger extension initialization
    // (in some environments the extension target does not appear right away).
    try {
      await page.goto('chrome://extensions/', { waitUntil: 'domcontentloaded' });
    } catch {
      // Best effort only: a failed navigation here must not abort the action.
    }

    const extension_id = await wait_for_extension_id(browser, EXTENSION_TARGET_TIMEOUT_MS);
    if (!extension_id) {
      throw new Error(
        '未找到扩展 extension_id: Chrome 未加载扩展; 常见原因: MV2 被禁用/企业策略未生效/CRX_SRC_PATH 不正确/使用了 headless'
      );
    }

    const bridge_url = `chrome-extension://${extension_id}/bridge/bridge.html`;
    await page.goto(bridge_url, { waitUntil: 'domcontentloaded' });

    const timeout_ms = get_action_timeout_ms();
    const action_res = await page.evaluate(
      run_bridge_action_in_page,
      action_name,
      action_payload || {},
      timeout_ms
    );

    if (log_enabled) {
      // eslint-disable-next-line no-console
      console.log('[invoke_extension_action] ok', { action_name, cost_ms: Date.now() - started_at });
    }
    return action_res;
  } catch (err) {
    if (log_enabled) {
      // eslint-disable-next-line no-console
      console.log('[invoke_extension_action] fail', {
        action_name,
        cost_ms: Date.now() - started_at,
        error: (err && err.message) || String(err)
      });
    }
    throw err;
  } finally {
    if (page) {
      try {
        await page.close();
      } catch {
        // Best effort: the page may already be gone.
      }
    }

    const remaining = (active_invocations_by_browser.get(browser) ?? 1) - 1;
    if (remaining > 0) {
      active_invocations_by_browser.set(browser, remaining);
    } else {
      active_invocations_by_browser.delete(browser);
    }

    if (cfg.crawler.auto_close_browser && remaining === 0) {
      // Drop the cached reference before the awaited close so a caller that
      // arrives meanwhile launches a new browser instead of reusing this one.
      if (browser_singleton === browser) {
        browser_singleton = null;
      }
      try {
        await browser.close();
      } catch {
        // Best effort: the browser may already have exited.
      }
    }
  }
}