202 lines
5.8 KiB
JavaScript
202 lines
5.8 KiB
JavaScript
import fs from 'node:fs';
|
||
import path from 'node:path';
|
||
import puppeteer from 'puppeteer';
|
||
import { get_app_config } from '../../config/app_config.js';
|
||
import { apply_page_stealth_defaults, get_stealth_puppeteer } from './puppeteer_stealth.js';
|
||
|
||
// Module-wide cache of the launched browser: get_or_create_browser() fills it,
// and invoke_extension_action() resets it to null after auto-closing the browser.
let browser_singleton = null;
|
||
|
||
/**
 * Reads the per-action timeout from the application config.
 *
 * @returns {*} The value of `crawler.action_timeout_ms` exactly as configured
 *   (used by the caller as a millisecond timeout).
 */
function get_action_timeout_ms() {
  const { crawler } = get_app_config();
  return crawler.action_timeout_ms;
}
|
||
|
||
/**
 * Reads the extension source directory from the application config.
 *
 * @returns {*} The value of `crawler.crx_src_path` exactly as configured
 *   (not resolved or validated here).
 */
function get_crx_src_path() {
  const { crawler } = get_app_config();
  return crawler.crx_src_path;
}
|
||
|
||
// Captures the host part of a `chrome-extension://<id>/...` URL. The trailing
// slash is required, so a bare `chrome-extension://<id>` does not match.
const EXTENSION_URL_PATTERN = /^chrome-extension:\/\/([^/]+)\//;

/**
 * Finds the first extension id among a list of browser targets.
 *
 * @param {Iterable<{url: function(): string}>|null|undefined} targets
 *   Objects exposing a `url()` method; a nullish list is treated as empty.
 * @returns {string|null} The id taken from the first target whose URL looks
 *   like `chrome-extension://<id>/...`, or `null` when none matches.
 */
function get_extension_id_from_targets(targets) {
  for (const target of targets ?? []) {
    const url = target.url();
    if (!url) continue;

    // The pattern is anchored, so it doubles as the scheme check.
    const match = EXTENSION_URL_PATTERN.exec(url);
    if (match) return match[1];
  }
  return null;
}
|
||
|
||
/**
 * Resolves the id of a loaded extension, waiting for its target if needed.
 *
 * Checks the targets the browser already reports first; if none carries an
 * extension URL, waits up to `timeout_ms` for one to appear.
 *
 * @param {object} browser Browser-like object exposing `targets()` and
 *   `waitForTarget(predicate, { timeout })`.
 * @param {number} timeout_ms Maximum time to wait for an extension target.
 * @returns {Promise<string|null>} The extension id, or `null` when no usable
 *   target showed up (any `waitForTarget` failure, including its timeout, is
 *   deliberately mapped to `null`).
 */
async function wait_for_extension_id(browser, timeout_ms) {
  const existing_id = get_extension_id_from_targets(browser.targets());
  if (existing_id) {
    return existing_id;
  }

  // Use the same extractor as the predicate so the wait only ends on a target
  // whose id can actually be parsed, not on any `chrome-extension://` prefix.
  const has_extension_id = (candidate) => get_extension_id_from_targets([candidate]) !== null;

  const target = await browser
    .waitForTarget(has_extension_id, { timeout: timeout_ms })
    .catch(() => null); // best-effort: callers treat null as "extension not loaded"

  return target ? get_extension_id_from_targets([target]) : null;
}
|
||
|
||
/**
 * Returns the absolute path of the configured Chrome executable.
 *
 * @returns {string} `crawler.chrome_executable_path` resolved with
 *   `path.resolve` (relative values resolve against the current directory).
 * @throws {TypeError} When the setting is missing, not a string, or empty.
 *   An empty string would otherwise resolve to the current directory, which
 *   exists and would pass a later existence check.
 */
function get_chrome_executable_path() {
  const { crawler } = get_app_config();
  const configured_path = crawler.chrome_executable_path;

  if (typeof configured_path !== 'string' || configured_path === '') {
    throw new TypeError('未配置 crawler.chrome_executable_path(Chrome 可执行文件路径)');
  }

  return path.resolve(configured_path);
}
|
||
|
||
// Launch currently in progress, shared so that concurrent callers of
// get_or_create_browser() do not each start (and leak) their own Chrome.
let browser_launch_promise = null;

/**
 * Validates the configured paths and launches Chrome with the extension loaded.
 *
 * @returns {Promise<object>} The launched browser.
 * @throws {Error} When the Chrome executable or the extension's manifest.json
 *   does not exist on disk, or when the launch itself fails.
 */
async function launch_browser_with_extension() {
  const chrome_executable_path = get_chrome_executable_path();
  if (!fs.existsSync(chrome_executable_path)) {
    throw new Error(`Chrome 不存在: ${chrome_executable_path}`);
  }

  const raw_extension_path = path.resolve(get_crx_src_path());
  const manifest_path = path.resolve(raw_extension_path, 'manifest.json');
  if (!fs.existsSync(manifest_path)) {
    throw new Error(`扩展 manifest.json 不存在: ${manifest_path}`);
  }

  const { crawler } = get_app_config();
  // Chrome flags take forward slashes; normalise Windows-style separators.
  const extension_path = raw_extension_path.replace(/\\/g, '/');
  const pptr = crawler.enable_stealth ? get_stealth_puppeteer(puppeteer) : puppeteer;

  const browser = await pptr.launch({
    executablePath: chrome_executable_path,
    headless: crawler.puppeteer_headless,
    args: [
      '--enable-extensions',
      `--disable-extensions-except=${extension_path}`,
      `--load-extension=${extension_path}`,
      '--no-default-browser-check',
      '--disable-popup-blocking',
      '--disable-dev-shm-usage',
      '--disable-features=ExtensionManifestV2Disabled,ExtensionManifestV2Unsupported',
      '--enable-features=AllowLegacyMV2Extensions'
    ]
  });

  // Drop the cached instance once Chrome goes away (crash or close) so the
  // next caller relaunches instead of receiving a dead browser.
  browser.on('disconnected', () => {
    if (browser_singleton === browser) {
      browser_singleton = null;
    }
  });

  return browser;
}

/**
 * Returns the shared browser, launching it on first use.
 *
 * Calls made while a launch is still in progress share that launch. A failed
 * launch is not cached, so the next call retries.
 *
 * @returns {Promise<object>} The cached or newly launched browser.
 * @throws {Error} When the Chrome executable or the extension's manifest.json
 *   is missing, or when the launch fails.
 */
export async function get_or_create_browser() {
  if (browser_singleton) {
    return browser_singleton;
  }

  if (!browser_launch_promise) {
    browser_launch_promise = (async () => {
      try {
        browser_singleton = await launch_browser_with_extension();
        return browser_singleton;
      } finally {
        browser_launch_promise = null;
      }
    })();
  }

  return browser_launch_promise;
}
|
||
|
||
/**
 * Runs one action inside the extension's bridge page and returns its result.
 *
 * Opens a fresh page in the shared browser, navigates it to the extension's
 * `bridge/bridge.html`, calls `window.server_bridge_invoke(action, payload)`
 * there with a timeout, and always closes the page afterwards. When
 * `crawler.auto_close_browser` is set, the browser is closed as well and the
 * cached singleton is cleared.
 *
 * @param {string} action_name Action identifier forwarded to the bridge.
 * @param {*} [action_payload] Payload forwarded to the bridge; any falsy
 *   value is sent as `{}`.
 * @returns {Promise<*>} Whatever the bridge call resolves to, as returned by
 *   `page.evaluate`.
 * @throws {Error} When no extension target is found, the bridge function is
 *   missing, the action times out (`action_timeout`), or the action rejects.
 */
export async function invoke_extension_action(action_name, action_payload) {
  // How long to wait for the extension's target to show up after launch.
  const extension_wait_timeout_ms = 15000;

  const cfg = get_app_config();
  const browser = await get_or_create_browser();

  const started_at = Date.now();
  const log_enabled = cfg.crawler.log_invoke_action;

  if (log_enabled) {
    // eslint-disable-next-line no-console
    console.log('[invoke_extension_action] start', {
      action_name,
      has_payload: !!action_payload,
      keys: action_payload && typeof action_payload === 'object' ? Object.keys(action_payload).slice(0, 20) : []
    });
  }

  let page = null;
  try {
    page = await browser.newPage();
    if (cfg.crawler.enable_stealth) {
      await apply_page_stealth_defaults(page);
    }
    await page.goto('about:blank');

    // Try opening chrome://extensions first to trigger extension initialisation
    // (in some environments the extension target does not appear right away).
    try {
      await page.goto('chrome://extensions/', { waitUntil: 'domcontentloaded' });
    } catch {
      // Best-effort nudge only; the wait below decides whether we can proceed.
    }

    const extension_id = await wait_for_extension_id(browser, extension_wait_timeout_ms);
    if (!extension_id) {
      throw new Error(
        '未找到扩展 extension_id:Chrome 未加载扩展(常见原因:MV2 被禁用/企业策略未生效/CRX_SRC_PATH 不正确/使用了 headless)'
      );
    }

    const bridge_url = `chrome-extension://${extension_id}/bridge/bridge.html`;
    await page.goto(bridge_url, { waitUntil: 'domcontentloaded' });

    const timeout_ms = get_action_timeout_ms();
    // The callback below executes inside the page, so it must be self-contained.
    const action_res = await page.evaluate(
      async (action, payload, timeout) => {
        if (!window.server_bridge_invoke) {
          throw new Error('bridge 未注入 window.server_bridge_invoke');
        }

        // Promise.resolve() lets a bridge that returns a plain value work too.
        const pending = Promise.resolve(window.server_bridge_invoke(action, payload));

        let timer;
        const timed_out = new Promise((_resolve, reject) => {
          timer = setTimeout(() => reject(new Error('action_timeout')), timeout);
        });

        try {
          return await Promise.race([pending, timed_out]);
        } finally {
          clearTimeout(timer);
        }
      },
      action_name,
      action_payload || {},
      timeout_ms
    );

    if (log_enabled) {
      // eslint-disable-next-line no-console
      console.log('[invoke_extension_action] ok', { action_name, cost_ms: Date.now() - started_at });
    }

    return action_res;
  } catch (err) {
    if (log_enabled) {
      // eslint-disable-next-line no-console
      console.log('[invoke_extension_action] fail', {
        action_name,
        cost_ms: Date.now() - started_at,
        error: (err && err.message) || String(err)
      });
    }
    throw err;
  } finally {
    if (page) {
      try {
        await page.close();
      } catch {
        // Cleanup is best-effort; never mask the action's own outcome.
      }
    }

    if (cfg.crawler.auto_close_browser) {
      try {
        await browser.close();
      } catch {
        // Cleanup is best-effort; never mask the action's own outcome.
      }
      browser_singleton = null;
    }
  }
}
|