diff --git a/src/browser/cdp.test.ts b/src/browser/cdp.test.ts new file mode 100644 index 00000000..480f32ae --- /dev/null +++ b/src/browser/cdp.test.ts @@ -0,0 +1,66 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { MockWebSocket } = vi.hoisted(() => { + class MockWebSocket { + static OPEN = 1; + readyState = 1; + private handlers = new Map void>>(); + + constructor(_url: string) { + queueMicrotask(() => this.emit('open')); + } + + on(event: string, handler: (...args: any[]) => void): void { + const handlers = this.handlers.get(event) ?? []; + handlers.push(handler); + this.handlers.set(event, handlers); + } + + send(_message: string): void {} + + close(): void { + this.readyState = 3; + } + + private emit(event: string, ...args: any[]): void { + for (const handler of this.handlers.get(event) ?? []) { + handler(...args); + } + } + } + + return { MockWebSocket }; +}); + +vi.mock('ws', () => ({ + WebSocket: MockWebSocket, +})); + +import { CDPBridge } from './cdp.js'; + +describe('CDPBridge cookies', () => { + beforeEach(() => { + vi.unstubAllEnvs(); + }); + + it('filters cookies by actual domain match instead of substring match', async () => { + vi.stubEnv('OPENCLI_CDP_ENDPOINT', 'ws://127.0.0.1:9222/devtools/page/1'); + + const bridge = new CDPBridge(); + vi.spyOn(bridge, 'send').mockResolvedValue({ + cookies: [ + { name: 'good', value: '1', domain: '.example.com' }, + { name: 'exact', value: '2', domain: 'example.com' }, + { name: 'bad', value: '3', domain: 'notexample.com' }, + ], + }); + + const page = await bridge.connect(); + const cookies = await page.getCookies({ domain: 'example.com' }); + + expect(cookies).toEqual([ + { name: 'good', value: '1', domain: '.example.com' }, + { name: 'exact', value: '2', domain: 'example.com' }, + ]); + }); +}); diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index 04966bfb..335671fd 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -218,7 +218,7 @@ class CDPPage implements IPage { const cookies = isRecord(result) && Array.isArray(result.cookies) ? result.cookies : []; const domain = opts.domain; return domain - ? cookies.filter((cookie): cookie is BrowserCookie => isCookie(cookie) && cookie.domain.includes(domain)) + ? cookies.filter((cookie): cookie is BrowserCookie => isCookie(cookie) && matchesCookieDomain(cookie.domain, domain)) : cookies; } @@ -346,6 +346,13 @@ function isCookie(value: unknown): value is BrowserCookie { && typeof value.domain === 'string'; } +function matchesCookieDomain(cookieDomain: string, targetDomain: string): boolean { + const normalizedCookieDomain = cookieDomain.replace(/^\./, '').toLowerCase(); + const normalizedTargetDomain = targetDomain.replace(/^\./, '').toLowerCase(); + return normalizedTargetDomain === normalizedCookieDomain + || normalizedTargetDomain.endsWith(`.${normalizedCookieDomain}`); +} + // ── CDP target selection (unchanged) ── function selectCDPTarget(targets: CDPTarget[]): CDPTarget | undefined { diff --git a/src/download/index.test.ts b/src/download/index.test.ts index 6329f173..63410f1c 100644 --- a/src/download/index.test.ts +++ b/src/download/index.test.ts @@ -14,15 +14,15 @@ afterEach(async () => { servers.length = 0; }); -async function startServer(handler: http.RequestListener): Promise { +async function startServer(handler: http.RequestListener, hostname = '127.0.0.1'): Promise { const server = http.createServer(handler); servers.push(server); - await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + await new Promise((resolve) => server.listen(0, hostname, resolve)); const address = server.address(); if (!address || typeof address === 'string') { throw new Error('Failed to start test server'); } - return `http://127.0.0.1:${address.port}`; + return `http://${hostname}:${address.port}`; } describe('download helpers', () => { @@ -56,4 +56,52 @@ describe('download helpers', () => { }); expect(fs.existsSync(destPath)).toBe(false); }); + + it('does not forward cookies across cross-domain redirects', async () => { + let forwardedCookie: string | undefined; + const targetUrl = await startServer((req, res) => { + forwardedCookie = req.headers.cookie; + res.statusCode = 200; + res.end('ok'); + }, 'localhost'); + + const redirectUrl = await startServer((_req, res) => { + res.statusCode = 302; + res.setHeader('Location', targetUrl); + res.end(); + }); + + const tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'opencli-download-')); + const destPath = path.join(tempDir, 'redirect.txt'); + const result = await httpDownload(`${redirectUrl}/start`, destPath, { cookies: 'sid=abc' }); + + expect(result).toEqual({ success: true, size: 2 }); + expect(forwardedCookie).toBeUndefined(); + expect(fs.readFileSync(destPath, 'utf8')).toBe('ok'); + }); + + it('does not forward cookie headers across cross-domain redirects', async () => { + let forwardedCookie: string | undefined; + const targetUrl = await startServer((req, res) => { + forwardedCookie = req.headers.cookie; + res.statusCode = 200; + res.end('ok'); + }, 'localhost'); + + const redirectUrl = await startServer((_req, res) => { + res.statusCode = 302; + res.setHeader('Location', targetUrl); + res.end(); + }); + + const tempDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'opencli-download-')); + const destPath = path.join(tempDir, 'redirect-header.txt'); + const result = await httpDownload(`${redirectUrl}/start`, destPath, { + headers: { Cookie: 'sid=header-cookie' }, + }); + + expect(result).toEqual({ success: true, size: 2 }); + expect(forwardedCookie).toBeUndefined(); + expect(fs.readFileSync(destPath, 'utf8')).toBe('ok'); + }); }); diff --git a/src/download/index.ts b/src/download/index.ts index 62554fb1..71064551 100644 --- a/src/download/index.ts +++ b/src/download/index.ts @@ -114,10 +114,17 @@ export async function httpDownload( resolve({ success: false, size: 0, error: `Too many redirects (> ${maxRedirects})` }); return; } + const redirectUrl = resolveRedirectUrl(url, response.headers.location); + const originalHost = new URL(url).hostname; + const redirectHost = new URL(redirectUrl).hostname; + // Do not forward cookies when a redirect crosses host boundaries. + const redirectOptions = originalHost === redirectHost + ? options + : { ...options, cookies: undefined, headers: stripCookieHeaders(options.headers) }; httpDownload( - resolveRedirectUrl(url, response.headers.location), + redirectUrl, destPath, - options, + redirectOptions, redirectCount + 1, ).then(resolve); return; @@ -167,6 +174,13 @@ export function resolveRedirectUrl(currentUrl: string, location: string): string return new URL(location, currentUrl).toString(); } +function stripCookieHeaders(headers?: Record): Record | undefined { + if (!headers) return headers; + return Object.fromEntries( + Object.entries(headers).filter(([key]) => key.toLowerCase() !== 'cookie'), + ); +} + /** * Export cookies to Netscape format for yt-dlp. */ diff --git a/src/pipeline/steps/download.test.ts b/src/pipeline/steps/download.test.ts new file mode 100644 index 00000000..ffd49150 --- /dev/null +++ b/src/pipeline/steps/download.test.ts @@ -0,0 +1,134 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { IPage } from '../../types.js'; + +const { mockHttpDownload, mockYtdlpDownload, mockExportCookiesToNetscape } = vi.hoisted(() => ({ + mockHttpDownload: vi.fn(), + mockYtdlpDownload: vi.fn(), + mockExportCookiesToNetscape: vi.fn(), +})); + +vi.mock('../../download/index.js', async () => { + const actual = await vi.importActual('../../download/index.js'); + return { + ...actual, + httpDownload: mockHttpDownload, + ytdlpDownload: mockYtdlpDownload, + exportCookiesToNetscape: mockExportCookiesToNetscape, + }; +}); + +import { stepDownload } from './download.js'; + +function createMockPage(getCookies: IPage['getCookies']): IPage { + return { + goto: vi.fn(), + evaluate: vi.fn().mockResolvedValue(null), + getCookies, + snapshot: vi.fn().mockResolvedValue(''), + click: vi.fn(), + typeText: vi.fn(), + pressKey: vi.fn(), + scrollTo: vi.fn(), + getFormState: vi.fn().mockResolvedValue({}), + wait: vi.fn(), + tabs: vi.fn().mockResolvedValue([]), + closeTab: vi.fn(), + newTab: vi.fn(), + selectTab: vi.fn(), + networkRequests: vi.fn().mockResolvedValue([]), + consoleMessages: vi.fn().mockResolvedValue([]), + scroll: vi.fn(), + autoScroll: vi.fn(), + installInterceptor: vi.fn(), + getInterceptedRequests: vi.fn().mockResolvedValue([]), + screenshot: vi.fn().mockResolvedValue(''), + }; +} + +describe('stepDownload', () => { + beforeEach(() => { + mockHttpDownload.mockReset(); + mockHttpDownload.mockResolvedValue({ success: true, size: 2 }); + mockYtdlpDownload.mockReset(); + mockYtdlpDownload.mockResolvedValue({ success: true, size: 2 }); + mockExportCookiesToNetscape.mockReset(); + }); + + it('scopes browser cookies to each direct-download target domain', async () => { + const page = createMockPage(vi.fn().mockImplementation(async (opts?: { domain?: string }) => { + const domain = opts?.domain ?? 'unknown'; + return [{ name: 'sid', value: domain, domain }]; + })); + + await stepDownload( + page, + { + url: '${{ item.url }}', + dir: '/tmp/opencli-download-test', + filename: '${{ index }}.txt', + progress: false, + concurrency: 1, + }, + [ + { url: 'https://a.example/file-1.txt' }, + { url: 'https://b.example/file-2.txt' }, + ], + {}, + ); + + expect(mockHttpDownload).toHaveBeenNthCalledWith( + 1, + 'https://a.example/file-1.txt', + '/tmp/opencli-download-test/0.txt', + expect.objectContaining({ cookies: 'sid=a.example' }), + ); + expect(mockHttpDownload).toHaveBeenNthCalledWith( + 2, + 'https://b.example/file-2.txt', + '/tmp/opencli-download-test/1.txt', + expect.objectContaining({ cookies: 'sid=b.example' }), + ); + }); + + it('builds yt-dlp cookies from all target domains instead of only the first item', async () => { + const getCookies = vi.fn().mockImplementation(async (opts?: { domain?: string }) => { + const domain = opts?.domain ?? 'unknown'; + return [{ + name: `sid-${domain}`, + value: domain, + domain, + path: '/', + secure: false, + httpOnly: false, + }]; + }); + const page = createMockPage(getCookies); + + await stepDownload( + page, + { + url: '${{ item.url }}', + dir: '/tmp/opencli-download-test', + filename: '${{ index }}.mp4', + progress: false, + concurrency: 1, + }, + [ + { url: 'https://www.youtube.com/watch?v=one' }, + { url: 'https://www.bilibili.com/video/BV1xx411c7mD' }, + ], + {}, + ); + + expect(getCookies).toHaveBeenCalledWith({ domain: 'www.youtube.com' }); + expect(getCookies).toHaveBeenCalledWith({ domain: 'www.bilibili.com' }); + expect(mockExportCookiesToNetscape).toHaveBeenCalledWith( + expect.arrayContaining([ + expect.objectContaining({ name: 'sid-www.youtube.com', domain: 'www.youtube.com' }), + expect.objectContaining({ name: 'sid-www.bilibili.com', domain: 'www.bilibili.com' }), + ]), + expect.any(String), + ); + expect(mockYtdlpDownload).toHaveBeenCalledTimes(2); + }); +}); diff --git a/src/pipeline/steps/download.ts b/src/pipeline/steps/download.ts index 99d09400..57eb5176 100644 --- a/src/pipeline/steps/download.ts +++ b/src/pipeline/steps/download.ts @@ -41,9 +41,9 @@ export interface DownloadResult { /** * Extract cookies from browser page. */ -async function extractBrowserCookies(page: IPage, domain?: string): Promise { +async function extractBrowserCookies(page: IPage, domain: string): Promise { try { - const cookies = await page.getCookies(domain ? { domain } : {}); + const cookies = await page.getCookies({ domain }); return formatCookieHeader(cookies); } catch { return ''; @@ -74,6 +74,16 @@ async function extractCookiesArray( } } +function dedupeCookies( + cookies: Array<{ name: string; value: string; domain: string; path: string; secure: boolean; httpOnly: boolean }>, +): Array<{ name: string; value: string; domain: string; path: string; secure: boolean; httpOnly: boolean }> { + const deduped = new Map(); + for (const cookie of cookies) { + deduped.set(`${cookie.domain}\t${cookie.path}\t${cookie.name}`, cookie); + } + return [...deduped.values()]; +} + /** * Download step handler for YAML pipelines. * @@ -123,23 +133,29 @@ export async function stepDownload( // Create progress tracker const tracker = new DownloadProgressTracker(items.length, showProgress); - // Extract cookies if browser is available - let cookies = ''; + // Cache cookie lookups per domain so mixed-domain batches stay isolated without repeated browser calls. + const cookieHeaderCache = new Map>(); let cookiesFile: string | undefined; if (page) { - cookies = await extractBrowserCookies(page); - // For yt-dlp, we need to export cookies to Netscape format if (useYtdlp || items.some((item, index) => { const url = String(render(urlTemplate, { args, data, item, index })); return requiresYtdlp(url); })) { try { - // Try to get domain from first URL - const firstUrl = String(render(urlTemplate, { args, data, item: items[0], index: 0 })); - const domain = new URL(firstUrl).hostname; - const cookiesArray = await extractCookiesArray(page, domain); + const ytdlpDomains = [...new Set(items.flatMap((item, index) => { + const url = String(render(urlTemplate, { args, data, item, index })); + if (!useYtdlp && !requiresYtdlp(url)) return []; + try { + return [new URL(url).hostname]; + } catch { + return []; + } + }))]; + const cookiesArray = dedupeCookies( + (await Promise.all(ytdlpDomains.map((domain) => extractCookiesArray(page, domain)))).flat(), + ); if (cookiesArray.length > 0) { const tempDir = getTempDir(); @@ -234,6 +250,21 @@ export async function stepDownload( } } else { // Direct HTTP download + let cookies = ''; + if (page) { + try { + const targetDomain = new URL(url).hostname; + let cookiePromise = cookieHeaderCache.get(targetDomain); + if (!cookiePromise) { + cookiePromise = extractBrowserCookies(page, targetDomain); + cookieHeaderCache.set(targetDomain, cookiePromise); + } + cookies = await cookiePromise; + } catch { + cookies = ''; + } + } + result = await httpDownload(url, destPath, { cookies, timeout,