From b02f079d7d0716f63d891b22bff95e671a3b01d5 Mon Sep 17 00:00:00 2001 From: jorge guerrero Date: Mon, 23 Feb 2026 21:27:42 -0500 Subject: [PATCH] fs: preserve raw filename bytes in glob buffer mode Signed-off-by: jorge guerrero --- doc/api/fs.md | 12 ++- lib/internal/fs/glob.js | 180 +++++++++++++++++++++++---------- test/parallel/test-fs-glob.mjs | 105 ++++++++++++++++++- 3 files changed, 242 insertions(+), 55 deletions(-) diff --git a/doc/api/fs.md b/doc/api/fs.md index 66d29bc80fbf18..b24a72d1f2954c 100644 --- a/doc/api/fs.md +++ b/doc/api/fs.md @@ -1115,6 +1115,9 @@ changes: If a string array is provided, each string should be a glob pattern that specifies paths to exclude. Note: Negation patterns (e.g., '!foo.js') are not supported. + * `encoding` {string} The path encoding. If set to `'buffer'`, the iterator + yields `Buffer` paths (or `Dirent` entries with `Buffer` names when + `withFileTypes` is `true`). **Default:** `'utf8'`. * `withFileTypes` {boolean} `true` if the glob should return paths as Dirents, `false` otherwise. **Default:** `false`. * Returns: {AsyncIterator} An AsyncIterator that yields the paths of files @@ -3222,11 +3225,15 @@ changes: * `exclude` {Function|string\[]} Function to filter out files/directories or a list of glob patterns to be excluded. If a function is provided, return `true` to exclude the item, `false` to include it. **Default:** `undefined`. + * `encoding` {string} The path encoding. If set to `'buffer'`, `matches` + contains `Buffer` paths (or `Dirent` entries with `Buffer` names when + `withFileTypes` is `true`). **Default:** `'utf8'`. * `withFileTypes` {boolean} `true` if the glob should return paths as Dirents, `false` otherwise. **Default:** `false`. * `callback` {Function} * `err` {Error} + * `matches` {string\[]|Buffer\[]|fs.Dirent\[]} * Retrieves the files matching the specified pattern. @@ -5786,9 +5793,12 @@ changes: * `exclude` {Function|string\[]} Function to filter out files/directories or a list of glob patterns to be excluded. If a function is provided, return `true` to exclude the item, `false` to include it. **Default:** `undefined`. + * `encoding` {string} The path encoding. If set to `'buffer'`, returns + `Buffer` paths (or `Dirent` entries with `Buffer` names when + `withFileTypes` is `true`). **Default:** `'utf8'`. * `withFileTypes` {boolean} `true` if the glob should return paths as Dirents, `false` otherwise. **Default:** `false`. -* Returns: {string\[]} paths of files that match the pattern. +* Returns: {string\[]|Buffer\[]|fs.Dirent\[]} paths of files that match the pattern. ```mjs import { globSync } from 'node:fs'; diff --git a/lib/internal/fs/glob.js b/lib/internal/fs/glob.js index 526efd4c010d7c..5ac5b8467b789b 100644 --- a/lib/internal/fs/glob.js +++ b/lib/internal/fs/glob.js @@ -18,7 +18,8 @@ const { const { lstatSync, readdirSync } = require('fs'); const { lstat, readdir } = require('fs/promises'); -const { join, resolve, basename, isAbsolute, dirname } = require('path'); +const { join: pathJoin, resolve, basename, isAbsolute, dirname } = require('path'); +const { Buffer } = require('buffer'); const { kEmptyObject, @@ -30,7 +31,7 @@ const { validateString, validateStringArray, } = require('internal/validators'); -const { DirentFromStats } = require('internal/fs/utils'); +const { DirentFromStats, assertEncoding } = require('internal/fs/utils'); const { codes: { ERR_INVALID_ARG_TYPE, @@ -47,31 +48,33 @@ function lazyMinimatch() { } /** - * @param {string} path + * @param {string|Buffer} path + * @param {string} pathForName * @returns {Promise} */ -async function getDirent(path) { +async function getDirent(path, pathForName = path) { let stat; try { stat = await lstat(path); } catch { return null; } - return new DirentFromStats(basename(path), stat, dirname(path)); + return new DirentFromStats(basename(pathForName), stat, dirname(pathForName)); } /** - * @param {string} path + * @param {string|Buffer} path + * @param {string} pathForName * @returns {DirentFromStats|null} */ -function getDirentSync(path) { +function getDirentSync(path, pathForName = path) { let stat; try { stat = lstatSync(path); } catch { return null; } - return new DirentFromStats(basename(path), stat, dirname(path)); + return new DirentFromStats(basename(pathForName), stat, dirname(pathForName)); } /** @@ -117,13 +120,25 @@ class Cache { #cache = new SafeMap(); #statsCache = new SafeMap(); #readdirCache = new SafeMap(); + #encoding; + + constructor(encoding = 'utf8') { + this.#encoding = encoding; + } + + #toFsPath(path) { + if (this.#encoding === 'buffer') { + return Buffer.from(path, 'latin1'); + } + return path; + } stat(path) { const cached = this.#statsCache.get(path); if (cached) { return cached; } - const promise = getDirent(path); + const promise = getDirent(this.#toFsPath(path), path); this.#statsCache.set(path, promise); return promise; } @@ -133,7 +148,7 @@ class Cache { if (cached && !(cached instanceof Promise)) { return cached; } - const val = getDirentSync(path); + const val = getDirentSync(this.#toFsPath(path), path); this.#statsCache.set(path, val); return val; } @@ -145,7 +160,15 @@ class Cache { if (cached) { return cached; } - const promise = PromisePrototypeThen(readdir(path, { __proto__: null, withFileTypes: true }), null, () => []); + const promise = PromisePrototypeThen( + readdir(this.#toFsPath(path), { + __proto__: null, + withFileTypes: true, + encoding: this.#encoding, + }), + null, + () => [], + ); this.#readdirCache.set(path, promise); return promise; } @@ -156,7 +179,11 @@ class Cache { } let val; try { - val = readdirSync(path, { __proto__: null, withFileTypes: true }); + val = readdirSync(this.#toFsPath(path), { + __proto__: null, + withFileTypes: true, + encoding: this.#encoding, + }); } catch { val = []; } @@ -260,17 +287,30 @@ class ResultSet extends SafeSet { class Glob { #root; #exclude; - #cache = new Cache(); + #cache; #results = new ResultSet(); #queue = []; #subpatterns = new SafeMap(); #patterns; #withFileTypes; #isExcluded = () => false; + #encoding; constructor(pattern, options = kEmptyObject) { validateObject(options, 'options'); - const { exclude, cwd, withFileTypes } = options; + const { exclude, cwd, withFileTypes, encoding } = options; + if (encoding != null) { + if (encoding !== 'buffer') { + assertEncoding(encoding); + } + this.#encoding = encoding; + } else { + this.#encoding = 'utf8'; + } + this.#cache = new Cache(this.#encoding); this.#root = toPathIfFileURL(cwd) ?? '.'; + if (this.#encoding === 'buffer') { + this.#root = Buffer.from(this.#root).toString('latin1'); + } this.#withFileTypes = !!withFileTypes; if (exclude != null) { validateStringArrayOrFunction(exclude, 'options.exclude'); @@ -306,6 +346,29 @@ class Glob { ))); } + #toPathForMatching(path) { + if (Buffer.isBuffer(path)) { + return path.toString('latin1'); + } + return path; + } + + #toResultPath(path) { + if (this.#encoding === 'buffer') { + return Buffer.from(path, 'latin1'); + } + return path; + } + + #toResultDirent(dirent) { + if (this.#encoding === 'buffer' && + dirent !== null && + !Buffer.isBuffer(dirent.name)) { + dirent.name = Buffer.from(dirent.name, 'latin1'); + } + return dirent; + } + globSync() { ArrayPrototypePush(this.#queue, { __proto__: null, path: '.', patterns: this.#patterns }); while (this.#queue.length > 0) { @@ -317,14 +380,18 @@ class Glob { .forEach((patterns, path) => ArrayPrototypePush(this.#queue, { __proto__: null, path, patterns })); this.#subpatterns.clear(); } - return ArrayFrom( + const results = ArrayFrom( this.#results, - this.#withFileTypes ? (path) => this.#cache.statSync( + this.#withFileTypes ? (path) => this.#toResultDirent(this.#cache.statSync( isAbsolute(path) ? path : - join(this.#root, path), - ) : undefined, + pathJoin(this.#root, path), + )) : undefined, ); + if (this.#encoding === 'buffer' && !this.#withFileTypes) { + return results.map((r) => this.#toResultPath(r)); + } + return results; } #addSubpattern(path, pattern) { if (this.#isExcluded(path)) { @@ -341,7 +408,7 @@ class Glob { if (this.#withFileTypes) { const stat = this.#cache.statSync(path); if (stat !== null) { - if (this.#exclude(stat)) { + if (this.#exclude(this.#toResultDirent(stat))) { return; } } @@ -394,9 +461,9 @@ class Glob { if (isLast && typeof pattern.at(-1) === 'string') { // Add result if it exists const p = pattern.at(-1); - const stat = this.#cache.statSync(join(fullpath, p)); + const stat = this.#cache.statSync(pathJoin(fullpath, p)); if (stat && (p || isDirectory)) { - this.#results.add(join(path, p)); + this.#results.add(pathJoin(path, p)); } if (pattern.indexes.size === 1 && pattern.indexes.has(last)) { return; @@ -415,7 +482,7 @@ class Glob { let children; const firstPattern = pattern.indexes.size === 1 && pattern.at(pattern.indexes.values().next().value); if (typeof firstPattern === 'string') { - const stat = this.#cache.statSync(join(fullpath, firstPattern)); + const stat = this.#cache.statSync(pathJoin(fullpath, firstPattern)); if (stat) { stat.name = firstPattern; children = [stat]; @@ -428,8 +495,9 @@ class Glob { for (let i = 0; i < children.length; i++) { const entry = children[i]; - const entryPath = join(path, entry.name); - this.#cache.addToStatCache(join(fullpath, entry.name), entry); + const entryNameStr = this.#toPathForMatching(entry.name); + const entryPath = pathJoin(path, entryNameStr); + this.#cache.addToStatCache(pathJoin(fullpath, entryNameStr), entry); const subPatterns = new SafeSet(); const nSymlinks = new SafeSet(); @@ -444,18 +512,19 @@ class Glob { const fromSymlink = pattern.symlinks.has(index); if (current === lazyMinimatch().GLOBSTAR) { - const isDot = entry.name[0] === '.'; - const nextMatches = pattern.test(nextIndex, entry.name); + const isDot = entryNameStr[0] === '.'; + const nextMatches = pattern.test(nextIndex, entryNameStr); let nextNonGlobIndex = nextIndex; while (pattern.at(nextNonGlobIndex) === lazyMinimatch().GLOBSTAR) { nextNonGlobIndex++; } - const matchesDot = isDot && pattern.test(nextNonGlobIndex, entry.name); + const matchesDot = isDot && pattern.test(nextNonGlobIndex, entryNameStr); if ((isDot && !matchesDot) || - (this.#exclude && this.#exclude(this.#withFileTypes ? entry : entry.name))) { + (this.#exclude && + this.#exclude(this.#withFileTypes ? this.#toResultDirent(entry) : entryNameStr))) { continue; } if (!fromSymlink && entry.isDirectory()) { @@ -492,7 +561,7 @@ class Glob { // In case pattern is "**/..", // both parent and current directory should be added to the queue // if this is the last pattern, add to results instead - const parent = join(path, '..'); + const parent = pathJoin(path, '..'); if (nextIndex < last) { if (!this.#subpatterns.has(path) && !this.#cache.seen(path, pattern, nextIndex + 1)) { this.#subpatterns.set(path, [pattern.child(new SafeSet().add(nextIndex + 1))]); @@ -513,11 +582,11 @@ class Glob { } } if (typeof current === 'string') { - if (pattern.test(index, entry.name) && index !== last) { + if (pattern.test(index, entryNameStr) && index !== last) { // If current pattern matches entry name // the next pattern is a potential pattern subPatterns.add(nextIndex); - } else if (current === '.' && pattern.test(nextIndex, entry.name)) { + } else if (current === '.' && pattern.test(nextIndex, entryNameStr)) { // If current pattern is ".", proceed to test next pattern if (nextIndex === last) { this.#results.add(entryPath); @@ -526,7 +595,7 @@ class Glob { } } } - if (typeof current === 'object' && pattern.test(index, entry.name)) { + if (typeof current === 'object' && pattern.test(index, entryNameStr)) { // If current pattern is a regex that matches entry name (e.g *.js) // add next pattern to potential patterns, or to results if it's the last pattern if (index === last) { @@ -595,12 +664,12 @@ class Glob { if (isLast && typeof pattern.at(-1) === 'string') { // Add result if it exists const p = pattern.at(-1); - const stat = await this.#cache.stat(join(fullpath, p)); + const stat = await this.#cache.stat(pathJoin(fullpath, p)); if (stat && (p || isDirectory)) { - const result = join(path, p); + const result = pathJoin(path, p); if (!this.#results.has(result)) { if (this.#results.add(result)) { - yield this.#withFileTypes ? stat : result; + yield this.#withFileTypes ? this.#toResultDirent(stat) : this.#toResultPath(result); } } } @@ -613,7 +682,7 @@ class Glob { // if path is ".", add it only if pattern starts with "." or pattern is exactly "**" if (!this.#results.has(path)) { if (this.#results.add(path)) { - yield this.#withFileTypes ? stat : path; + yield this.#withFileTypes ? this.#toResultDirent(stat) : this.#toResultPath(path); } } } @@ -625,7 +694,7 @@ class Glob { let children; const firstPattern = pattern.indexes.size === 1 && pattern.at(pattern.indexes.values().next().value); if (typeof firstPattern === 'string') { - const stat = await this.#cache.stat(join(fullpath, firstPattern)); + const stat = await this.#cache.stat(pathJoin(fullpath, firstPattern)); if (stat) { stat.name = firstPattern; children = [stat]; @@ -638,8 +707,9 @@ class Glob { for (let i = 0; i < children.length; i++) { const entry = children[i]; - const entryPath = join(path, entry.name); - this.#cache.addToStatCache(join(fullpath, entry.name), entry); + const entryNameStr = this.#toPathForMatching(entry.name); + const entryPath = pathJoin(path, entryNameStr); + this.#cache.addToStatCache(pathJoin(fullpath, entryNameStr), entry); const subPatterns = new SafeSet(); const nSymlinks = new SafeSet(); @@ -654,18 +724,18 @@ class Glob { const fromSymlink = pattern.symlinks.has(index); if (current === lazyMinimatch().GLOBSTAR) { - const isDot = entry.name[0] === '.'; - const nextMatches = pattern.test(nextIndex, entry.name); + const isDot = entryNameStr[0] === '.'; + const nextMatches = pattern.test(nextIndex, entryNameStr); let nextNonGlobIndex = nextIndex; while (pattern.at(nextNonGlobIndex) === lazyMinimatch().GLOBSTAR) { nextNonGlobIndex++; } - const matchesDot = isDot && pattern.test(nextNonGlobIndex, entry.name); + const matchesDot = isDot && pattern.test(nextNonGlobIndex, entryNameStr); if ((isDot && !matchesDot) || - (this.#exclude && this.#exclude(this.#withFileTypes ? entry : entry.name))) { + (this.#exclude && this.#exclude(this.#withFileTypes ? this.#toResultDirent(entry) : entryNameStr))) { continue; } if (!fromSymlink && entry.isDirectory()) { @@ -674,7 +744,7 @@ class Glob { } else if (!fromSymlink && index === last) { // If ** is last, add to results if (!this.#results.has(entryPath) && this.#results.add(entryPath)) { - yield this.#withFileTypes ? entry : entryPath; + yield this.#withFileTypes ? this.#toResultDirent(entry) : this.#toResultPath(entryPath); } } @@ -683,7 +753,7 @@ class Glob { if (nextMatches && nextIndex === last && !isLast) { // If next pattern is the last one, add to results if (!this.#results.has(entryPath) && this.#results.add(entryPath)) { - yield this.#withFileTypes ? entry : entryPath; + yield this.#withFileTypes ? this.#toResultDirent(entry) : this.#toResultPath(entryPath); } } else if (nextMatches && entry.isDirectory()) { // Pattern matched, meaning two patterns forward @@ -706,7 +776,7 @@ class Glob { // In case pattern is "**/..", // both parent and current directory should be added to the queue // if this is the last pattern, add to results instead - const parent = join(path, '..'); + const parent = pathJoin(path, '..'); if (nextIndex < last) { if (!this.#subpatterns.has(path) && !this.#cache.seen(path, pattern, nextIndex + 1)) { this.#subpatterns.set(path, [pattern.child(new SafeSet().add(nextIndex + 1))]); @@ -719,7 +789,9 @@ class Glob { this.#cache.add(path, pattern.child(new SafeSet().add(nextIndex))); if (!this.#results.has(path)) { if (this.#results.add(path)) { - yield this.#withFileTypes ? this.#cache.statSync(fullpath) : path; + yield this.#withFileTypes ? + this.#toResultDirent(this.#cache.statSync(fullpath)) : + this.#toResultPath(path); } } } @@ -727,7 +799,9 @@ class Glob { this.#cache.add(parent, pattern.child(new SafeSet().add(nextIndex))); if (!this.#results.has(parent)) { if (this.#results.add(parent)) { - yield this.#withFileTypes ? this.#cache.statSync(join(this.#root, parent)) : parent; + yield this.#withFileTypes ? + this.#toResultDirent(this.#cache.statSync(pathJoin(this.#root, parent))) : + this.#toResultPath(parent); } } } @@ -735,16 +809,16 @@ class Glob { } } if (typeof current === 'string') { - if (pattern.test(index, entry.name) && index !== last) { + if (pattern.test(index, entryNameStr) && index !== last) { // If current pattern matches entry name // the next pattern is a potential pattern subPatterns.add(nextIndex); - } else if (current === '.' && pattern.test(nextIndex, entry.name)) { + } else if (current === '.' && pattern.test(nextIndex, entryNameStr)) { // If current pattern is ".", proceed to test next pattern if (nextIndex === last) { if (!this.#results.has(entryPath)) { if (this.#results.add(entryPath)) { - yield this.#withFileTypes ? entry : entryPath; + yield this.#withFileTypes ? this.#toResultDirent(entry) : this.#toResultPath(entryPath); } } } else { @@ -752,13 +826,13 @@ class Glob { } } } - if (typeof current === 'object' && pattern.test(index, entry.name)) { + if (typeof current === 'object' && pattern.test(index, entryNameStr)) { // If current pattern is a regex that matches entry name (e.g *.js) // add next pattern to potential patterns, or to results if it's the last pattern if (index === last) { if (!this.#results.has(entryPath)) { if (this.#results.add(entryPath)) { - yield this.#withFileTypes ? entry : entryPath; + yield this.#withFileTypes ? this.#toResultDirent(entry) : this.#toResultPath(entryPath); } } } else if (entry.isDirectory()) { diff --git a/test/parallel/test-fs-glob.mjs b/test/parallel/test-fs-glob.mjs index 74791deba373e3..eff67fb36c0894 100644 --- a/test/parallel/test-fs-glob.mjs +++ b/test/parallel/test-fs-glob.mjs @@ -2,7 +2,7 @@ import * as common from '../common/index.mjs'; import tmpdir from '../common/tmpdir.js'; import { resolve, dirname, sep, relative, join, isAbsolute } from 'node:path'; import { mkdir, writeFile, symlink, glob as asyncGlob } from 'node:fs/promises'; -import { glob, globSync, Dirent, chmodSync, writeFileSync, rmSync } from 'node:fs'; +import { glob, globSync, Dirent, chmodSync, writeFileSync, rmSync, mkdirSync } from 'node:fs'; import { test, describe } from 'node:test'; import { pathToFileURL } from 'node:url'; import { promisify } from 'node:util'; @@ -561,3 +561,106 @@ describe('globSync - ENOTDIR', function() { } }); }); + +describe('glob - encoding option', function() { + test('globSync with encoding buffer preserves non-UTF-8 bytes', { + skip: common.isWindows || common.isMacOS, + }, () => { + const cwd = tmpdir.resolve('encoding-buffer-nonutf8'); + mkdirSync(cwd, { recursive: true }); + const filename = Buffer.from([0xe9]); + const filepath = Buffer.concat([ + Buffer.from(cwd), + Buffer.from(sep), + filename, + ]); + writeFileSync(filepath, ''); + + const actual = globSync('[^a-z]', { cwd, encoding: 'buffer' }); + assert.deepStrictEqual(actual, [filename]); + }); + + test('globSync with encoding buffer traverses non-UTF-8 directories', { + skip: common.isWindows || common.isMacOS, + }, () => { + const cwd = tmpdir.resolve('encoding-buffer-nonutf8-dir'); + mkdirSync(cwd, { recursive: true }); + + const dir = Buffer.from([0xe9]); + const file = Buffer.from('x'); + const dirPath = Buffer.concat([Buffer.from(cwd), Buffer.from(sep), dir]); + const filePath = Buffer.concat([dirPath, Buffer.from(sep), file]); + + mkdirSync(dirPath, { recursive: true }); + writeFileSync(filePath, ''); + + const actual = globSync('*/*', { cwd, encoding: 'buffer' }); + const expected = Buffer.concat([dir, Buffer.from(sep), file]); + assert.deepStrictEqual(actual, [expected]); + }); + + test('globSync with encoding buffer returns Buffer results', () => { + const actual = globSync('a/**', { cwd: fixtureDir, encoding: 'buffer' }); + assert.ok(actual.length > 0); + assert.ok(actual.every((item) => Buffer.isBuffer(item))); + }); + + test('glob with encoding buffer returns Buffer results', async () => { + const promisified = promisify(glob); + const actual = await promisified('a/**', { cwd: fixtureDir, encoding: 'buffer' }); + assert.ok(actual.length > 0); + assert.ok(actual.every((item) => Buffer.isBuffer(item))); + }); + + test('fsPromises.glob with encoding buffer returns Buffer results', async () => { + const actual = []; + for await (const item of asyncGlob('a/**', { cwd: fixtureDir, encoding: 'buffer' })) { + actual.push(item); + } + assert.ok(actual.length > 0); + assert.ok(actual.every((item) => Buffer.isBuffer(item))); + }); + + test('fsPromises.glob with encoding buffer traverses non-UTF-8 directories', { + skip: common.isWindows || common.isMacOS, + }, async () => { + const cwd = tmpdir.resolve('encoding-buffer-nonutf8-dir-async'); + mkdirSync(cwd, { recursive: true }); + + const dir = Buffer.from([0xe9]); + const file = Buffer.from('x'); + const dirPath = Buffer.concat([Buffer.from(cwd), Buffer.from(sep), dir]); + const filePath = Buffer.concat([dirPath, Buffer.from(sep), file]); + + mkdirSync(dirPath, { recursive: true }); + writeFileSync(filePath, ''); + + const actual = []; + for await (const item of asyncGlob('*/*', { cwd, encoding: 'buffer' })) { + actual.push(item); + } + + const expected = Buffer.concat([dir, Buffer.from(sep), file]); + assert.deepStrictEqual(actual, [expected]); + }); + + test('globSync with encoding buffer and withFileTypes returns Dirents with Buffer names', () => { + const actual = globSync('a/**', { cwd: fixtureDir, encoding: 'buffer', withFileTypes: true }); + assert.ok(actual.length > 0); + assertDirents(actual); + assert.ok(actual.every((item) => Buffer.isBuffer(item.name))); + }); + + test('globSync with invalid encoding throws error', () => { + assert.throws( + () => globSync('a/**', { cwd: fixtureDir, encoding: 'invalid' }), + { code: 'ERR_INVALID_ARG_VALUE' } + ); + }); + + test('globSync with encoding utf8 returns string results', () => { + const actual = globSync('a/**', { cwd: fixtureDir, encoding: 'utf8' }); + assert.ok(actual.length > 0); + assert.ok(actual.every((item) => typeof item === 'string')); + }); +});