{
- await page.evaluate(() =>
- (window as any).__EXPERIMENTAL_NEXT_TESTING__?.navigation.lock()
- )
+ // Acquire the lock by setting the cookie from within the page context.
+ // This triggers the CookieStore change event in navigation-testing-lock.ts,
+ // which acquires the in-memory navigation lock.
+ await page.evaluate((name) => {
+ document.cookie = name + '=1; path=/'
+ }, INSTANT_COOKIE)
try {
return await fn()
} finally {
- // Wait for the page to be ready before unlocking. This is only necessary
- // when fn() triggers a full page navigation (e.g. page.reload() or
- // clicking a plain anchor), since the new page needs time to initialize.
- await page.waitForFunction(
- () =>
- typeof (window as any).__EXPERIMENTAL_NEXT_TESTING__ !== 'undefined'
- )
- await page.evaluate(() =>
- (window as any).__EXPERIMENTAL_NEXT_TESTING__?.navigation.unlock()
- )
+ // Release the lock by clearing the cookie. For SPA navigations, this
+ // triggers the CookieStore change event which resolves the in-memory
+ // lock. For MPA navigations (reload, plain anchor), the listener in
+ // app-bootstrap.ts triggers a page reload to fetch dynamic data.
+ await page.evaluate((name) => {
+ document.cookie = name + '=; path=/; max-age=0'
+ }, INSTANT_COOKIE)
}
}
@@ -167,19 +170,23 @@ describe('instant-navigation-testing-api', () => {
it('logs an error when attempting to nest instant scopes', async () => {
const page = await openPage('/')
- const errors: string[] = []
- page.on('console', (msg) => {
- if (msg.type() === 'error') {
- errors.push(msg.text())
- }
+ // Listen for the specific error message
+ const consolePromise = page.waitForEvent('console', {
+ predicate: (msg) =>
+ msg.type() === 'error' && msg.text().includes('already acquired'),
+ timeout: 5000,
})
await instant(page, async () => {
- // Attempt to nest another instant scope - should log an error
- await instant(page, async () => {})
+ // Attempt to acquire the lock again by changing the cookie value.
+ // The CookieStore change event fires, and the handler detects that
+ // the lock is already held, logging an error.
+ await page.evaluate((name) => {
+ document.cookie = name + '=nested; path=/'
+ }, INSTANT_COOKIE)
+ const msg = await consolePromise
+ expect(msg.text()).toContain('already acquired')
})
-
- expect(errors.some((e) => e.includes('already acquired'))).toBe(true)
})
it('renders static shell on page reload', async () => {
diff --git a/test/e2e/app-dir/instant-validation/app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx b/test/e2e/app-dir/instant-validation/app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx
index 2a7203b05fbf2..9fbae8b59ca9b 100644
--- a/test/e2e/app-dir/instant-validation/app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx
+++ b/test/e2e/app-dir/instant-validation/app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx
@@ -28,7 +28,6 @@ async function Runtime() {
}
async function Dynamic() {
- await new Promise((resolve) => setTimeout(resolve, 1000))
await connection()
return <p>Dynamic content from page</p>
}
diff --git a/test/e2e/app-dir/instant-validation/instant-validation.test.ts b/test/e2e/app-dir/instant-validation/instant-validation.test.ts
index e6208c300a8cd..472f7e9c72669 100644
--- a/test/e2e/app-dir/instant-validation/instant-validation.test.ts
+++ b/test/e2e/app-dir/instant-validation/instant-validation.test.ts
@@ -15,6 +15,9 @@ describe('instant validation', () => {
const { next, skipped, isNextDev } = nextTestSetup({
files: __dirname,
skipDeployment: true,
+ env: {
+ NEXT_TEST_LOG_VALIDATION: '1',
+ },
})
if (skipped) return
if (!isNextDev) {
@@ -22,6 +25,102 @@ describe('instant validation', () => {
return
}
+ let currentCliOutputIndex = 0
+ beforeEach(() => {
+ currentCliOutputIndex = next.cliOutput.length
+ })
+
+ function getCliOutputSinceMark(): string {
+ if (next.cliOutput.length < currentCliOutputIndex) {
+ // cliOutput shrank since we started the test, so something (like a `sandbox`) reset the logs
+ currentCliOutputIndex = 0
+ }
+ return next.cliOutput.slice(currentCliOutputIndex)
+ }
+
+ type ValidationEvent =
+ | { type: 'validation_start'; requestId: string; url: string }
+ | { type: 'validation_end'; requestId: string; url: string }
+
+ async function waitForValidationStart(targetUrl: string): Promise<string> {
+ const parsedTargetUrl = new URL(targetUrl)
+ const relativeTargetUrl =
+ parsedTargetUrl.pathname + parsedTargetUrl.search + parsedTargetUrl.hash
+
+ const requestId = await retry(
+ async () => {
+ const events = parseValidationMessages(getCliOutputSinceMark())
+ const start = events.find(
+ (e) =>
+ e.type === 'validation_start' &&
+ normalizeValidationUrl(e.url) === relativeTargetUrl
+ )
+ expect(start).toBeDefined()
+ return start!.requestId
+ },
+ undefined,
+ undefined,
+ `wait for validation of '${relativeTargetUrl}' to start`
+ )
+ return requestId
+ }
+
+ async function waitForValidationEnd(requestId: string): Promise<void> {
+ await retry(
+ async () => {
+ const events = parseValidationMessages(getCliOutputSinceMark())
+ const end = events.find(
+ (e) => e.type === 'validation_end' && e.requestId === requestId
+ )
+ expect(end).toBeDefined()
+ },
+ undefined,
+ undefined,
+ 'wait for validation to end'
+ )
+ }
+
+ async function waitForValidation(url: string) {
+ const requestId = await waitForValidationStart(url)
+ await waitForValidationEnd(requestId)
+ }
+
+ const NO_VALIDATION_ERRORS_WAIT: Parameters<typeof waitForNoErrorToast>[1] = {
+ waitInMs: 500,
+ }
+
+ async function expectNoValidationErrors(
+ browser: Awaited<ReturnType<typeof navigateTo>>,
+ url: string
+ ): Promise<void> {
+ await waitForValidation(url)
+ await waitForNoErrorToast(browser, NO_VALIDATION_ERRORS_WAIT)
+ }
+
+ function parseValidationMessages(output: string): ValidationEvent[] {
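+ // With NEXT_TEST_LOG_VALIDATION enabled (see nextTestSetup above), each
+ // validation event is written to the CLI output as JSON wrapped in
+ // <VALIDATION_MESSAGE> tags, so the events can be recovered from the
+ // interleaved dev server logs.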
+ const messageRe = /<VALIDATION_MESSAGE>(.*?)<\/VALIDATION_MESSAGE>/g
+ const events: ValidationEvent[] = []
+ let match: RegExpExecArray | null
+ while ((match = messageRe.exec(output)) !== null) {
+ try {
+ events.push(JSON.parse(match[1]))
+ } catch (err) {
+ throw new Error(`Failed to parse message '${match[1]}'`, {
+ cause: err,
+ })
+ }
+ }
+ return events
+ }
+
+ function normalizeValidationUrl(url: string): string {
+ // RSC requests include ?_rsc=... in the URL. Strip it so the event URL
+ // matches what browser.url() returns (which has no _rsc param).
+ const parsed = new URL(url, 'http://n')
+ parsed.searchParams.delete('_rsc')
+ return parsed.pathname + parsed.search + parsed.hash
+ }
+
describe.each([
{ isClientNav: false, description: 'initial load' },
{ isClientNav: true, description: 'client navigation' },
@@ -56,7 +155,7 @@ describe('instant validation', () => {
expect(await browser.url()).toContain(href)
},
undefined,
- undefined,
+ 100,
'wait for url to change'
)
@@ -72,13 +171,13 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/suspense-in-root/static/suspense-around-dynamic'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('valid - runtime prefetch - suspense only around dynamic', async () => {
const browser = await navigateTo(
'/suspense-in-root/runtime/suspense-around-dynamic'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('invalid - static prefetch - missing suspense around runtime', async () => {
@@ -276,7 +375,7 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/suspense-in-root/runtime/valid-no-suspense-around-params/123'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('invalid - static prefetch - missing suspense around search params', async () => {
@@ -316,7 +415,7 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/suspense-in-root/runtime/valid-no-suspense-around-search-params?foo=bar'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('valid - target segment not visible in all navigations', async () => {
@@ -330,7 +429,7 @@ describe('instant validation', () => {
// in all navigations (which would require that its parent layouts must never
// block the children slots)
const browser = await navigateTo('/default/static/valid-blocked-children')
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('invalid - static prefetch - suspense too high', async () => {
@@ -479,7 +578,7 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/suspense-in-root/runtime/valid-sync-io-in-static-parent'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('invalid - missing suspense around dynamic (with loading.js)', async () => {
@@ -503,11 +602,11 @@ describe('instant validation', () => {
Learn more: https://nextjs.org/docs/messages/blocking-route",
"environmentLabel": "Server",
"label": "Blocking Route",
- "source": "app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx (32:19) @ Dynamic
- > 32 | await connection()
+ "source": "app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx (31:19) @ Dynamic
+ > 31 | await connection()
| ^",
"stack": [
- "Dynamic app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx (32:19)",
+ "Dynamic app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx (31:19)",
"Page app/suspense-in-root/static/invalid-only-loading-around-dynamic/page.tsx (19:9)",
],
}
@@ -519,7 +618,7 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/suspense-in-root/static/blocking-layout'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('invalid - missing suspense inside blocking layout', async () => {
const browser = await navigateTo(
@@ -558,13 +657,13 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/default/static/valid-blocking-inside-static'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('valid - blocking page inside a runtime layout is allowed if the layout has suspense', async () => {
const browser = await navigateTo(
'/suspense-in-root/runtime/valid-blocking-inside-runtime'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('invalid - blocking page inside a static layout is not allowed if the layout has no suspense', async () => {
@@ -757,26 +856,26 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/suspense-in-root/static/valid-client-data-does-not-block-validation'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('valid - parent uses sync IO in a client component', async () => {
const browser = await navigateTo(
'/suspense-in-root/static/valid-client-api-in-parent/sync-io'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('valid - parent uses dynamic usePathname() in a client component', async () => {
const browser = await navigateTo(
'/suspense-in-root/static/valid-client-api-in-parent/dynamic-params/123'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
it('valid - parent uses useSearchParams() in a client component', async () => {
const browser = await navigateTo(
'/suspense-in-root/static/valid-client-api-in-parent/search-params'
)
- await waitForNoErrorToast(browser)
+ await expectNoValidationErrors(browser, await browser.url())
})
})
@@ -910,9 +1009,10 @@ describe('instant validation', () => {
const browser = await navigateTo(
'/suspense-in-root/static/valid-client-error-in-parent-does-not-block-validation'
)
+ await waitForValidation(await browser.url())
if (isClientNav) {
// In a client nav, no errors should be reported.
- await waitForNoErrorToast(browser)
+ await waitForNoErrorToast(browser, NO_VALIDATION_ERRORS_WAIT)
} else {
// In SSR, we expect to only see the error coming from react.
await expect(browser).toDisplayCollapsedRedbox(`
@@ -935,23 +1035,26 @@ describe('instant validation', () => {
})
describe('disabling validation', () => {
+ // We don't log any messages if validation is skipped, so the best we can do is wait.
+ const VALIDATION_SKIPPED_WAIT: Parameters<typeof waitForNoErrorToast>[1] =
+ { waitInMs: 3000 }
it('in a layout', async () => {
const browser = await navigateTo(
'/suspense-in-root/disable-validation/in-layout'
)
- await waitForNoErrorToast(browser)
+ await waitForNoErrorToast(browser, VALIDATION_SKIPPED_WAIT)
})
it('in a page', async () => {
const browser = await navigateTo(
'/suspense-in-root/disable-validation/in-page'
)
- await waitForNoErrorToast(browser)
+ await waitForNoErrorToast(browser, VALIDATION_SKIPPED_WAIT)
})
it('in a page with a parent that has a config', async () => {
const browser = await navigateTo(
'/suspense-in-root/disable-validation/in-page-with-outer'
)
- await waitForNoErrorToast(browser)
+ await waitForNoErrorToast(browser, VALIDATION_SKIPPED_WAIT)
})
})
})
diff --git a/test/lib/next-test-utils.ts b/test/lib/next-test-utils.ts
index d633d984d34bd..00dcf89e5652e 100644
--- a/test/lib/next-test-utils.ts
+++ b/test/lib/next-test-utils.ts
@@ -896,11 +896,14 @@ export async function waitForNoRedbox(
}
}
-export async function waitForNoErrorToast(browser: Playwright): Promise<void> {
+export async function waitForNoErrorToast(
+ browser: Playwright,
+ { waitInMs }: { waitInMs?: number } = {}
+): Promise<void> {
let didOpenRedbox = false
try {
- await browser.waitForElementByCss('[data-issues]').click()
+ await browser.waitForElementByCss('[data-issues]', waitInMs).click()
didOpenRedbox = true
} catch {
// We expect this to fail.
diff --git a/test/production/next-server-nft/next-server-nft.test.ts b/test/production/next-server-nft/next-server-nft.test.ts
index d54128bfc66d8..71106b3add934 100644
--- a/test/production/next-server-nft/next-server-nft.test.ts
+++ b/test/production/next-server-nft/next-server-nft.test.ts
@@ -283,12 +283,7 @@ async function readNormalizedNFT(next, name) {
"/node_modules/next/dist/lib/memory/trace.js",
"/node_modules/next/dist/lib/metadata/constants.js",
"/node_modules/next/dist/lib/metadata/default-metadata.js",
- "/node_modules/next/dist/lib/metadata/generate/alternate.js",
- "/node_modules/next/dist/lib/metadata/generate/basic.js",
"/node_modules/next/dist/lib/metadata/generate/icon-mark.js",
- "/node_modules/next/dist/lib/metadata/generate/icons.js",
- "/node_modules/next/dist/lib/metadata/generate/meta.js",
- "/node_modules/next/dist/lib/metadata/generate/opengraph.js",
"/node_modules/next/dist/lib/metadata/generate/utils.js",
"/node_modules/next/dist/lib/metadata/get-metadata-route.js",
"/node_modules/next/dist/lib/metadata/is-metadata-route.js",
@@ -484,6 +479,7 @@ async function readNormalizedNFT(next, name) {
"/node_modules/next/dist/server/load-manifest.external.js",
"/node_modules/next/dist/server/node-environment-extensions/console-dim.external.js",
"/node_modules/next/dist/server/node-environment-extensions/fast-set-immediate.external.js",
+ "/node_modules/next/dist/server/node-environment-extensions/unhandled-rejection.external.js",
"/node_modules/next/dist/server/response-cache/types.js",
"/node_modules/next/dist/server/route-modules/app-page/module.compiled.js",
"/node_modules/next/dist/server/route-modules/app-page/vendored/contexts/app-router-context.js",
diff --git a/turbopack/crates/turbo-persistence/Cargo.toml b/turbopack/crates/turbo-persistence/Cargo.toml
index 2a40642afd466..bc5e91d43559c 100644
--- a/turbopack/crates/turbo-persistence/Cargo.toml
+++ b/turbopack/crates/turbo-persistence/Cargo.toml
@@ -42,6 +42,10 @@ rayon = { workspace = true }
tempfile = { workspace = true }
turbo-tasks-malloc = { workspace = true }
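+# Debugging tool for inspecting SST files; from this crate, run e.g.:
+#   cargo run --release --bin sst_inspect -- <db-path> [--verbose]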
+[[bin]]
+name = "sst_inspect"
+path = "src/bin/sst_inspect.rs"
+
[lints]
workspace = true
diff --git a/turbopack/crates/turbo-persistence/README.md b/turbopack/crates/turbo-persistence/README.md
index 8af39c9bba727..9c49c82919669 100644
--- a/turbopack/crates/turbo-persistence/README.md
+++ b/turbopack/crates/turbo-persistence/README.md
@@ -105,7 +105,7 @@ The hashes are sorted.
- foreach entry
- 1 byte type
- 3 bytes position in block after header
-- Max block size: 16 MB
+- Max block size: 16 KB
A Key block contains n keys, which specify n key value pairs.
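+
+As an illustrative sketch (the function is mine, not crate API), decoding one
+entry header per the layout above:
+
+```rust
+/// Decode a 4-byte key-block entry header: 1 byte entry type, then a
+/// 3-byte big-endian position into the block after the header.
+fn decode_entry_header(header: [u8; 4]) -> (u8, u32) {
+    let entry_type = header[0];
+    let position =
+        ((header[1] as u32) << 16) | ((header[2] as u32) << 8) | (header[3] as u32);
+    (entry_type, position)
+}
+```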
@@ -147,6 +147,9 @@ Depending on the `type` field entry has a different format:
The entries are sorted by key hash and key.
+Future:
+ * Some tables have fixed-size keys with uniformly sized values (4-byte task IDs). We could optimize the key block representation in this case by skipping the offset tables.
+
#### Value Block
- no header, all bytes are data referenced by other blocks
@@ -170,6 +173,7 @@ Reading starts from the current sequence number and goes downwards.
- not found -> break
- Key Block: find key by binary search
- found -> lookup value from value block, return
+ - read the value inline, or use the block index stored in the key entry to find the value elsewhere in the file (see the sketch below)
- not found -> break
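+
+A rough sketch of that per-entry decision (type values follow the crate's
+`KEY_BLOCK_ENTRY_TYPE_*` constants; the enum itself is illustrative, not
+crate API):
+
+```rust
+/// Where an entry's value lives, derived from its type byte.
+enum ValueKind<'a> {
+    Small,            // stored in a shared value block
+    Blob,             // stored as an external blob file
+    Deleted,          // tombstone
+    Medium,           // stored in a dedicated value block
+    Inline(&'a [u8]), // (type - 8) bytes stored directly in the key block
+}
+
+fn classify(ty: u8, inline_bytes: &[u8]) -> ValueKind<'_> {
+    match ty {
+        0 => ValueKind::Small,
+        1 => ValueKind::Blob,
+        2 => ValueKind::Deleted,
+        3 => ValueKind::Medium,
+        t if t >= 8 => ValueKind::Inline(&inline_bytes[..(t - 8) as usize]),
+        other => panic!("unknown entry type {other}"),
+    }
+}
+```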
## Writing
diff --git a/turbopack/crates/turbo-persistence/src/bin/sst_inspect.rs b/turbopack/crates/turbo-persistence/src/bin/sst_inspect.rs
new file mode 100644
index 0000000000000..c4d34774a9068
--- /dev/null
+++ b/turbopack/crates/turbo-persistence/src/bin/sst_inspect.rs
@@ -0,0 +1,785 @@
+//! SST file inspector binary for turbo-persistence databases.
+//!
+//! This tool inspects SST files to report entry type statistics per family,
+//! useful for verifying that inline value optimization is being used.
+//!
+//! Entry types:
+//! - 0: Small value (stored in value block)
+//! - 1: Blob reference
+//! - 2: Deleted/tombstone
+//! - 3: Medium value
+//! - 8-255: Inline value where (type - 8) = value byte count
+
+use std::{
+ collections::BTreeMap,
+ fs::{self, File},
+ path::{Path, PathBuf},
+ sync::Arc,
+};
+
+use anyhow::{Context, Result, bail};
+use byteorder::{BE, ReadBytesExt};
+use lzzzz::lz4::{decompress, decompress_with_dict};
+use memmap2::Mmap;
+use turbo_persistence::meta_file::MetaFile;
+// Import shared constants from the crate
+use turbo_persistence::static_sorted_file::{
+ BLOCK_TYPE_INDEX, BLOCK_TYPE_KEY_NO_HASH, BLOCK_TYPE_KEY_WITH_HASH, KEY_BLOCK_ENTRY_TYPE_BLOB,
+ KEY_BLOCK_ENTRY_TYPE_DELETED, KEY_BLOCK_ENTRY_TYPE_INLINE_MIN, KEY_BLOCK_ENTRY_TYPE_MEDIUM,
+ KEY_BLOCK_ENTRY_TYPE_SMALL,
+};
+
+/// Block size information
+#[derive(Default, Debug, Clone)]
+struct BlockSizeInfo {
+ /// Size as stored on disk (after compression, if any)
+ stored_size: u64,
+ /// Actual size (after decompression)
+ actual_size: u64,
+ /// Number of blocks that were compressed
+ compressed_count: u64,
+ /// Number of blocks stored uncompressed
+ uncompressed_count: u64,
+}
+
+impl BlockSizeInfo {
+ fn add(&mut self, stored: u64, actual: u64, was_compressed: bool) {
+ self.stored_size += stored;
+ self.actual_size += actual;
+ if was_compressed {
+ self.compressed_count += 1;
+ } else {
+ self.uncompressed_count += 1;
+ }
+ }
+
+ fn total_count(&self) -> u64 {
+ self.compressed_count + self.uncompressed_count
+ }
+
+ fn merge(&mut self, other: &BlockSizeInfo) {
+ self.stored_size += other.stored_size;
+ self.actual_size += other.actual_size;
+ self.compressed_count += other.compressed_count;
+ self.uncompressed_count += other.uncompressed_count;
+ }
+}
+
+/// Statistics for a single SST file
+#[derive(Default, Debug, Clone)]
+struct SstStats {
+ /// Count of entries by type
+ entry_type_counts: BTreeMap<u8, u64>,
+ /// Total entries
+ total_entries: u64,
+
+ /// Index block sizes
+ index_blocks: BlockSizeInfo,
+ /// Key block sizes
+ key_blocks: BlockSizeInfo,
+ /// Value block sizes (small values)
+ value_blocks: BlockSizeInfo,
+
+ /// Key compression dictionary size
+ key_dict_size: u64,
+ /// Block directory size (block_count * 4 bytes at end of file)
+ block_directory_size: u64,
+
+ /// Value sizes by type (inline values track actual bytes)
+ inline_value_bytes: u64,
+ small_value_refs: u64, // Count of references to value blocks
+ medium_value_refs: u64, // Count of references to medium values
+ blob_refs: u64, // Count of blob references
+ deleted_count: u64, // Count of deleted entries
+
+ /// File size in bytes
+ file_size: u64,
+}
+
+impl SstStats {
+ fn merge(&mut self, other: &SstStats) {
+ for (ty, count) in &other.entry_type_counts {
+ *self.entry_type_counts.entry(*ty).or_insert(0) += count;
+ }
+ self.total_entries += other.total_entries;
+ self.index_blocks.merge(&other.index_blocks);
+ self.key_blocks.merge(&other.key_blocks);
+ self.value_blocks.merge(&other.value_blocks);
+ self.key_dict_size += other.key_dict_size;
+ self.block_directory_size += other.block_directory_size;
+ self.inline_value_bytes += other.inline_value_bytes;
+ self.small_value_refs += other.small_value_refs;
+ self.medium_value_refs += other.medium_value_refs;
+ self.blob_refs += other.blob_refs;
+ self.deleted_count += other.deleted_count;
+ self.file_size += other.file_size;
+ }
+}
+
+/// Information about an SST file from the meta file
+struct SstInfo {
+ sequence_number: u32,
+ key_compression_dictionary_length: u16,
+ block_count: u16,
+}
+
+fn entry_type_description(ty: u8) -> String {
+ match ty {
+ KEY_BLOCK_ENTRY_TYPE_SMALL => "small value (in value block)".to_string(),
+ KEY_BLOCK_ENTRY_TYPE_BLOB => "blob reference".to_string(),
+ KEY_BLOCK_ENTRY_TYPE_DELETED => "deleted/tombstone".to_string(),
+ KEY_BLOCK_ENTRY_TYPE_MEDIUM => "medium value".to_string(),
+ ty if ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN => {
+ let inline_size = ty - KEY_BLOCK_ENTRY_TYPE_INLINE_MIN;
+ format!("inline {} bytes", inline_size)
+ }
+ _ => format!("unknown type {}", ty),
+ }
+}
+
+fn family_name(family: u32) -> &'static str {
+ match family {
+ 0 => "Infra",
+ 1 => "TaskMeta",
+ 2 => "TaskData",
+ 3 => "TaskCache",
+ _ => "Unknown",
+ }
+}
+
+/// Format a number with comma separators for readability
+fn format_number(n: u64) -> String {
+ let s = n.to_string();
+ let mut result = String::with_capacity(s.len() + s.len() / 3);
+ for (i, c) in s.chars().enumerate() {
+ if i > 0 && (s.len() - i).is_multiple_of(3) {
+ result.push(',');
+ }
+ result.push(c);
+ }
+ result
+}
+
+fn format_bytes(bytes: u64) -> String {
+ if bytes >= 1024 * 1024 * 1024 {
+ format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0))
+ } else if bytes >= 1024 * 1024 {
+ format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0))
+ } else if bytes >= 1024 {
+ format!("{:.2} KB", bytes as f64 / 1024.0)
+ } else {
+ format!("{} B", bytes)
+ }
+}
+
+/// Collect SST info from all meta files in the database directory
+fn collect_sst_info(db_path: &Path) -> Result<BTreeMap<u32, Vec<SstInfo>>> {
+ let mut meta_files: Vec<PathBuf> = fs::read_dir(db_path)?
+ .filter_map(|entry| entry.ok())
+ .map(|entry| entry.path())
+ .filter(|path| path.extension().is_some_and(|ext| ext == "meta"))
+ .collect();
+
+ meta_files.sort();
+
+ if meta_files.is_empty() {
+ bail!("No .meta files found in {}", db_path.display());
+ }
+
+ let mut family_sst_info: BTreeMap<u32, Vec<SstInfo>> = BTreeMap::new();
+
+ for meta_path in &meta_files {
+ // Extract sequence number from filename
+ let filename = meta_path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
+ let seq_num: u32 = filename.parse().unwrap_or(0);
+
+ let meta_file = MetaFile::open(db_path, seq_num)
+ .with_context(|| format!("Failed to open {}", meta_path.display()))?;
+
+ let family = meta_file.family();
+
+ for entry in meta_file.entries() {
+ family_sst_info.entry(family).or_default().push(SstInfo {
+ sequence_number: entry.sequence_number(),
+ key_compression_dictionary_length: entry.key_compression_dictionary_length(),
+ block_count: entry.block_count(),
+ });
+ }
+ }
+
+ Ok(family_sst_info)
+}
+
+/// Decompress a block, respecting the optional compression protocol.
+/// When uncompressed_length is 0, the block is stored uncompressed.
+fn decompress_block(
+ compressed: &[u8],
+ uncompressed_length: u32,
+ dictionary: Option<&[u8]>,
+) -> Result<Arc<[u8]>> {
+ // Sentinel: uncompressed_length = 0 means block is stored uncompressed
+ if uncompressed_length == 0 {
+ return Ok(Arc::from(compressed));
+ }
+
+ let mut buffer = vec![0u8; uncompressed_length as usize];
+ let bytes_written = if let Some(dict) = dictionary {
+ decompress_with_dict(compressed, &mut buffer, dict)?
+ } else {
+ decompress(compressed, &mut buffer)?
+ };
+ assert_eq!(
+ bytes_written, uncompressed_length as usize,
+ "Decompressed length does not match expected"
+ );
+ Ok(Arc::from(buffer))
+}
+
+/// Analyze an SST file and return entry type statistics
+fn analyze_sst_file(db_path: &Path, info: &SstInfo) -> Result<SstStats> {
+ let filename = format!("{:08}.sst", info.sequence_number);
+ let path = db_path.join(&filename);
+
+ let file = File::open(&path).with_context(|| format!("Failed to open {}", filename))?;
+ let file_size = file.metadata()?.len();
+ let mmap = unsafe { Mmap::map(&file)? };
+
+ let mut stats = SstStats {
+ key_dict_size: info.key_compression_dictionary_length as u64,
+ block_directory_size: info.block_count as u64 * 4,
+ file_size,
+ ..Default::default()
+ };
+
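+ // SST file layout (per the format in the crate README): the optional key
+ // compression dictionary comes first, then the blocks, then a trailing
+ // directory of `block_count` u32 end-offsets relative to the end of the
+ // dictionary.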
+ // Calculate offsets
+ let block_offsets_start = mmap.len() - (info.block_count as usize * 4);
+ let blocks_start = info.key_compression_dictionary_length as usize;
+
+ // Get key compression dictionary if present
+ let key_dict = if info.key_compression_dictionary_length > 0 {
+ Some(&mmap[0..info.key_compression_dictionary_length as usize])
+ } else {
+ None
+ };
+
+ // Iterate through all blocks
+ for block_index in 0..info.block_count {
+ let offset = block_offsets_start + block_index as usize * 4;
+
+ let block_start = if block_index == 0 {
+ blocks_start
+ } else {
+ blocks_start + (&mmap[offset - 4..offset]).read_u32::<BE>()? as usize
+ };
+ let block_end = blocks_start + (&mmap[offset..offset + 4]).read_u32::<BE>()? as usize;
+
+ // Read uncompressed length and compressed data
+ let uncompressed_length = (&mmap[block_start..block_start + 4]).read_u32::<BE>()?;
+ let compressed_data = &mmap[block_start + 4..block_end];
+ let compressed_size = compressed_data.len() as u64;
+
+ // Determine if block was compressed (uncompressed_length > 0 means it was compressed)
+ let was_compressed = uncompressed_length > 0;
+ // Actual size: if uncompressed_length is 0, use stored size (block wasn't compressed)
+ let actual_size = if uncompressed_length == 0 {
+ compressed_size
+ } else {
+ uncompressed_length as u64
+ };
+
+ // Try to decompress with key dictionary first (for key/index blocks)
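+ // Heuristic: value blocks are compressed without the dictionary, so the
+ // dictionary attempt typically fails for them and we fall through to the
+ // no-dictionary path. Blocks stored uncompressed always "succeed" here
+ // and are classified by their first byte below.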
+ let decompressed = match decompress_block(compressed_data, uncompressed_length, key_dict) {
+ Ok(data) => data,
+ Err(_) => {
+ // If that fails, try without dictionary (value blocks)
+ match decompress_block(compressed_data, uncompressed_length, None) {
+ Ok(_) => {
+ // This is a value block
+ stats
+ .value_blocks
+ .add(compressed_size, actual_size, was_compressed);
+ continue; // Value blocks don't have entry type headers
+ }
+ Err(e) => {
+ eprintln!(
+ "Warning: Failed to decompress block {} in {:08}.sst: {}",
+ block_index, info.sequence_number, e
+ );
+ continue;
+ }
+ }
+ }
+ };
+
+ let block = &decompressed[..];
+ if block.is_empty() {
+ continue;
+ }
+
+ let block_type = block[0];
+
+ // The index block is always the LAST block in the file
+ let is_last_block = block_index == info.block_count - 1;
+
+ match block_type {
+ BLOCK_TYPE_INDEX if is_last_block => {
+ // Validate index block structure: 1 byte type + 2 byte first_block + N*(10 bytes)
+ let content_len = block.len() - 3; // subtract header
+ if content_len % 10 == 0 {
+ stats
+ .index_blocks
+ .add(compressed_size, actual_size, was_compressed);
+ } else {
+ // Invalid structure, treat as value block
+ stats
+ .value_blocks
+ .add(compressed_size, actual_size, was_compressed);
+ }
+ }
+ BLOCK_TYPE_KEY_WITH_HASH | BLOCK_TYPE_KEY_NO_HASH => {
+ // Key block - extract entry types
+ if block.len() < 4 {
+ // Too small to be a valid key block, likely garbage from wrong decompression
+ stats
+ .value_blocks
+ .add(compressed_size, actual_size, was_compressed);
+ continue;
+ }
+
+ // Entry count is stored as 3 bytes after the block type
+ let entry_count =
+ ((block[1] as u32) << 16) | ((block[2] as u32) << 8) | (block[3] as u32);
+
+ // Validate entry count - if it's unreasonably large or the block is too small
+ // to contain the headers, this is likely garbage from wrong decompression
+ let expected_header_size = 4 + entry_count as usize * 4;
+ if entry_count == 0 || entry_count > 100_000 || expected_header_size > block.len() {
+ // Invalid key block structure, treat as value block
+ stats
+ .value_blocks
+ .add(compressed_size, actual_size, was_compressed);
+ continue;
+ }
+
+ stats
+ .key_blocks
+ .add(compressed_size, actual_size, was_compressed);
+
+ // Entry headers start at offset 4
+ // Each entry header is 4 bytes: 1 byte type + 3 bytes position
+ for i in 0..entry_count as usize {
+ let header_offset = 4 + i * 4;
+ if header_offset >= block.len() {
+ break;
+ }
+ let entry_type = block[header_offset];
+
+ *stats.entry_type_counts.entry(entry_type).or_insert(0) += 1;
+ stats.total_entries += 1;
+
+ // Track value statistics
+ match entry_type {
+ KEY_BLOCK_ENTRY_TYPE_SMALL => {
+ stats.small_value_refs += 1;
+ }
+ KEY_BLOCK_ENTRY_TYPE_BLOB => {
+ stats.blob_refs += 1;
+ }
+ KEY_BLOCK_ENTRY_TYPE_DELETED => {
+ stats.deleted_count += 1;
+ }
+ KEY_BLOCK_ENTRY_TYPE_MEDIUM => {
+ stats.medium_value_refs += 1;
+ }
+ ty if ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN => {
+ let inline_size = (ty - KEY_BLOCK_ENTRY_TYPE_INLINE_MIN) as u64;
+ stats.inline_value_bytes += inline_size;
+ }
+ _ => {}
+ }
+ }
+ }
+ _ => {
+ // Unknown block type - might be a value block that happened to decompress with dict
+ // Try to identify it as a value block
+ stats
+ .value_blocks
+ .add(compressed_size, actual_size, was_compressed);
+ }
+ }
+ }
+
+ Ok(stats)
+}
+
+fn print_block_stats(name: &str, info: &BlockSizeInfo) {
+ let total = info.total_count();
+ if total == 0 {
+ println!(" {}: none", name);
+ return;
+ }
+
+ // Determine compression status
+ let all_uncompressed = info.compressed_count == 0;
+ let all_compressed = info.uncompressed_count == 0;
+
+ if all_uncompressed {
+ // All blocks uncompressed - just show size
+ println!(
+ " {}: {} blocks (uncompressed), {}",
+ name,
+ format_number(total),
+ format_bytes(info.actual_size),
+ );
+ } else if all_compressed {
+ // All blocks compressed - show stored vs actual with savings
+ let savings_pct = if info.actual_size > 0 {
+ ((info.actual_size as f64 - info.stored_size as f64) / info.actual_size as f64) * 100.0
+ } else {
+ 0.0
+ };
+ let savings_str = if savings_pct < 0.0 {
+ format!("{:.0}% overhead", -savings_pct)
+ } else {
+ format!("{:.0}% savings", savings_pct)
+ };
+ println!(
+ " {}: {} blocks, stored: {}, actual: {} ({})",
+ name,
+ format_number(total),
+ format_bytes(info.stored_size),
+ format_bytes(info.actual_size),
+ savings_str,
+ );
+ } else {
+ // Mixed - show breakdown
+ let savings_pct = if info.actual_size > 0 {
+ ((info.actual_size as f64 - info.stored_size as f64) / info.actual_size as f64) * 100.0
+ } else {
+ 0.0
+ };
+ let savings_str = if savings_pct < 0.0 {
+ format!("{:.0}% overhead", -savings_pct)
+ } else {
+ format!("{:.0}% savings", savings_pct)
+ };
+ println!(
+ " {}: {} blocks ({} compressed, {} uncompressed)",
+ name,
+ format_number(total),
+ format_number(info.compressed_count),
+ format_number(info.uncompressed_count),
+ );
+ println!(
+ " stored: {}, actual: {} ({})",
+ format_bytes(info.stored_size),
+ format_bytes(info.actual_size),
+ savings_str,
+ );
+ }
+}
+
+fn print_entry_histogram(stats: &SstStats, prefix: &str) {
+ if stats.entry_type_counts.is_empty() {
+ return;
+ }
+ println!("{}Entry Type Histogram:", prefix);
+ for (ty, count) in &stats.entry_type_counts {
+ let pct = (*count as f64 / stats.total_entries as f64) * 100.0;
+ // Visual bar: one cell per two percentage points, capped at 40 cells
+ let bar_len = (pct / 2.0) as usize;
+ let bar: String = "█".repeat(bar_len.min(40));
+ println!(
+ "{} type {:3}: {:>12} ({:5.1}%) │{}│ {}",
+ prefix,
+ ty,
+ format_number(*count),
+ pct,
+ bar,
+ entry_type_description(*ty),
+ );
+ }
+}
+
+fn print_value_storage(stats: &SstStats, prefix: &str) {
+ println!("{}Value Storage:", prefix);
+ if stats.inline_value_bytes > 0 {
+ let inline_count: u64 = stats
+ .entry_type_counts
+ .iter()
+ .filter(|(ty, _)| **ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN)
+ .map(|(_, count)| count)
+ .sum();
+ println!(
+ "{} Inline: {} entries, {} total",
+ prefix,
+ format_number(inline_count),
+ format_bytes(stats.inline_value_bytes)
+ );
+ }
+ if stats.small_value_refs > 0 {
+ println!(
+ "{} Small (value block refs): {} entries",
+ prefix,
+ format_number(stats.small_value_refs)
+ );
+ }
+ if stats.medium_value_refs > 0 {
+ println!(
+ "{} Medium (dedicated blocks): {} entries",
+ prefix,
+ format_number(stats.medium_value_refs)
+ );
+ }
+ if stats.blob_refs > 0 {
+ println!(
+ "{} Blob (external files): {} entries",
+ prefix,
+ format_number(stats.blob_refs)
+ );
+ }
+ if stats.deleted_count > 0 {
+ println!(
+ "{} Deleted: {} entries",
+ prefix,
+ format_number(stats.deleted_count)
+ );
+ }
+}
+
+fn print_sst_details(seq_num: u32, stats: &SstStats) {
+ println!(
+ "\n ┌─ SST {:08}.sst ─────────────────────────────────────────────────────",
+ seq_num
+ );
+ println!(
+ " │ Entries: {}, File size: {}",
+ format_number(stats.total_entries),
+ format_bytes(stats.file_size)
+ );
+
+ // Per-file overhead
+ let overhead = stats.key_dict_size + stats.block_directory_size;
+ let overhead_pct = if stats.file_size > 0 {
+ (overhead as f64 / stats.file_size as f64) * 100.0
+ } else {
+ 0.0
+ };
+ println!(" │");
+ println!(
+ " │ Per-file Overhead: {} ({:.1}% of file)",
+ format_bytes(overhead),
+ overhead_pct
+ );
+ println!(
+ " │ Key compression dictionary: {}",
+ format_bytes(stats.key_dict_size)
+ );
+ println!(
+ " │ Block directory: {}",
+ format_bytes(stats.block_directory_size)
+ );
+
+ // Block statistics
+ println!(" │");
+ println!(" │ Block Statistics:");
+ print!(" │ ");
+ print_block_stats("Index blocks", &stats.index_blocks);
+ print!(" │ ");
+ print_block_stats("Key blocks", &stats.key_blocks);
+ print!(" │ ");
+ print_block_stats("Value blocks", &stats.value_blocks);
+
+ // Entry type histogram
+ if !stats.entry_type_counts.is_empty() {
+ println!(" │");
+ print_entry_histogram(stats, " │ ");
+ }
+
+ // Value storage summary
+ println!(" │");
+ print_value_storage(stats, " │ ");
+
+ println!(" └───────────────────────────────────────────────────────────────────────────");
+}
+
+fn print_family_summary(family: u32, sst_count: usize, stats: &SstStats) {
+ println!("═══════════════════════════════════════════════════════════════════════════════");
+ println!("Family {} ({}):", family, family_name(family));
+ println!("═══════════════════════════════════════════════════════════════════════════════");
+
+ println!(
+ " SST files: {}, Total entries: {}",
+ format_number(sst_count as u64),
+ format_number(stats.total_entries)
+ );
+ println!(" Total file size: {}", format_bytes(stats.file_size));
+
+ // Averages
+ if sst_count > 0 {
+ let avg_file_size = stats.file_size / sst_count as u64;
+ let avg_keys_per_file = stats.total_entries / sst_count as u64;
+ let total_key_blocks = stats.key_blocks.total_count();
+ let avg_keys_per_block = if total_key_blocks > 0 {
+ stats.total_entries as f64 / total_key_blocks as f64
+ } else {
+ 0.0
+ };
+
+ println!();
+ println!(" Averages:");
+ println!(" File size: {}", format_bytes(avg_file_size));
+ println!(" Keys per file: {}", format_number(avg_keys_per_file));
+ println!(" Keys per key block: {:.1}", avg_keys_per_block);
+ }
+
+ // Per-file overhead
+ let total_overhead = stats.key_dict_size + stats.block_directory_size;
+ let overhead_pct = if stats.file_size > 0 {
+ (total_overhead as f64 / stats.file_size as f64) * 100.0
+ } else {
+ 0.0
+ };
+ println!();
+ println!(
+ " Per-file Overhead (total): {} ({:.1}% of total file size)",
+ format_bytes(total_overhead),
+ overhead_pct
+ );
+ println!(
+ " Key compression dictionaries: {}",
+ format_bytes(stats.key_dict_size)
+ );
+ if sst_count > 0 {
+ println!(
+ " Average per file: {}",
+ format_bytes(stats.key_dict_size / sst_count as u64)
+ );
+ }
+ println!(
+ " Block directories: {}",
+ format_bytes(stats.block_directory_size)
+ );
+ if sst_count > 0 {
+ println!(
+ " Average per file: {}",
+ format_bytes(stats.block_directory_size / sst_count as u64)
+ );
+ }
+
+ println!();
+ println!(" Block Statistics:");
+ print!(" ");
+ print_block_stats("Index blocks", &stats.index_blocks);
+ print!(" ");
+ print_block_stats("Key blocks", &stats.key_blocks);
+ print!(" ");
+ print_block_stats("Value blocks", &stats.value_blocks);
+
+ println!();
+ print_entry_histogram(stats, " ");
+
+ println!();
+ print_value_storage(stats, " ");
+
+ println!();
+}
+
+fn main() -> Result<()> {
+ let args: Vec<String> = std::env::args().collect();
+
+ // Parse arguments
+ let mut db_path: Option = None;
+ let mut verbose = false;
+
+ let mut i = 1;
+ while i < args.len() {
+ match args[i].as_str() {
+ "--verbose" | "-v" => verbose = true,
+ arg if !arg.starts_with('-') => {
+ if db_path.is_none() {
+ db_path = Some(PathBuf::from(arg));
+ }
+ }
+ _ => {
+ eprintln!("Unknown option: {}", args[i]);
+ std::process::exit(1);
+ }
+ }
+ i += 1;
+ }
+
+ let db_path = match db_path {
+ Some(p) => p,
+ None => {
+ eprintln!("Usage: {} [OPTIONS] ", args[0]);
+ eprintln!();
+ eprintln!("Inspects turbo-persistence SST files to report entry type statistics.");
+ eprintln!();
+ eprintln!("Options:");
+ eprintln!(" -v, --verbose Show per-SST file details (default: family totals only)");
+ eprintln!();
+ eprintln!("Entry types:");
+ eprintln!(" 0: Small value (stored in separate value block)");
+ eprintln!(" 1: Blob reference");
+ eprintln!(" 2: Deleted/tombstone");
+ eprintln!(" 3: Medium value");
+ eprintln!(" 8+: Inline value (size = type - 8)");
+ eprintln!();
+ eprintln!("For TaskCache (family 3), values are 4-byte TaskIds.");
+ eprintln!("Expected entry type is 12 (8 + 4) for inline optimization.");
+ std::process::exit(1);
+ }
+ };
+
+ if !db_path.is_dir() {
+ bail!("Not a directory: {}", db_path.display());
+ }
+
+ // Collect SST info grouped by family
+ let family_sst_info = collect_sst_info(&db_path)?;
+
+ let total_sst_count: usize = family_sst_info.values().map(|v| v.len()).sum();
+ println!(
+ "Analyzing {} SST files in {}\n",
+ format_number(total_sst_count as u64),
+ db_path.display()
+ );
+
+ // Analyze and report by family
+ for (family, sst_list) in &family_sst_info {
+ let mut family_stats = SstStats::default();
+ let mut sst_stats_list: Vec<(u32, SstStats)> = Vec::new();
+
+ for info in sst_list {
+ match analyze_sst_file(&db_path, info) {
+ Ok(stats) => {
+ family_stats.merge(&stats);
+ if verbose {
+ sst_stats_list.push((info.sequence_number, stats));
+ }
+ }
+ Err(e) => {
+ eprintln!(
+ "Warning: Failed to analyze {:08}.sst: {}",
+ info.sequence_number, e
+ );
+ }
+ }
+ }
+
+ // Print family summary
+ print_family_summary(*family, sst_list.len(), &family_stats);
+
+ // Print per-SST details in verbose mode
+ if verbose && !sst_stats_list.is_empty() {
+ println!(" Per-SST Details:");
+ for (seq_num, stats) in &sst_stats_list {
+ print_sst_details(*seq_num, stats);
+ }
+ println!();
+ }
+ }
+
+ Ok(())
+}
diff --git a/turbopack/crates/turbo-persistence/src/lib.rs b/turbopack/crates/turbo-persistence/src/lib.rs
index 588c484c94585..fd17c20c56449 100644
--- a/turbopack/crates/turbo-persistence/src/lib.rs
+++ b/turbopack/crates/turbo-persistence/src/lib.rs
@@ -13,11 +13,11 @@ mod db;
mod key;
mod lookup_entry;
mod merge_iter;
-mod meta_file;
+pub mod meta_file;
mod meta_file_builder;
mod parallel_scheduler;
mod sst_filter;
-mod static_sorted_file;
+pub mod static_sorted_file;
mod static_sorted_file_builder;
mod value_block_count_tracker;
mod value_buf;