From 0a9f9b3374dc77f7722f569c22445baacde45f04 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 26 Feb 2026 07:51:43 +0000 Subject: [PATCH 01/11] [wasm-split] Move exclusive items to secondary modules When splitting a module, if non-function items (memories, tables, globals, tags) are exclusively used by a single secondary module, this moves them directly to that secondary module rather than exporting them from the primary module. When a global is moved, its initializer can contain `global.get` or `ref.func`s, creating dependences on other globals and functions. For now, this PR just exports all the dependences from the primary module to the secondary module. This will be improved by follow-up PRs. This PR does not reduce the size of the primary module for acx_gallery test case; it actually increases it by 2.6%. But this PR is mostly a preparation for the follow-up PRs, which will reduce the size. This also sadly increases wasm-split's running time on acx_gallery from 15.7s -> 24.2s, by 54%, due to more computations in `shareImportableItems`. 
--- Below is the `wasm-objdump -h` result of the primary modules: - Before ``` Type start=0x0000000c end=0x00035e09 (size=0x00035dfd) count: 11192 Import start=0x00035e0e end=0x004bd669 (size=0x0048785b) count: 65720 Function start=0x004bd66d end=0x004d0519 (size=0x00012eac) count: 62890 Table start=0x004d051c end=0x004d4059 (size=0x00003b3d) count: 2921 Tag start=0x004d405b end=0x004d405f (size=0x00000004) count: 1 Global start=0x004d4063 end=0x00689ff8 (size=0x001b5f95) count: 80766 Export start=0x00689ffc end=0x0077aafe (size=0x000f0b02) count: 60877 Start start=0x0077ab00 end=0x0077ab02 (size=0x00000002) start: 828 Elem start=0x0077ab06 end=0x007e494b (size=0x00069e45) count: 12303 DataCount start=0x007e494d end=0x007e494e (size=0x00000001) count: 1 Code start=0x007e4953 end=0x00a142ea (size=0x0022f997) count: 62890 Data start=0x00a142ee end=0x00a33e7b (size=0x0001fb8d) count: 1 ``` - After (This PR) ``` Type start=0x0000000c end=0x00035d44 (size=0x00035d38) count: 11185 Import start=0x00035d49 end=0x003faf6f (size=0x003c5226) count: 56805 Function start=0x003faf73 end=0x0040de1f (size=0x00012eac) count: 62890 Table start=0x0040de22 end=0x0041195d (size=0x00003b3b) count: 2921 Tag start=0x0041195f end=0x00411963 (size=0x00000004) count: 1 Global start=0x00411967 end=0x005541c5 (size=0x0014285e) count: 47771 Export start=0x005541ca end=0x007c0a00 (size=0x0026c836) count: 59077 Start start=0x007c0a02 end=0x007c0a04 (size=0x00000002) start: 828 Elem start=0x007c0a08 end=0x0082a84b (size=0x00069e43) count: 12303 DataCount start=0x0082a84d end=0x0082a84e (size=0x00000001) count: 1 Code start=0x0082a853 end=0x00a5a159 (size=0x0022f906) count: 62890 Data start=0x00a5a15d end=0x00a79cea (size=0x0001fb8d) count: 1 ``` Note that even though the size of the global section has decreased by 27% (the number of globals by 41%), the size of the export section increased by 157%, while the number of exports has actually decreased. 
The reason is that, while we shed some exports for globals, we gained exports for functions (due to globals' `ref.func` dependences), which have much longer names. Follow-ups: --- src/ir/module-splitting.cpp | 212 +++++++++++++++----- test/lit/wasm-split/global-funcref.wast | 38 ++++ test/lit/wasm-split/selective-exports.wast | 57 ------ test/lit/wasm-split/split-module-items.wast | 149 ++++++++++++++ test/lit/wasm-split/transitive-globals.wast | 43 ++++ 5 files changed, 396 insertions(+), 103 deletions(-) create mode 100644 test/lit/wasm-split/global-funcref.wast delete mode 100644 test/lit/wasm-split/selective-exports.wast create mode 100644 test/lit/wasm-split/split-module-items.wast create mode 100644 test/lit/wasm-split/transitive-globals.wast diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index adc14e92f10..77ae38fe709 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -47,9 +47,6 @@ // 8. Export globals, tags, tables, and memories from the primary module and // import them in the secondary modules. // -// 9. Run RemoveUnusedModuleElements pass on the secondary modules in order to -// remove unused imports. -// // Functions can be used or referenced three ways in a WebAssembly module: they // can be exported, called, or referenced with ref.func. 
The above procedure // introduces a layer of indirection to each of those mechanisms that removes @@ -77,10 +74,9 @@ #include "ir/module-splitting.h" #include "asmjs/shared-constants.h" #include "ir/export-utils.h" +#include "ir/find_all.h" #include "ir/module-utils.h" #include "ir/names.h" -#include "ir/utils.h" -#include "pass.h" #include "support/insert_ordered.h" #include "wasm-builder.h" #include "wasm.h" @@ -963,13 +959,11 @@ void ModuleSplitter::shareImportableItems() { } }; - for (auto& secondaryPtr : secondaries) { - Module& secondary = *secondaryPtr; - - // Collect names used in the secondary module + // Given a module, collect names used in the module + auto getUsedNames = [&](Module& module) { UsedNames used; ModuleUtils::ParallelFunctionAnalysis nameCollector( - secondary, [&](Function* func, UsedNames& used) { + module, [&](Function* func, UsedNames& used) { if (!func->imported()) { NameCollector(used).walk(func->body); } @@ -983,65 +977,191 @@ void ModuleSplitter::shareImportableItems() { } NameCollector collector(used); - collector.walkModuleCode(&secondary); - for (auto& segment : secondary.dataSegments) { + collector.walkModuleCode(&module); + for (auto& segment : module.dataSegments) { if (segment->memory.is()) { used.memories.insert(segment->memory); } } - for (auto& segment : secondary.elementSegments) { + for (auto& segment : module.elementSegments) { if (segment->table.is()) { used.tables.insert(segment->table); } } + // If primary module has exports, they are "used" in it + for (auto& ex : module.exports) { + if (ex->kind == ExternalKind::Global) { + used.globals.insert(*ex->getInternalName()); + } else if (ex->kind == ExternalKind::Memory) { + used.memories.insert(*ex->getInternalName()); + } else if (ex->kind == ExternalKind::Table) { + used.tables.insert(*ex->getInternalName()); + } else if (ex->kind == ExternalKind::Tag) { + used.tags.insert(*ex->getInternalName()); + } + } + return used; + }; + + UsedNames primaryUsed = 
getUsedNames(primary); + std::vector secondaryUsed; + for (auto& secondaryPtr : secondaries) { + secondaryUsed.push_back(getUsedNames(*secondaryPtr)); + } + + // Compute globals referenced in other globals' initializers. Since globals + // can reference other globals, we must ensure that if a global is used in a + // module, all its dependencies are also marked as used. + auto computeDependentItems = [&](UsedNames& used) { + std::vector worklist(used.globals.begin(), used.globals.end()); + for (auto name : worklist) { + // At this point all globals are still in the primary module, so this + // exists + auto* global = primary.getGlobal(name); + if (!global->imported() && global->init) { + for (auto* get : FindAll(global->init).list) { + used.globals.insert(get->name); + } + } + } + }; + + for (auto& used : secondaryUsed) { + computeDependentItems(used); + } - // Export module items that are used in the secondary module - for (auto& memory : primary.memories) { - if (!used.memories.count(memory->name)) { - continue; + // Given a name and module item kind, returns the list of secondary modules + // using that name + auto getUsingSecondaries = [&](const Name& name, auto UsedNames::* field) { + std::vector usingModules; + for (size_t i = 0; i < secondaries.size(); ++i) { + if ((secondaryUsed[i].*field).count(name)) { + usingModules.push_back(secondaries[i].get()); } - auto secondaryMemory = ModuleUtils::copyMemory(memory.get(), secondary); - makeImportExport( - *memory, *secondaryMemory, "memory", ExternalKind::Memory); } + return usingModules; + }; - for (auto& table : primary.tables) { - // 1. In case we copied this table to this secondary module in - // setupTablePatching(), secondary.getTableOrNull(table->name) is not - // null, and we need to export it. - // 2. As in the case with other module elements, if the table is used in - // the secondary module's instructions, we need to export it. 
- auto secondaryTable = secondary.getTableOrNull(table->name); - if (!secondaryTable && !used.tables.count(table->name)) { - continue; + // Share module items with secondary modules. + // 1. Only share an item with the modules that use it + // 2. If an item is used by only a single secondary module, move the item to + // that secondary module. If an item is used by multiple modules (including + // the primary and secondary modules), export the item from the primary and + // import it from the using secondary modules. + + std::vector memoriesToRemove; + for (auto& memory : primary.memories) { + auto usingSecondaries = + getUsingSecondaries(memory->name, &UsedNames::memories); + bool inPrimary = primaryUsed.memories.count(memory->name); + + if (!inPrimary && usingSecondaries.size() == 1) { + auto* secondary = usingSecondaries[0]; + ModuleUtils::copyMemory(memory.get(), *secondary); + memoriesToRemove.push_back(memory->name); + } else { + for (auto* secondary : usingSecondaries) { + auto* secondaryMemory = + ModuleUtils::copyMemory(memory.get(), *secondary); + makeImportExport( + *memory, *secondaryMemory, "memory", ExternalKind::Memory); } - if (!secondaryTable) { - secondaryTable = ModuleUtils::copyTable(table.get(), secondary); + } + } + for (auto& name : memoriesToRemove) { + primary.removeMemory(name); + } + + std::vector tablesToRemove; + for (auto& table : primary.tables) { + auto usingSecondaries = + getUsingSecondaries(table->name, &UsedNames::tables); + bool inPrimary = primaryUsed.tables.count(table->name); + + if (!inPrimary && usingSecondaries.size() == 1) { + auto* secondary = usingSecondaries[0]; + // In case we copied this table to this secondary module in + // setupTablePatching(), !inPrimary can't be satisfied, because the + // primary module should have an element segment that refers to this + // table. 
+ assert(!secondary->getTableOrNull(table->name)); + ModuleUtils::copyTable(table.get(), *secondary); + tablesToRemove.push_back(table->name); + } else { + for (auto* secondary : usingSecondaries) { + // 1. In case we copied this table to this secondary module in + // setupTablePatching(), secondary.getTableOrNull(table->name) is not + // null, and we need to export it. + // 2. As in the case with other module elements, if the table is used in + // the secondary module's instructions, we need to export it. + auto secondaryTable = secondary->getTableOrNull(table->name); + if (!secondaryTable) { + secondaryTable = ModuleUtils::copyTable(table.get(), *secondary); + } + makeImportExport(*table, *secondaryTable, "table", ExternalKind::Table); } - makeImportExport(*table, *secondaryTable, "table", ExternalKind::Table); + } + } + for (auto& name : tablesToRemove) { + primary.removeTable(name); + } + + std::vector globalsToRemove; + for (auto& global : primary.globals) { + if (global->mutable_) { + assert(primary.features.hasMutableGlobals() && + "TODO: add wrapper functions for disallowed mutable globals"); } - for (auto& global : primary.globals) { - if (!used.globals.count(global->name)) { - continue; + auto usingSecondaries = + getUsingSecondaries(global->name, &UsedNames::globals); + bool inPrimary = primaryUsed.globals.count(global->name); + if (!inPrimary && usingSecondaries.size() == 1) { + auto* secondary = usingSecondaries[0]; + ModuleUtils::copyGlobal(global.get(), *secondary); + globalsToRemove.push_back(global->name); + // Import global initializer's ref.func dependences + if (global->init) { + for (auto* ref : FindAll(global->init).list) { + // Here, ref->func is either a function the primary module, or a + // trampoline created in indirectReferencesToSecondaryFunctions in + // case the original function is in one of the secondaries. 
+ assert(primary.getFunctionOrNull(ref->func)); + exportImportFunction(ref->func, {secondary}); + } } - if (global->mutable_) { - assert(primary.features.hasMutableGlobals() && - "TODO: add wrapper functions for disallowed mutable globals"); + } else { + for (auto* secondary : usingSecondaries) { + auto* secondaryGlobal = + ModuleUtils::copyGlobal(global.get(), *secondary); + makeImportExport( + *global, *secondaryGlobal, "global", ExternalKind::Global); } - auto* secondaryGlobal = ModuleUtils::copyGlobal(global.get(), secondary); - makeImportExport( - *global, *secondaryGlobal, "global", ExternalKind::Global); } + } + for (auto& name : globalsToRemove) { + primary.removeGlobal(name); + } + + std::vector tagsToRemove; + for (auto& tag : primary.tags) { + auto usingSecondaries = getUsingSecondaries(tag->name, &UsedNames::tags); + bool inPrimary = primaryUsed.tags.count(tag->name); - for (auto& tag : primary.tags) { - if (!used.tags.count(tag->name)) { - continue; + if (!inPrimary && usingSecondaries.size() == 1) { + auto* secondary = usingSecondaries[0]; + ModuleUtils::copyTag(tag.get(), *secondary); + tagsToRemove.push_back(tag->name); + } else { + for (auto* secondary : usingSecondaries) { + auto* secondaryTag = ModuleUtils::copyTag(tag.get(), *secondary); + makeImportExport(*tag, *secondaryTag, "tag", ExternalKind::Tag); } - auto* secondaryTag = ModuleUtils::copyTag(tag.get(), secondary); - makeImportExport(*tag, *secondaryTag, "tag", ExternalKind::Tag); } } + for (auto& name : tagsToRemove) { + primary.removeTag(name); + } } } // anonymous namespace diff --git a/test/lit/wasm-split/global-funcref.wast b/test/lit/wasm-split/global-funcref.wast new file mode 100644 index 00000000000..11c4a332df7 --- /dev/null +++ b/test/lit/wasm-split/global-funcref.wast @@ -0,0 +1,38 @@ +;; RUN: wasm-split %s -all -g -o1 %t.1.wasm -o2 %t.2.wasm --keep-funcs=keep +;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY +;; RUN: wasm-dis %t.2.wasm | filecheck %s 
--check-prefix SECONDARY + +;; When a split global ($a here)'s initializer contains a ref.func of a split +;; function, currently we create its trampoline in the primary module and export +;; it. +;; TODO Use $split in the secondary module directly in the split global + +(module + ;; PRIMARY: (export "trampoline_split" (func $trampoline_split)) + + ;; PRIMARY: (func $keep + ;; PRIMARY-NEXT: ) + (func $keep) + + ;; PRIMARY: (func $trampoline_split + ;; PRIMARY-NEXT: (call_indirect (type $0) + ;; PRIMARY-NEXT: (i32.const 0) + ;; PRIMARY-NEXT: ) + ;; PRIMARY-NEXT: ) + + + ;; SECONDARY: (import "primary" "trampoline_split" (func $trampoline_split (exact))) + ;; SECONDARY: (global $a funcref (ref.func $trampoline_split)) + (global $a funcref (ref.func $split)) + + ;; SECONDARY: (func $split + ;; SECONDARY-NEXT: (drop + ;; SECONDARY-NEXT: (global.get $a) + ;; SECONDARY-NEXT: ) + ;; SECONDARY-NEXT: ) + (func $split + (drop + (global.get $a) + ) + ) +) diff --git a/test/lit/wasm-split/selective-exports.wast b/test/lit/wasm-split/selective-exports.wast deleted file mode 100644 index 4d2e0def020..00000000000 --- a/test/lit/wasm-split/selective-exports.wast +++ /dev/null @@ -1,57 +0,0 @@ -;; RUN: wasm-split %s -g -o1 %t.1.wasm -o2 %t.2.wasm --keep-funcs=foo -all -;; RUN: wasm-dis %t.1.wasm | filecheck %s - -;; Check if only the module elements that are used in the secondary module are -;; exported from the primary module. 
- -;; CHECK: (export "memory" (memory $used-mem)) -;; CHECK-NOT: (export "{{.*}}" (memory $unused-mem)) -;; CHECK: (export "table" (table $used-table)) -;; CHECK-NOT: (export "{{.*}}" (table $unused-table)) -;; CHECK: (export "global" (global $used-global)) -;; CHECK-NOT: (export "{{.*}}" (global $unused-global)) -;; CHECK: (export "tag" (tag $used-tag)) -;; CHECK-NOT: (export "{{.*}}" (tag $unused-tag)) - -(module - (memory $used-mem 1 1) - (memory $unused-mem 1 1) - (global $used-global i32 (i32.const 10)) - (global $unused-global i32 (i32.const 20)) - (table $used-table 1 1 funcref) - (table $unused-table 1 1 funcref) - (tag $used-tag (param i32)) - (tag $unused-tag (param i32)) - - (elem (table $used-table) (i32.const 0) func $foo) - - (func $foo (param i32) (result i32) - (call $bar (i32.const 0)) - ;; call_indirect requires a table, ensuring at least one table exists - ) - - (func $bar (param i32) (result i32) - (call $foo (i32.const 1)) - ;; Uses $used-mem - (drop - (i32.load - (i32.const 24) - ) - ) - ;; Uses $used-table - (drop - (call_indirect (param i32) (result i32) - (i32.const 0) - (i32.const 0) - ) - ) - ;; Uses $used-global - (drop - (global.get $used-global) - ) - ;; Uses $used-tag - (throw $used-tag - (i32.const 0) - ) - ) -) diff --git a/test/lit/wasm-split/split-module-items.wast b/test/lit/wasm-split/split-module-items.wast new file mode 100644 index 00000000000..340fe27dac8 --- /dev/null +++ b/test/lit/wasm-split/split-module-items.wast @@ -0,0 +1,149 @@ +;; RUN: wasm-split %s -all -g -o1 %t.1.wasm -o2 %t.2.wasm --keep-funcs=keep +;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY +;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY + +;; Check that +;; 1. Items only used in the primary module stay in the primary module +;; 2. Items only used in the secondary module are moved to the secondary module +;; 3. 
Items used in both modules are exported from the primary and imported from +;; the secondary module + +(module + (memory $keep-memory 1 1) + (global $keep-global i32 (i32.const 20)) + (table $keep-table 1 1 funcref) + (tag $keep-tag (param i32)) + + (memory $split-memory 1 1) + (global $split-global i32 (i32.const 20)) + (table $split-table 1 1 funcref) + (tag $split-tag (param i32)) + + (memory $shared-memory 1 1) + (global $shared-global i32 (i32.const 20)) + (table $shared-table 1 1 funcref) + (tag $shared-tag (param i32)) + + ;; PRIMARY: (global $keep-global i32 (i32.const 20)) + ;; PRIMARY-NEXT: (global $shared-global i32 (i32.const 20)) + ;; PRIMARY-NEXT: (memory $keep-memory 1 1) + ;; PRIMARY-NEXT: (memory $shared-memory 1 1) + ;; PRIMARY-NEXT: (table $keep-table 1 1 funcref) + ;; PRIMARY-NEXT: (table $shared-table 1 1 funcref) + ;; PRIMARY-NEXT: (table $2 1 funcref) + ;; PRIMARY: (tag $keep-tag (type $1) (param i32)) + ;; PRIMARY-NEXT: (tag $shared-tag (type $1) (param i32)) + + ;; PRIMARY: (export "keep" (func $keep)) + ;; PRIMARY-NEXT: (export "memory" (memory $shared-memory)) + ;; PRIMARY-NEXT: (export "table" (table $shared-table)) + ;; PRIMARY-NEXT: (export "table_3" (table $2)) + ;; PRIMARY-NEXT: (export "global" (global $shared-global)) + ;; PRIMARY-NEXT: (export "tag" (tag $shared-tag)) + + ;; SECONDARY: (import "primary" "memory" (memory $shared-memory 1 1)) + ;; SECONDARY-NEXT: (import "primary" "table_3" (table $timport$0 1 funcref)) + ;; SECONDARY-NEXT: (import "primary" "table" (table $shared-table 1 1 funcref)) + ;; SECONDARY-NEXT: (import "primary" "global" (global $shared-global i32)) + ;; SECONDARY-NEXT: (import "primary" "keep" (func $keep (exact (param i32) (result i32)))) + ;; SECONDARY-NEXT: (import "primary" "tag" (tag $shared-tag (type $1) (param i32))) + + ;; SECONDARY: (global $split-global i32 (i32.const 20)) + ;; SECONDARY-NEXT: (memory $split-memory 1 1) + ;; SECONDARY-NEXT: (table $split-table 1 1 funcref) + ;; SECONDARY: (tag 
$split-tag (type $1) (param i32)) + + (func $keep (param i32) (result i32) + (call $split (i32.const 0)) + ;; Uses $keep-memory + (drop + (i32.load $keep-memory + (i32.const 24) + ) + ) + ;; Uses $keep-table + (drop + (call_indirect $keep-table (param i32) (result i32) + (i32.const 0) + (i32.const 0) + ) + ) + ;; Uses $keep-global + (drop + (global.get $keep-global) + ) + ;; Uses $keep-tag + (try_table (catch $keep-tag 0) + (throw $keep-tag (i32.const 0)) + ) + ;; Uses $shared-memory + (drop + (i32.load $shared-memory + (i32.const 24) + ) + ) + ;; Uses $shared-table + (drop + (call_indirect $shared-table (param i32) (result i32) + (i32.const 0) + (i32.const 0) + ) + ) + ;; Uses $shared-global + (drop + (global.get $shared-global) + ) + ;; Uses $shared-tag + (try_table (catch $shared-tag 0) + (throw $shared-tag (i32.const 0)) + ) + (i32.const 0) + ) + + (func $split (param i32) (result i32) + (call $keep (i32.const 1)) + ;; Uses $split-memory + (drop + (i32.load $split-memory + (i32.const 24) + ) + ) + ;; Uses $split-table + (drop + (call_indirect $split-table (param i32) (result i32) + (i32.const 0) + (i32.const 0) + ) + ) + ;; Uses $split-global + (drop + (global.get $split-global) + ) + ;; Uses $split-tag + (try_table (catch $split-tag 0) + (throw $split-tag (i32.const 0)) + ) + ;; Uses $shared-memory + (drop + (i32.load $shared-memory + (i32.const 24) + ) + ) + ;; Uses $shared-table + (drop + (call_indirect $shared-table (param i32) (result i32) + (i32.const 0) + (i32.const 0) + ) + ) + ;; Uses $shared-global + (drop + (global.get $shared-global) + ) + ;; Uses $shared-tag + (try_table (catch $shared-tag 0) + (throw $shared-tag (i32.const 0)) + ) + (i32.const 0) + ) +) diff --git a/test/lit/wasm-split/transitive-globals.wast b/test/lit/wasm-split/transitive-globals.wast new file mode 100644 index 00000000000..90740adc3a3 --- /dev/null +++ b/test/lit/wasm-split/transitive-globals.wast @@ -0,0 +1,43 @@ +;; RUN: wasm-split %s -all -g -o1 %t.1.wasm -o2 %t.2.wasm 
--keep-funcs=keep +;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY +;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY + +;; Check that transitive dependencies in global initializers are correctly +;; analyzed and exported from the primary module to the secondary module. +;; TODO Move $b and $c to the secondary module + +(module + ;; PRIMARY: (global $c i32 (i32.const 42)) + (global $c i32 (i32.const 42)) + + ;; $b depends on $c. + ;; PRIMARY: (global $b i32 (global.get $c)) + (global $b i32 (global.get $c)) + + ;; Globals $b is exported to the secondary module + ;; PRIMARY: (export "global" (global $b)) + + ;; Globals $b is imported from the primary module + ;; SECONDARY: (import "primary" "global" (global $b i32)) + + ;; $a depends on $b. Since $a is exclusively used by the secondary module, + ;; it will be moved there. Its dependency $b should be exported from the + ;; primary module and imported into the secondary module. + ;; SECONDARY: (global $a i32 (global.get $b)) + (global $a i32 (global.get $b)) + + ;; PRIMARY: (func $keep (result i32) + ;; PRIMARY-NEXT: (i32.const 0) + ;; PRIMARY-NEXT: ) + (func $keep (result i32) + (i32.const 0) + ) + + ;; Exclusively uses $a, causing $a to move to the secondary module + ;; SECONDARY: (func $split (result i32) + ;; SECONDARY-NEXT: (global.get $a) + ;; SECONDARY-NEXT: ) + (func $split (result i32) + (global.get $a) + ) +) From 3d7d1f4bb6724980d6bb542ee681da22a3d89613 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 9 Mar 2026 05:12:43 +0000 Subject: [PATCH 02/11] [wasm-split] Split globals' transitive global dependencies When a global is exclusively used by a secondary module, we can move it to the secondary module. If its initializer contains a `global.get` of another global, we exported it from the primary module to the secondary module, even if it may not be used anywhere else. 
When we split a global out to a secondary module, this PR computes the transitive dependency of the split global, and if those globals in the dependency are not used anywhere else in other modules, we move them to the secondary module as well. #8441 and this PR combined reduce the size of the primary module by 29%. The running time of `wasm-split` hasn't really changed with this PR, compared to #8441. --- `wasm-objdump -h` result: - Before (#8441) ``` Type start=0x0000000c end=0x00035d44 (size=0x00035d38) count: 11185 Import start=0x00035d49 end=0x003faf6f (size=0x003c5226) count: 56805 Function start=0x003faf73 end=0x0040de1f (size=0x00012eac) count: 62890 Table start=0x0040de22 end=0x0041195d (size=0x00003b3b) count: 2921 Tag start=0x0041195f end=0x00411963 (size=0x00000004) count: 1 Global start=0x00411967 end=0x005541c5 (size=0x0014285e) count: 47771 Export start=0x005541ca end=0x007c0a00 (size=0x0026c836) count: 59077 Start start=0x007c0a02 end=0x007c0a04 (size=0x00000002) start: 828 Elem start=0x007c0a08 end=0x0082a84b (size=0x00069e43) count: 12303 DataCount start=0x0082a84d end=0x0082a84e (size=0x00000001) count: 1 Code start=0x0082a853 end=0x00a5a159 (size=0x0022f906) count: 62890 Data start=0x00a5a15d end=0x00a79cea (size=0x0001fb8d) count: 1 ``` - After (This PR) ``` Type start=0x0000000c end=0x00035d44 (size=0x00035d38) count: 11185 Import start=0x00035d48 end=0x00132efc (size=0x000fd1b4) count: 32642 Function start=0x00132f00 end=0x00145dac (size=0x00012eac) count: 62890 Table start=0x00145daf end=0x001498ea (size=0x00003b3b) count: 2921 Tag start=0x001498ec end=0x001498f0 (size=0x00000004) count: 1 Global start=0x001498f4 end=0x00289e60 (size=0x0014056c) count: 47728 Export start=0x00289e65 end=0x004977fe (size=0x0020d999) count: 35861 Start start=0x00497800 end=0x00497802 (size=0x00000002) start: 828 Elem start=0x00497806 end=0x00501649 (size=0x00069e43) count: 12303 DataCount start=0x0050164b end=0x0050164c (size=0x00000001) count: 1 Code 
start=0x00501651 end=0x00730f22 (size=0x0022f8d1) count: 62890 Data start=0x00730f26 end=0x00750ab3 (size=0x0001fb8d) count: 1 ``` Note that while the decrease in the global section is small, we have a significant size decrease in the import and the export sections, because we used to import globals and export them just to relay those globals to the secondary modules. --- src/ir/module-splitting.cpp | 57 +++++++++++++++++---- test/lit/wasm-split/transitive-globals.wast | 20 +++----- 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 77ae38fe709..3544610b654 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -583,6 +583,25 @@ Expression* ModuleSplitter::maybeLoadSecondary(Builder& builder, return builder.makeSequence(loadSecondary, callIndirect); } +// Helper to walk expressions in segments but NOT in globals. +template +static void walkSegments(Walker& walker, Module* module) { + walker.setModule(module); + for (auto& curr : module->elementSegments) { + if (curr->offset) { + walker.walk(curr->offset); + } + for (auto* item : curr->data) { + walker.walk(item); + } + } + for (auto& curr : module->dataSegments) { + if (curr->offset) { + walker.walk(curr->offset); + } + } +} + void ModuleSplitter::indirectReferencesToSecondaryFunctions() { // Turn references to secondary functions into references to thunks that // perform a direct call to the original referent. The direct calls in the @@ -977,7 +996,19 @@ void ModuleSplitter::shareImportableItems() { } NameCollector collector(used); - collector.walkModuleCode(&module); + // We shouldn't use collector.walkModuleCode here, because we don't want to + // walk on global initializers. At this point, all globals are still in the + // primary module, so if we walk on global initializers here, globals appear + // in their initialalizers will be all marked as used in the primary module, + // which is not true. 
+ // + // For example, we have (global $a i32 (global.get $b)). Because $a is at + // this point still in the primary module, $b will be marked as "used" in + // the primary module. But $a can be moved to a secondary module later if it + // is used exclusively by that module. Then $b can be also moved, in case it + // doesn't have other uses. But if it is marked as "used" in the primary + // module, it can't. + walkSegments(collector, &module); for (auto& segment : module.dataSegments) { if (segment->memory.is()) { used.memories.insert(segment->memory); @@ -1009,25 +1040,33 @@ void ModuleSplitter::shareImportableItems() { secondaryUsed.push_back(getUsedNames(*secondaryPtr)); } - // Compute globals referenced in other globals' initializers. Since globals - // can reference other globals, we must ensure that if a global is used in a - // module, all its dependencies are also marked as used. - auto computeDependentItems = [&](UsedNames& used) { + // Compute transitive closure of globals referenced in other globals' + // initializers. Since globals can reference other globals, we must ensure + // that if a global is used in a module, all its dependencies are also marked + // as used. 
+ auto computeTransitiveGlobals = [&](UsedNames& used) { std::vector worklist(used.globals.begin(), used.globals.end()); - for (auto name : worklist) { + std::unordered_set visited(used.globals.begin(), used.globals.end()); + while (!worklist.empty()) { + Name currName = worklist.back(); + worklist.pop_back(); // At this point all globals are still in the primary module, so this // exists - auto* global = primary.getGlobal(name); + auto* global = primary.getGlobal(currName); if (!global->imported() && global->init) { for (auto* get : FindAll(global->init).list) { - used.globals.insert(get->name); + if (visited.insert(get->name).second) { + worklist.push_back(get->name); + used.globals.insert(get->name); + } } } } }; + computeTransitiveGlobals(primaryUsed); for (auto& used : secondaryUsed) { - computeDependentItems(used); + computeTransitiveGlobals(used); } // Given a name and module item kind, returns the list of secondary modules diff --git a/test/lit/wasm-split/transitive-globals.wast b/test/lit/wasm-split/transitive-globals.wast index 90740adc3a3..603e0cc4c8a 100644 --- a/test/lit/wasm-split/transitive-globals.wast +++ b/test/lit/wasm-split/transitive-globals.wast @@ -3,26 +3,20 @@ ;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY ;; Check that transitive dependencies in global initializers are correctly -;; analyzed and exported from the primary module to the secondary module. -;; TODO Move $b and $c to the secondary module +;; analyzed and moved to the secondary module. (module - ;; PRIMARY: (global $c i32 (i32.const 42)) + ;; SECONDARY: (global $c i32 (i32.const 42)) (global $c i32 (i32.const 42)) ;; $b depends on $c. 
- ;; PRIMARY: (global $b i32 (global.get $c)) + ;; SECONDARY: (global $b i32 (global.get $c)) (global $b i32 (global.get $c)) - ;; Globals $b is exported to the secondary module - ;; PRIMARY: (export "global" (global $b)) - - ;; Globals $b is imported from the primary module - ;; SECONDARY: (import "primary" "global" (global $b i32)) - - ;; $a depends on $b. Since $a is exclusively used by the secondary module, - ;; it will be moved there. Its dependency $b should be exported from the - ;; primary module and imported into the secondary module. + ;; $a depends on $b. since $a is exclusively used by the secondary module, + ;; it will be moved there. The transitive dependency must ensure that $b (and + ;; $c) are moved to the secondary module too, because they are not used in the + ;; primary module. ;; SECONDARY: (global $a i32 (global.get $b)) (global $a i32 (global.get $b)) From fcf42c0bd00c15d96bd546b3863fe12d7f191ae6 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 18 Mar 2026 23:30:28 +0000 Subject: [PATCH 03/11] Cosmetic changes --- src/ir/module-splitting.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 3302089c0a8..63e902852e7 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -1019,6 +1019,7 @@ void ModuleSplitter::shareImportableItems() { used.tables.insert(segment->table); } } + // If primary module has exports, they are "used" in it. Secondary modules // don't have exports, so this only applies to the primary module. 
for (auto& ex : module.exports) { @@ -1056,11 +1057,11 @@ void ModuleSplitter::shareImportableItems() { std::vector worklist(used.globals.begin(), used.globals.end()); std::unordered_set visited(used.globals.begin(), used.globals.end()); while (!worklist.empty()) { - Name currName = worklist.back(); + Name name = worklist.back(); worklist.pop_back(); // At this point all globals are still in the primary module, so this // exists - auto* global = primary.getGlobal(currName); + auto* global = primary.getGlobal(name); if (!global->imported() && global->init) { for (auto* get : FindAll(global->init).list) { if (visited.insert(get->name).second) { From 163ebc23a5507c857da4a08cd14efbfe8c3e09f3 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 19 Mar 2026 01:32:29 +0000 Subject: [PATCH 04/11] Comment --- src/ir/module-splitting.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 63e902852e7..f0d19c84ee9 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -1165,6 +1165,7 @@ void ModuleSplitter::shareImportableItems() { getUsingSecondaries(global->name, &UsedNames::globals); bool usedInPrimary = primaryUsed.globals.count(global->name); if (!usedInPrimary && usingSecondaries.size() == 1) { + // We are moving this global to this secondary module auto* secondary = usingSecondaries[0]; ModuleUtils::copyGlobal(global.get(), *secondary); globalsToRemove.push_back(global->name); @@ -1178,7 +1179,7 @@ void ModuleSplitter::shareImportableItems() { exportImportFunction(ref->func, {secondary}); } } - } else { + } else { // We are NOT moving this global to the secondary module for (auto* secondary : usingSecondaries) { auto* secondaryGlobal = ModuleUtils::copyGlobal(global.get(), *secondary); From be536458bad5ee7455876b3db8828504feae42ec Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 19 Mar 2026 01:40:24 +0000 Subject: [PATCH 05/11] Typo fix --- 
src/ir/module-splitting.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index f0d19c84ee9..3c9d9063fd8 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -1172,7 +1172,7 @@ void ModuleSplitter::shareImportableItems() { // Import global initializer's ref.func dependences if (global->init) { for (auto* ref : FindAll(global->init).list) { - // Here, ref->func is either a function the primary module, or a + // Here, ref->func is either a function in the primary module, or a // trampoline created in indirectReferencesToSecondaryFunctions in // case the original function is in one of the secondaries. assert(primary.getFunctionOrNull(ref->func)); From 13d39e037f9c63381b5372114e5ed5b12b4883ad Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 19 Mar 2026 02:05:26 +0000 Subject: [PATCH 06/11] Comment --- src/ir/module-splitting.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 3c9d9063fd8..6cd15b8cc40 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -45,7 +45,8 @@ // instantiation. // // 8. Export globals, tags, tables, and memories from the primary module and -// import them in the secondary modules. +// mport them in the secondary modules. If possible, move those module +// items instead to the secondary modules. // // Functions can be used or referenced three ways in a WebAssembly module: they // can be exported, called, or referenced with ref.func. 
The above procedure From 23789e23572a737fce7eee19c7b1cd0c0786e078 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 19 Mar 2026 02:06:09 +0000 Subject: [PATCH 07/11] typo fix in comment --- src/ir/module-splitting.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 6cd15b8cc40..2d5d7a67f44 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -45,7 +45,7 @@ // instantiation. // // 8. Export globals, tags, tables, and memories from the primary module and -// mport them in the secondary modules. If possible, move those module +// import them in the secondary modules. If possible, move those module // items instead to the secondary modules. // // Functions can be used or referenced three ways in a WebAssembly module: they From 3d29b1f863c0e88ac3cd05452209be68c7d73f1a Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 18 Mar 2026 23:09:44 -0700 Subject: [PATCH 08/11] Apply suggestions from code review Co-authored-by: Thomas Lively --- src/ir/module-splitting.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 2d5d7a67f44..5fdd3f84527 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -998,10 +998,10 @@ void ModuleSplitter::shareImportableItems() { NameCollector collector(used); // We shouldn't use collector.walkModuleCode here, because we don't want to - // walk on global initializers. At this point, all globals are still in the - // primary module, so if we walk on global initializers here, globals appear - // in their initialalizers will be all marked as used in the primary module, - // which is not we want. + // walk global initializers. 
At this point, all globals are still in the + // primary module, so if we walk global initializers here, other globals appearing + // in their initializers will all be marked as used in the primary module, + // which is not what we want. // // For example, we have (global $a i32 (global.get $b)). Because $a is at // this point still in the primary module, $b will be marked as "used" in @@ -1050,7 +1050,7 @@ void ModuleSplitter::shareImportableItems() { secondaryUsed.push_back(getUsedNames(*secondaryPtr)); } - // Compute transitive closure of globals referenced in other globals' + // Compute the transitive closure of globals referenced in other globals' // initializers. Since globals can reference other globals, we must ensure // that if a global is used in a module, all its dependencies are also marked // as used. From 15a0daff8de263094bfe70864ca68a7b78cca1bc Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 19 Mar 2026 06:23:24 +0000 Subject: [PATCH 09/11] clang-format --- src/ir/module-splitting.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ir/module-splitting.cpp b/src/ir/module-splitting.cpp index 5fdd3f84527..89804678cfc 100644 --- a/src/ir/module-splitting.cpp +++ b/src/ir/module-splitting.cpp @@ -999,9 +999,9 @@ void ModuleSplitter::shareImportableItems() { NameCollector collector(used); // We shouldn't use collector.walkModuleCode here, because we don't want to // walk global initializers. At this point, all globals are still in the - // primary module, so if we walk global initializers here, other globals appearing - // in their initializers will all be marked as used in the primary module, - // which is not what we want. + // primary module, so if we walk global initializers here, other globals + // appearing in their initializers will all be marked as used in the primary + // module, which is not what we want. // // For example, we have (global $a i32 (global.get $b)). 
Because $a is at // this point still in the primary module, $b will be marked as "used" in From e8158fca0ea4181b4c64a85091a833314f4959af Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 19 Mar 2026 07:47:57 +0000 Subject: [PATCH 10/11] Add new tests and tidy up --- test/lit/wasm-split/transitive-globals.wast | 66 +++++++++++++++------ 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/test/lit/wasm-split/transitive-globals.wast b/test/lit/wasm-split/transitive-globals.wast index 603e0cc4c8a..0cd53a216b5 100644 --- a/test/lit/wasm-split/transitive-globals.wast +++ b/test/lit/wasm-split/transitive-globals.wast @@ -6,32 +6,60 @@ ;; analyzed and moved to the secondary module. (module - ;; SECONDARY: (global $c i32 (i32.const 42)) - (global $c i32 (i32.const 42)) + ;; There are two dependency chains: $a->$b->$c and $d->$e->$f. While all of + ;; $a, $b, and $c can be moved to the secondary module because all of them are + ;; used only there, $e is used in the primary module, preventing $e and $f + ;; from being moved to the secondary module. - ;; $b depends on $c. - ;; SECONDARY: (global $b i32 (global.get $c)) + (global $c i32 (i32.const 42)) (global $b i32 (global.get $c)) - - ;; $a depends on $b. since $a is exclusively used by the secondary module, - ;; it will be moved there. The transitive dependency must ensure that $b (and - ;; $c) are moved to the secondary module too, because they are not used in the - ;; primary module. 
- ;; SECONDARY: (global $a i32 (global.get $b)) (global $a i32 (global.get $b)) - ;; PRIMARY: (func $keep (result i32) - ;; PRIMARY-NEXT: (i32.const 0) + (global $f i32 (i32.const 42)) + (global $e i32 (global.get $f)) + (global $d i32 (global.get $e)) + + ;; PRIMARY: (global $f i32 (i32.const 42)) + ;; PRIMARY: (global $e i32 (global.get $f)) + + ;; PRIMARY: (export "global" (global $f)) + ;; PRIMARY: (export "global_1" (global $e)) + + ;; SECONDARY: (import "primary" "global" (global $f i32)) + ;; SECONDARY: (import "primary" "global_1" (global $e i32)) + + ;; SECONDARY: (global $c i32 (i32.const 42)) + ;; SECONDARY: (global $b i32 (global.get $c)) + ;; SECONDARY: (global $a i32 (global.get $b)) + + ;; SECONDARY: (global $d i32 (global.get $e)) + + ;; PRIMARY: (func $keep + ;; PRIMARY-NEXT: (drop + ;; PRIMARY-NEXT: (global.get $e) + ;; PRIMARY-NEXT: ) ;; PRIMARY-NEXT: ) - (func $keep (result i32) - (i32.const 0) + (func $keep + (drop + (global.get $e) + ) ) - ;; Exclusively uses $a, causing $a to move to the secondary module - ;; SECONDARY: (func $split (result i32) - ;; SECONDARY-NEXT: (global.get $a) + ;; Exclusively uses $a and $d, causing them to move to the secondary module + ;; SECONDARY: (func $split + ;; SECONDARY-NEXT: (drop + ;; SECONDARY-NEXT: (global.get $a) + ;; SECONDARY-NEXT: ) + ;; SECONDARY-NEXT: (drop + ;; SECONDARY-NEXT: (global.get $d) + ;; SECONDARY-NEXT: ) ;; SECONDARY-NEXT: ) - (func $split (result i32) - (global.get $a) + (func $split + (drop + (global.get $a) + ) + (drop + (global.get $d) + ) ) ) From 7433eb7e132793850c462b952107bff00b8c3316 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 19 Mar 2026 08:17:54 +0000 Subject: [PATCH 11/11] Indent expectations --- test/lit/wasm-split/transitive-globals.wast | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/lit/wasm-split/transitive-globals.wast b/test/lit/wasm-split/transitive-globals.wast index 0cd53a216b5..6bf5ce6636f 100644 --- 
a/test/lit/wasm-split/transitive-globals.wast +++ b/test/lit/wasm-split/transitive-globals.wast @@ -19,20 +19,20 @@ (global $e i32 (global.get $f)) (global $d i32 (global.get $e)) - ;; PRIMARY: (global $f i32 (i32.const 42)) - ;; PRIMARY: (global $e i32 (global.get $f)) + ;; PRIMARY: (global $f i32 (i32.const 42)) + ;; PRIMARY: (global $e i32 (global.get $f)) - ;; PRIMARY: (export "global" (global $f)) - ;; PRIMARY: (export "global_1" (global $e)) + ;; PRIMARY: (export "global" (global $f)) + ;; PRIMARY: (export "global_1" (global $e)) - ;; SECONDARY: (import "primary" "global" (global $f i32)) - ;; SECONDARY: (import "primary" "global_1" (global $e i32)) + ;; SECONDARY: (import "primary" "global" (global $f i32)) + ;; SECONDARY: (import "primary" "global_1" (global $e i32)) - ;; SECONDARY: (global $c i32 (i32.const 42)) - ;; SECONDARY: (global $b i32 (global.get $c)) - ;; SECONDARY: (global $a i32 (global.get $b)) + ;; SECONDARY: (global $c i32 (i32.const 42)) + ;; SECONDARY: (global $b i32 (global.get $c)) + ;; SECONDARY: (global $a i32 (global.get $b)) - ;; SECONDARY: (global $d i32 (global.get $e)) + ;; SECONDARY: (global $d i32 (global.get $e)) ;; PRIMARY: (func $keep ;; PRIMARY-NEXT: (drop