Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
fe437c1
code drop
pggPL Feb 3, 2026
a54a743
code drop
pggPL Feb 3, 2026
b6e0767
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 3, 2026
76d362c
Merge branch 'main' into inpsect_tensor_dump_support
pggPL Mar 5, 2026
dc60fe8
docs
pggPL Mar 5, 2026
e94467f
nvfp4 internals support
pggPL Mar 5, 2026
e8c8e56
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 5, 2026
b002b89
lint fixes
pggPL Mar 5, 2026
2816f37
Update transformer_engine/debug/features/dump_tensors.py
pggPL Mar 5, 2026
83506af
fix
pggPL Mar 5, 2026
a525f82
Update transformer_engine/debug/features/dump_tensors.py
pggPL Mar 5, 2026
df66054
Update transformer_engine/debug/features/dump_tensors.py
pggPL Mar 5, 2026
ab3e90e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 5, 2026
089a4d2
Update tests/pytorch/debug/test_log.py
pggPL Mar 5, 2026
a18664f
Update transformer_engine/debug/features/dump_tensors.py
pggPL Mar 5, 2026
41d17fa
fix
pggPL Mar 5, 2026
1736cbe
fix
pggPL Mar 5, 2026
b78d36f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 5, 2026
d98c4d0
Remove dump_quantized_internals support from DumpTensors
pggPL Mar 10, 2026
23c70e5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2026
8357ebe
Address Greptile review comments
pggPL Mar 10, 2026
41c671e
Remove portability suggestion from quantized key docstring
pggPL Mar 10, 2026
0cd16e5
Compute rank lazily in _expected_root_dir
pggPL Mar 10, 2026
6f21734
detach tensors before saving; verify dump filename in test
pggPL Mar 10, 2026
7d36811
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2026
c7b7f01
Add empty dump_dict log; assert QuantizedTensor type in test
pggPL Mar 10, 2026
2fcd7eb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2026
677ad51
Update transformer_engine/debug/features/dump_tensors.py
pggPL Mar 10, 2026
dbe1688
Merge branch 'main' into inpsect_tensor_dump_support
pggPL Mar 19, 2026
c54368f
Address review: iter subdirs, remove dead rank field, add allclose te…
pggPL Mar 19, 2026
27bc899
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2026
d4459bd
Merge branch 'main' into inpsect_tensor_dump_support
pggPL Mar 20, 2026
f3d8a56
fix: use detach().clone() to avoid shared storage in DumpTensors
pggPL Mar 20, 2026
4e20b8c
test: use torch.equal instead of torch.allclose for serialisation rou…
pggPL Mar 20, 2026
7b1559f
fix: add tp_size to DumpTensors.inspect_tensor and fix KeyError in ca…
pggPL Mar 20, 2026
275767d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/debug/3_api_features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ Debug features
.. autoapiclass:: transformer_engine.debug.features.per_tensor_scaling.PerTensorScaling
.. autoapiclass:: transformer_engine.debug.features.fake_quant.FakeQuant
.. autoapiclass:: transformer_engine.debug.features.disable_fp8_gemm.DisableFP8GEMM
.. autoapiclass:: transformer_engine.debug.features.disable_fp8_layer.DisableFP8Layer
.. autoapiclass:: transformer_engine.debug.features.disable_fp8_layer.DisableFP8Layer
.. autoapiclass:: transformer_engine.debug.features.dump_tensors.DumpTensors
83 changes: 80 additions & 3 deletions tests/pytorch/debug/test_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
is_nvfp4_available,
)
from transformer_engine.pytorch.quantization import RecipeState
from transformer_engine.pytorch.tensor import QuantizedTensor
from transformer_engine.debug.pytorch.debug_state import TEDebugState
from transformer_engine.debug.features.utils.stats_computation import (
compute_max_blockwise_dynamic_range,
Expand Down Expand Up @@ -445,9 +446,6 @@ def test_nvfp4_numeric(feature_dirs):
log_nvfp4_config = LOG_NVFP4_CONFIG_BASE.format(stats="underflows%, mse")

with debug_session(log_nvfp4_config, feature_dirs) as log_dir:
from transformer_engine.pytorch.tensor.nvfp4_tensor import NVFP4Quantizer
from transformer_engine.pytorch.quantization import RecipeState

recipe_state = RecipeState.create(
recipe.NVFP4BlockScaling(),
mode="forward",
Expand Down Expand Up @@ -644,3 +642,82 @@ def test_compute_max_blockwise_dynamic_range_direct():
)

print("All direct tests for compute_max_blockwise_dynamic_range passed!")


# DumpTensors tests
DUMP_TENSORS_CONFIG = """
dump:
layers:
layer_name_regex_pattern: .*
enabled: True
transformer_engine:
DumpTensors:
enabled: True
tensors: [activation]
high_precision_tensor: True
quantized_tensor: True
freq: 1
"""


def test_dump_tensors_sanity(feature_dirs):
    """Smoke-test the DumpTensors feature.

    Runs a single ``inspect_tensor`` call inside a debug session and verifies
    that exactly one ``.pt`` dump file appears under
    ``tensor_dumps/rank_0/iter_000000`` containing the expected
    ``high_precision`` and ``quantized`` entries.
    """
    if not fp8_available:
        pytest.skip(reason_for_no_fp8)

    with debug_session(DUMP_TENSORS_CONFIG, feature_dirs) as session_dir:
        recipe_state = RecipeState.create(
            recipe.DelayedScaling(),
            mode="forward",
            num_quantizers=3,
        )
        quantizer = recipe_state.make_quantizers()[0]

        tensor = torch.randn(128, 128, dtype=torch.bfloat16).cuda()
        quantized_tensor = quantizer(tensor)

        debug_api.transformer_engine.inspect_tensor(
            layer_name="test_layer",
            tensor_name="activation",
            iteration=0,
            tp_group=None,
            tensor=tensor,
            quantizer=quantizer,
            rowwise_quantized_tensor=quantized_tensor,
            columnwise_quantized_tensor=quantized_tensor,
        )
        debug_api.step()

        # Expected layout: <session>/tensor_dumps/rank_<r>/iter_<NNNNNN>/<file>.pt
        rank_dir = os.path.join(session_dir, "tensor_dumps", "rank_0")
        iter_dir = os.path.join(rank_dir, "iter_000000")
        assert os.path.exists(rank_dir), f"Dump directory not created: {rank_dir}"
        assert os.path.exists(iter_dir), f"Iteration directory not created: {iter_dir}"

        entries = os.listdir(iter_dir)
        assert len(entries) == 1, f"Expected 1 dump file, got {len(entries)}"
        assert (
            entries[0] == "test_layer_activation.pt"
        ), f"Unexpected dump filename: {entries[0]}"

        # weights_only=False is required because the dump may contain QuantizedTensor objects,
        # which are custom Python classes incompatible with the safe weights_only=True path.
        payload = torch.load(os.path.join(iter_dir, entries[0]), weights_only=False)

        assert isinstance(payload, dict), "Dump should be a dictionary"
        assert "high_precision" in payload, "Missing high_precision tensor"
        assert "quantized" in payload, "Missing quantized tensor"
        assert isinstance(
            payload["quantized"], QuantizedTensor
        ), f"Expected QuantizedTensor, got {type(payload['quantized'])}"

        # The saved high-precision copy must round-trip exactly.
        saved = payload["high_precision"]
        assert saved.shape == tensor.shape, "high_precision shape mismatch"
        assert torch.equal(
            saved, tensor
        ), "high_precision tensor values do not match original tensor"

        print("DumpTensors sanity test passed!")
6 changes: 4 additions & 2 deletions transformer_engine/debug/features/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ def call_feature(self, call, feat_config, layer_name, **kwargs):
"tp_size",
]:
if k not in call.__code__.co_varnames:
kwargs_copy.pop(k)
kwargs_copy.pop(k, None)
else:
kwargs_copy = kwargs

Expand All @@ -498,7 +498,9 @@ def call_feature(self, call, feat_config, layer_name, **kwargs):
kwargs_copy = kwargs.copy()
for k in ["tp_size"]:
if k not in call.__code__.co_varnames:
kwargs_copy.pop(k, None)
kwargs_copy.pop(
k, None
) # use None default to avoid KeyError if kwarg wasn't passed

return call(feat_config, layer_name, **kwargs_copy)

Expand Down
Loading
Loading