From 303282e4892c082610ee04de7b83c6d17b35f3f7 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Fri, 20 Mar 2026 18:34:33 +0800 Subject: [PATCH 1/9] feat(pt_expt): add `dp change-bias` support for pt_expt backend Support adjusting output bias for pt_expt models via `dp --pt-expt change-bias`, handling both .pt checkpoints and .pte frozen models. Adds model_change_out_bias helper in training.py and CLI end-to-end tests covering data-based, file-based, user-defined, and frozen model bias changes. --- deepmd/pt_expt/entrypoints/main.py | 182 +++++++++++++++++ deepmd/pt_expt/train/training.py | 24 +++ source/tests/pt_expt/test_change_bias.py | 246 +++++++++++++++++++++++ 3 files changed, 452 insertions(+) create mode 100644 source/tests/pt_expt/test_change_bias.py diff --git a/deepmd/pt_expt/entrypoints/main.py b/deepmd/pt_expt/entrypoints/main.py index 2ad6c8863d..0cbd775e1c 100644 --- a/deepmd/pt_expt/entrypoints/main.py +++ b/deepmd/pt_expt/entrypoints/main.py @@ -218,6 +218,177 @@ def freeze( log.info("Saved frozen model to %s", output) +def change_bias( + input_file: str, + mode: str = "change", + bias_value: list | None = None, + datafile: str | None = None, + system: str = ".", + numb_batch: int = 0, + model_branch: str | None = None, + output: str | None = None, +) -> None: + """Change the output bias of a pt_expt model. + + Parameters + ---------- + input_file : str + Path to the model file (.pt checkpoint or .pte frozen model). + mode : str + ``"change"`` or ``"set"``. + bias_value : list or None + User-defined bias values (one per type). + datafile : str or None + File listing data system paths. + system : str + Data system path (used when *datafile* is None). + numb_batch : int + Number of batches for statistics (0 = all). + model_branch : str or None + Branch name for multi-task models. + output : str or None + Output file path. + """ + import torch + + from deepmd.common import ( + expand_sys_str, + ) + from deepmd.dpmodel.common import ( + to_numpy_array, + ) + from deepmd.pt_expt.model.get_model import ( + get_model, + ) + from deepmd.pt_expt.train.training import ( + get_additional_data_requirement, + get_loss, + model_change_out_bias, + ) + from deepmd.pt_expt.train.wrapper import ( + ModelWrapper, + ) + from deepmd.pt_expt.utils.env import ( + DEVICE, + ) + from deepmd.pt_expt.utils.serialization import ( + deserialize_to_file, + serialize_from_file, + ) + from deepmd.pt_expt.utils.stat import ( + make_stat_input, + ) + + if input_file.endswith(".pt"): + old_state_dict = torch.load(input_file, map_location=DEVICE, weights_only=True) + if "model" in old_state_dict: + model_state_dict = old_state_dict["model"] + else: + model_state_dict = old_state_dict + extra_state = model_state_dict.get("_extra_state") + if not isinstance(extra_state, dict) or "model_params" not in extra_state: + raise ValueError( + f"Unsupported checkpoint format at '{input_file}': missing " + "'_extra_state.model_params' in model state dict." + ) + model_params = extra_state["model_params"] + elif input_file.endswith((".pte", ".pt2")): + pte_data = serialize_from_file(input_file) + from deepmd.pt_expt.model.model import ( + BaseModel, + ) + + model_to_change = BaseModel.deserialize(pte_data["model"]) + model_params = None + else: + raise RuntimeError( + "The model provided must be a checkpoint file with a .pt extension " + "or a frozen model with a .pte/.pt2 extension" + ) + + bias_adjust_mode = "change-by-statistic" if mode == "change" else "set-by-statistic" + + if input_file.endswith(".pt"): + multi_task = "model_dict" in model_params + if multi_task: + raise NotImplementedError( + "Multi-task change-bias is not yet supported for the pt_expt backend." + ) + type_map = model_params["type_map"] + model = get_model(model_params) + wrapper = ModelWrapper(model) + wrapper.load_state_dict(model_state_dict) + model_to_change = model + + if input_file.endswith((".pte", ".pt2")): + type_map = model_to_change.get_type_map() + + if bias_value is not None: + assert "energy" in model_to_change.model_output_type(), ( + "User-defined bias is only available for energy model!" + ) + assert len(bias_value) == len(type_map), ( + f"The number of elements in the bias should be the same as " + f"that in the type_map: {type_map}." + ) + old_bias = model_to_change.get_out_bias() + bias_to_set = torch.tensor( + bias_value, dtype=old_bias.dtype, device=old_bias.device + ).view(old_bias.shape) + model_to_change.set_out_bias(bias_to_set) + log.info( + f"Change output bias of {type_map!s} " + f"from {to_numpy_array(old_bias).reshape(-1)!s} " + f"to {to_numpy_array(bias_to_set).reshape(-1)!s}." + ) + else: + if datafile is not None: + with open(datafile) as datalist: + all_sys = datalist.read().splitlines() + else: + all_sys = expand_sys_str(system) + data_systems = process_systems(all_sys) + data = DeepmdDataSystem( + systems=data_systems, + batch_size=1, + test_size=1, + rcut=model_to_change.get_rcut(), + type_map=type_map, + ) + mock_loss = get_loss({"inference": True}, 1.0, len(type_map), model_to_change) + data.add_data_requirements(mock_loss.label_requirement) + data.add_data_requirements(get_additional_data_requirement(model_to_change)) + nbatches = numb_batch if numb_batch != 0 else max(data.get_nbatches()) + sampled_data = make_stat_input(data, nbatches) + model_to_change = model_change_out_bias( + model_to_change, sampled_data, _bias_adjust_mode=bias_adjust_mode + ) + + if input_file.endswith(".pt"): + output_path = ( + output if output is not None else input_file.replace(".pt", "_updated.pt") + ) + wrapper = ModelWrapper(model_to_change) + if "model" in old_state_dict: + old_state_dict["model"] = wrapper.state_dict() + old_state_dict["model"]["_extra_state"] = extra_state + else: + old_state_dict = wrapper.state_dict() + old_state_dict["_extra_state"] = extra_state + torch.save(old_state_dict, output_path) + elif input_file.endswith((".pte", ".pt2")): + output_path = ( + output + if output is not None + else input_file.replace(".pte", "_updated.pte").replace( + ".pt2", "_updated.pt2" + ) + ) + model_dict = model_to_change.serialize() + deserialize_to_file(output_path, {"model": model_dict}) + log.info(f"Saved model to {output_path}") + + def main(args: list[str] | argparse.Namespace | None = None) -> None: """Entry point for the pt_expt backend CLI. @@ -275,6 +446,17 @@ def main(args: list[str] | argparse.Namespace | None = None) -> None: if not FLAGS.output.endswith((".pte", ".pt2")): FLAGS.output = str(Path(FLAGS.output).with_suffix(".pte")) freeze(model=FLAGS.model, output=FLAGS.output, head=FLAGS.head) + elif FLAGS.command == "change-bias": + change_bias( + input_file=FLAGS.INPUT, + mode=FLAGS.mode, + bias_value=FLAGS.bias_value, + datafile=FLAGS.datafile, + system=FLAGS.system, + numb_batch=FLAGS.numb_batch, + model_branch=FLAGS.model_branch, + output=FLAGS.output, + ) else: raise RuntimeError( f"Unsupported command '{FLAGS.command}' for the pt_expt backend." diff --git a/deepmd/pt_expt/train/training.py b/deepmd/pt_expt/train/training.py index f8730ed271..7297ead34d 100644 --- a/deepmd/pt_expt/train/training.py +++ b/deepmd/pt_expt/train/training.py @@ -884,3 +884,27 @@ def print_on_training( line += f" {cur_lr:8.1e}\n" fout.write(line) fout.flush() + + +def model_change_out_bias( + _model: Any, + _sample_func: list[dict], + _bias_adjust_mode: str = "change-by-statistic", +) -> Any: + old_bias = deepcopy(_model.get_out_bias()) + _model.change_out_bias( + _sample_func, + bias_adjust_mode=_bias_adjust_mode, + ) + new_bias = deepcopy(_model.get_out_bias()) + model_type_map = _model.get_type_map() + from deepmd.dpmodel.common import ( + to_numpy_array, + ) + + log.info( + f"Change output bias of {model_type_map!s} " + f"from {to_numpy_array(old_bias).reshape(-1)[: len(model_type_map)]!s} " + f"to {to_numpy_array(new_bias).reshape(-1)[: len(model_type_map)]!s}." + ) + return _model diff --git a/source/tests/pt_expt/test_change_bias.py b/source/tests/pt_expt/test_change_bias.py new file mode 100644 index 0000000000..c6d2a332c5 --- /dev/null +++ b/source/tests/pt_expt/test_change_bias.py @@ -0,0 +1,246 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import os +import shutil +import tempfile +import unittest +from copy import ( + deepcopy, +) + +import numpy as np + +from deepmd.main import ( + main, +) +from deepmd.pt_expt.entrypoints.main import ( + get_trainer, +) +from deepmd.pt_expt.model.get_model import ( + get_model, +) +from deepmd.pt_expt.train.wrapper import ( + ModelWrapper, +) +from deepmd.pt_expt.utils.env import ( + DEVICE, +) +from deepmd.utils.argcheck import ( + normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) + +EXAMPLE_DIR = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "..", + "examples", + "water", +) + + +def run_dp(cmd: str) -> int: + """Run DP directly from the entry point.""" + cmds = cmd.split() + if cmds[0] == "dp": + cmds = cmds[1:] + else: + raise RuntimeError("The command is not dp") + main(cmds) + return 0 + + +def _make_config(data_dir: str) -> dict: + """Build a minimal config dict for change-bias tests.""" + return { + "model": { + "type_map": ["O", "H"], + "descriptor": { + "type": "se_e2_a", + "sel": [6, 12], + "rcut_smth": 0.50, + "rcut": 3.00, + "neuron": [8, 16], + "resnet_dt": False, + "axis_neuron": 4, + "type_one_side": True, + "seed": 1, + }, + "fitting_net": { + "neuron": [16, 16], + "resnet_dt": True, + "seed": 1, + }, + "data_stat_nbatch": 1, + }, + "learning_rate": { + "type": "exp", + "decay_steps": 500, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + }, + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + }, + "training": { + "training_data": { + "systems": [os.path.join(data_dir, "data_0")], + "batch_size": 1, + }, + "validation_data": { + "systems": [os.path.join(data_dir, "data_0")], + "batch_size": 1, + "numb_btch": 1, + }, + "numb_steps": 1, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 1, + "save_freq": 1, + }, + } + + +class TestChangeBias(unittest.TestCase): + """Test dp change-bias for the pt_expt backend.""" + + @classmethod + def setUpClass(cls) -> None: + data_dir = os.path.join(EXAMPLE_DIR, "data") + if not os.path.isdir(data_dir): + raise unittest.SkipTest(f"Example data not found: {data_dir}") + cls.data_dir = data_dir + cls.data_file = [os.path.join(data_dir, "data_0")] + + cls.tmpdir = tempfile.mkdtemp() + cls.old_cwd = os.getcwd() + os.chdir(cls.tmpdir) + + # Build & train 1-step model + config = _make_config(data_dir) + config = update_deepmd_input(config, warning=False) + config = normalize(config) + config["training"]["save_ckpt"] = "model.ckpt" + cls.config = config + trainer = get_trainer(deepcopy(config)) + trainer.run() + + cls.model_path = os.path.join(cls.tmpdir, "model.ckpt.pt") + + # Record original bias + cls.original_bias = deepcopy(trainer.wrapper.model.get_out_bias()) + + @classmethod + def tearDownClass(cls) -> None: + os.chdir(cls.old_cwd) + shutil.rmtree(cls.tmpdir) + + def _load_model_from_ckpt(self, ckpt_path: str): + """Load a pt_expt model from a .pt checkpoint.""" + import torch + + state_dict = torch.load(ckpt_path, map_location=DEVICE, weights_only=True) + model_state = state_dict["model"] + model_params = model_state["_extra_state"]["model_params"] + model = get_model(model_params) + wrapper = ModelWrapper(model) + wrapper.load_state_dict(model_state) + return model + + def test_change_bias_with_data(self) -> None: + output_path = os.path.join(self.tmpdir, "model_data_bias.pt") + run_dp( + f"dp --pt-expt change-bias {self.model_path} " + f"-s {self.data_file[0]} -o {output_path}" + ) + updated_model = self._load_model_from_ckpt(output_path) + updated_bias = np.array(updated_model.get_out_bias()) + original_bias = np.array(self.original_bias) + # Bias should have changed from the original + self.assertFalse( + np.allclose(original_bias, updated_bias), + "Bias should have changed after change-bias with data", + ) + + def test_change_bias_with_data_sys_file(self) -> None: + tmp_file = tempfile.NamedTemporaryFile( + delete=False, suffix=".txt", dir=self.tmpdir + ) + with open(tmp_file.name, "w") as f: + f.writelines([sys + "\n" for sys in self.data_file]) + + output_path = os.path.join(self.tmpdir, "model_file_bias.pt") + run_dp( + f"dp --pt-expt change-bias {self.model_path} " + f"-f {tmp_file.name} -o {output_path}" + ) + updated_model = self._load_model_from_ckpt(output_path) + updated_bias = np.array(updated_model.get_out_bias()) + original_bias = np.array(self.original_bias) + # Bias should have changed from the original + self.assertFalse( + np.allclose(original_bias, updated_bias), + "Bias should have changed after change-bias with data file", + ) + + def test_change_bias_with_user_defined(self) -> None: + user_bias = [0.1, 3.2] + output_path = os.path.join(self.tmpdir, "model_user_bias.pt") + run_dp( + f"dp --pt-expt change-bias {self.model_path} " + f"-b {' '.join(str(v) for v in user_bias)} -o {output_path}" + ) + updated_model = self._load_model_from_ckpt(output_path) + updated_bias = np.array(updated_model.get_out_bias()) + expected_bias = np.array(user_bias).reshape(updated_bias.shape) + np.testing.assert_allclose(updated_bias, expected_bias) + + def test_change_bias_frozen_pte(self) -> None: + from deepmd.pt_expt.entrypoints.main import ( + freeze, + ) + from deepmd.pt_expt.model.model import ( + BaseModel, + ) + from deepmd.pt_expt.utils.serialization import ( + serialize_from_file, + ) + + # Freeze the checkpoint + pte_path = os.path.join(self.tmpdir, "frozen.pte") + freeze(model=self.model_path, output=pte_path) + + # Get original bias + original_data = serialize_from_file(pte_path) + original_model = BaseModel.deserialize(original_data["model"]) + original_bias = deepcopy(original_model.get_out_bias()) + + # Run change-bias on the frozen model + output_pte = os.path.join(self.tmpdir, "frozen_updated.pte") + run_dp( + f"dp --pt-expt change-bias {pte_path} " + f"-s {self.data_file[0]} -o {output_pte}" + ) + + # Load updated model and verify bias changed + updated_data = serialize_from_file(output_pte) + updated_model = BaseModel.deserialize(updated_data["model"]) + updated_bias = updated_model.get_out_bias() + + # Bias should have changed + self.assertFalse( + np.allclose(original_bias, updated_bias), + "Bias should have changed after change-bias on frozen model", + ) + + +if __name__ == "__main__": + unittest.main() From 07d76240ba7664462617a08f309403211a4629c8 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sat, 21 Mar 2026 00:24:58 +0800 Subject: [PATCH 2/9] fix(tests): add session-scoped DeviceContext cleanup for setUpClass The per-test autouse fixture _clear_leaked_device_context runs before each test method but not before setUpClass. Tests that call trainer.run() in setUpClass (e.g. TestChangeBias) hit a spurious "Torch not compiled with CUDA enabled" error because a leaked DeviceContext(cuda:127) from test collection reroutes torch.tensor() calls without device= to a fake CUDA device. Add a session-scoped fixture that clears leaked DeviceContext modes once at session start, before any setUpClass runs. --- source/tests/pt_expt/conftest.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/source/tests/pt_expt/conftest.py b/source/tests/pt_expt/conftest.py index 15791050d6..afda179a82 100644 --- a/source/tests/pt_expt/conftest.py +++ b/source/tests/pt_expt/conftest.py @@ -32,9 +32,8 @@ ) -@pytest.fixture(autouse=True) -def _clear_leaked_device_context(): - """Pop any stale ``DeviceContext`` before each test, restore after.""" +def _pop_device_contexts() -> list: + """Pop all stale DeviceContext modes from the torch function mode stack.""" popped = [] while True: modes = _get_current_function_mode_stack() @@ -46,6 +45,24 @@ def _clear_leaked_device_context(): popped.append(top) else: break + return popped + + +@pytest.fixture(autouse=True, scope="session") +def _clear_leaked_device_context_session(): + """Pop any stale DeviceContext once at session start. + + This runs before any setUpClass, preventing CUDA init errors + in tests that call trainer.run() during class setup. + """ + _pop_device_contexts() + yield + + +@pytest.fixture(autouse=True) +def _clear_leaked_device_context(): + """Pop any stale ``DeviceContext`` before each test, restore after.""" + popped = _pop_device_contexts() yield # Restore in reverse order so the stack is back to its original state. for ctx in reversed(popped): From cdcdb5d4f362dc0dd79747a8d557583d53b8a4df Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sat, 21 Mar 2026 00:30:04 +0800 Subject: [PATCH 3/9] fix(pt_expt): recompute fitting stats after set-by-statistic bias change The PT backend calls get_fitting_net().compute_input_stats() after change_out_bias with set-by-statistic mode to update fitting normalization. The pt_expt backend was missing this, which would leave stale fitting stats when using dp change-bias -m set. Add tests verifying compute_input_stats is called for set-by-statistic and not called for change-by-statistic. --- deepmd/pt_expt/train/training.py | 8 ++++ source/tests/pt_expt/test_change_bias.py | 56 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/deepmd/pt_expt/train/training.py b/deepmd/pt_expt/train/training.py index 7297ead34d..111dc94ecf 100644 --- a/deepmd/pt_expt/train/training.py +++ b/deepmd/pt_expt/train/training.py @@ -897,6 +897,14 @@ def model_change_out_bias( bias_adjust_mode=_bias_adjust_mode, ) new_bias = deepcopy(_model.get_out_bias()) + + from deepmd.dpmodel.model.dp_model import ( + DPModelCommon, + ) + + if isinstance(_model, DPModelCommon) and _bias_adjust_mode == "set-by-statistic": + _model.get_fitting_net().compute_input_stats(_sample_func) + model_type_map = _model.get_type_map() from deepmd.dpmodel.common import ( to_numpy_array, diff --git a/source/tests/pt_expt/test_change_bias.py b/source/tests/pt_expt/test_change_bias.py index c6d2a332c5..ffb450c34b 100644 --- a/source/tests/pt_expt/test_change_bias.py +++ b/source/tests/pt_expt/test_change_bias.py @@ -242,5 +242,61 @@ def test_change_bias_frozen_pte(self) -> None: ) +class TestChangeBiasFittingStats(unittest.TestCase): + """Test that model_change_out_bias recomputes fitting stats for set-by-statistic.""" + + def _make_mock_model(self): + from unittest.mock import ( + MagicMock, + ) + + from deepmd.dpmodel.model.dp_model import ( + DPModelCommon, + ) + + fitting_net = MagicMock() + + class FakeModel(DPModelCommon): + def get_out_bias(self): + return np.array([[0.0, 0.0]]) + + def get_type_map(self): + return ["O", "H"] + + def get_fitting_net(self): + return fitting_net + + def change_out_bias(self, *args, **kwargs): + pass + + return FakeModel(), fitting_net + + def test_compute_input_stats_called(self) -> None: + from deepmd.pt_expt.train.training import ( + model_change_out_bias, + ) + + model, fitting_net = self._make_mock_model() + sample_func = [{"energy": np.zeros((1, 1))}] + + model_change_out_bias(model, sample_func, _bias_adjust_mode="set-by-statistic") + + fitting_net.compute_input_stats.assert_called_once_with(sample_func) + + def test_compute_input_stats_not_called_for_change(self) -> None: + from deepmd.pt_expt.train.training import ( + model_change_out_bias, + ) + + model, fitting_net = self._make_mock_model() + sample_func = [{"energy": np.zeros((1, 1))}] + + model_change_out_bias( + model, sample_func, _bias_adjust_mode="change-by-statistic" + ) + + fitting_net.compute_input_stats.assert_not_called() + + if __name__ == "__main__": unittest.main() From a0aa242511db6d1028ad4c010f63b568eb7e83ab Mon Sep 17 00:00:00 2001 From: Han Wang Date: Sat, 21 Mar 2026 00:32:06 +0800 Subject: [PATCH 4/9] fix(pt_expt): cap nbatches per system in change-bias to avoid oversampling When numb_batch=0, the code used max(data.get_nbatches()) which makes every smaller system wrap and repeat batches until matching the largest, overweighting short systems. Use min() instead so no system wraps, matching PT backend behavior. --- deepmd/pt_expt/entrypoints/main.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/deepmd/pt_expt/entrypoints/main.py b/deepmd/pt_expt/entrypoints/main.py index 0cbd775e1c..f444e8dd96 100644 --- a/deepmd/pt_expt/entrypoints/main.py +++ b/deepmd/pt_expt/entrypoints/main.py @@ -358,7 +358,12 @@ def change_bias( mock_loss = get_loss({"inference": True}, 1.0, len(type_map), model_to_change) data.add_data_requirements(mock_loss.label_requirement) data.add_data_requirements(get_additional_data_requirement(model_to_change)) - nbatches = numb_batch if numb_batch != 0 else max(data.get_nbatches()) + if numb_batch != 0: + nbatches = numb_batch + else: + # Cap at the minimum across systems so no system wraps and + # overweights short systems (matching PT behavior). + nbatches = min(data.get_nbatches()) sampled_data = make_stat_input(data, nbatches) model_to_change = model_change_out_bias( model_to_change, sampled_data, _bias_adjust_mode=bias_adjust_mode From ac7f14ab9ab3ccc0699696a6cb644de56f785b7c Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 23 Mar 2026 18:24:05 +0800 Subject: [PATCH 5/9] fix(tests): convert CUDA tensors to numpy in change-bias tests get_out_bias() returns a torch.Tensor on CUDA, which can't be directly converted to numpy. Add to_numpy() helper that calls .detach().cpu() before .numpy(). --- source/tests/pt_expt/test_change_bias.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/source/tests/pt_expt/test_change_bias.py b/source/tests/pt_expt/test_change_bias.py index ffb450c34b..db5a7758d5 100644 --- a/source/tests/pt_expt/test_change_bias.py +++ b/source/tests/pt_expt/test_change_bias.py @@ -8,6 +8,7 @@ ) import numpy as np +import torch from deepmd.main import ( main, @@ -31,6 +32,14 @@ update_deepmd_input, ) + +def to_numpy(x): + """Convert array-like (numpy or torch.Tensor) to numpy array.""" + if isinstance(x, torch.Tensor): + return x.detach().cpu().numpy() + return np.asarray(x) + + EXAMPLE_DIR = os.path.join( os.path.dirname(__file__), "..", @@ -136,7 +145,7 @@ def setUpClass(cls) -> None: cls.model_path = os.path.join(cls.tmpdir, "model.ckpt.pt") # Record original bias - cls.original_bias = deepcopy(trainer.wrapper.model.get_out_bias()) + cls.original_bias = to_numpy(trainer.wrapper.model.get_out_bias()) @classmethod def tearDownClass(cls) -> None: @@ -162,7 +171,7 @@ def test_change_bias_with_data(self) -> None: f"-s {self.data_file[0]} -o {output_path}" ) updated_model = self._load_model_from_ckpt(output_path) - updated_bias = np.array(updated_model.get_out_bias()) + updated_bias = to_numpy(updated_model.get_out_bias()) original_bias = np.array(self.original_bias) # Bias should have changed from the original self.assertFalse( @@ -183,7 +192,7 @@ def test_change_bias_with_data_sys_file(self) -> None: f"-f {tmp_file.name} -o {output_path}" ) updated_model = self._load_model_from_ckpt(output_path) - updated_bias = np.array(updated_model.get_out_bias()) + updated_bias = to_numpy(updated_model.get_out_bias()) original_bias = np.array(self.original_bias) # Bias should have changed from the original self.assertFalse( @@ -199,7 +208,7 @@ def test_change_bias_with_user_defined(self) -> None: f"-b {' '.join(str(v) for v in user_bias)} -o {output_path}" ) updated_model = self._load_model_from_ckpt(output_path) - updated_bias = np.array(updated_model.get_out_bias()) + updated_bias = to_numpy(updated_model.get_out_bias()) expected_bias = np.array(user_bias).reshape(updated_bias.shape) np.testing.assert_allclose(updated_bias, expected_bias) From c8a352d6eedda6fe75fa78b235d5dac4bed89a58 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 23 Mar 2026 18:25:53 +0800 Subject: [PATCH 6/9] fix(pt_expt): replace assert with ValueError for input validation Assertions are stripped with python -O. Use explicit exceptions for CLI-facing user input validation. --- deepmd/pt_expt/entrypoints/main.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/deepmd/pt_expt/entrypoints/main.py b/deepmd/pt_expt/entrypoints/main.py index 82f4527e69..4a95ac7baf 100644 --- a/deepmd/pt_expt/entrypoints/main.py +++ b/deepmd/pt_expt/entrypoints/main.py @@ -324,13 +324,13 @@ def change_bias( type_map = model_to_change.get_type_map() if bias_value is not None: - assert "energy" in model_to_change.model_output_type(), ( - "User-defined bias is only available for energy model!" - ) - assert len(bias_value) == len(type_map), ( - f"The number of elements in the bias should be the same as " - f"that in the type_map: {type_map}." - ) + if "energy" not in model_to_change.model_output_type(): + raise ValueError("User-defined bias is only available for energy models!") + if len(bias_value) != len(type_map): + raise ValueError( + f"The number of elements in the bias ({len(bias_value)}) must match " + f"the number of types in type_map ({len(type_map)}): {type_map}." + ) old_bias = model_to_change.get_out_bias() bias_to_set = torch.tensor( bias_value, dtype=old_bias.dtype, device=old_bias.device From a8f3b3e72709d3fd59d68afaf6c3c59ed5c33410 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 23 Mar 2026 20:06:14 +0800 Subject: [PATCH 7/9] fix(pt_expt): replace assert with ValueError and validate mode in change_bias Replace assert statements with explicit ValueError for input validation (bias_value length, energy model check) since assertions are stripped with python -O. Add explicit mode validation to reject typos instead of silently defaulting to set-by-statistic. --- deepmd/pt_expt/entrypoints/main.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/deepmd/pt_expt/entrypoints/main.py b/deepmd/pt_expt/entrypoints/main.py index f444e8dd96..0a564b3782 100644 --- a/deepmd/pt_expt/entrypoints/main.py +++ b/deepmd/pt_expt/entrypoints/main.py @@ -306,7 +306,12 @@ def change_bias( "or a frozen model with a .pte/.pt2 extension" ) - bias_adjust_mode = "change-by-statistic" if mode == "change" else "set-by-statistic" + if mode == "change": + bias_adjust_mode = "change-by-statistic" + elif mode == "set": + bias_adjust_mode = "set-by-statistic" + else: + raise ValueError(f"Unsupported mode '{mode}'. Expected 'change' or 'set'.") if input_file.endswith(".pt"): multi_task = "model_dict" in model_params @@ -324,13 +329,13 @@ def change_bias( type_map = model_to_change.get_type_map() if bias_value is not None: - assert "energy" in model_to_change.model_output_type(), ( - "User-defined bias is only available for energy model!" - ) - assert len(bias_value) == len(type_map), ( - f"The number of elements in the bias should be the same as " - f"that in the type_map: {type_map}." - ) + if "energy" not in model_to_change.model_output_type(): + raise ValueError("User-defined bias is only available for energy model!") + if len(bias_value) != len(type_map): + raise ValueError( + f"The number of elements in the bias should be the same as " + f"that in the type_map: {type_map}." + ) old_bias = model_to_change.get_out_bias() bias_to_set = torch.tensor( bias_value, dtype=old_bias.dtype, device=old_bias.device From 007f6b92a3f841d65b40a78ead63279c40c90175 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Tue, 24 Mar 2026 13:16:03 +0800 Subject: [PATCH 8/9] fix(tests): use to_numpy for CUDA-compatible bias conversion in frozen pte test test_change_bias_frozen_pte passed raw tensors from get_out_bias() to np.allclose, which fails on CUDA with "can't convert cuda:0 device type tensor to numpy". Use the existing to_numpy helper. --- source/tests/pt_expt/test_change_bias.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/tests/pt_expt/test_change_bias.py b/source/tests/pt_expt/test_change_bias.py index db5a7758d5..50d114af28 100644 --- a/source/tests/pt_expt/test_change_bias.py +++ b/source/tests/pt_expt/test_change_bias.py @@ -230,7 +230,7 @@ def test_change_bias_frozen_pte(self) -> None: # Get original bias original_data = serialize_from_file(pte_path) original_model = BaseModel.deserialize(original_data["model"]) - original_bias = deepcopy(original_model.get_out_bias()) + original_bias = to_numpy(original_model.get_out_bias()) # Run change-bias on the frozen model output_pte = os.path.join(self.tmpdir, "frozen_updated.pte") @@ -242,7 +242,7 @@ def test_change_bias_frozen_pte(self) -> None: # Load updated model and verify bias changed updated_data = serialize_from_file(output_pte) updated_model = BaseModel.deserialize(updated_data["model"]) - updated_bias = updated_model.get_out_bias() + updated_bias = to_numpy(updated_model.get_out_bias()) # Bias should have changed self.assertFalse( From 228534e94be4b1a9e239a9e6ae06d70c396c54b3 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Tue, 24 Mar 2026 21:05:14 +0800 Subject: [PATCH 9/9] fix(tests): clear leaked DeviceContext in TestChangeBias.setUpClass Session-scoped conftest fixture doesn't run before unittest setUpClass when the full test suite is collected. Call _pop_device_contexts() explicitly at the start of setUpClass to prevent CUDA init errors from stale DeviceContext(cuda:127) during trainer.run() on CPU runners. --- source/tests/pt_expt/test_change_bias.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/tests/pt_expt/test_change_bias.py b/source/tests/pt_expt/test_change_bias.py index 50d114af28..16974cc653 100644 --- a/source/tests/pt_expt/test_change_bias.py +++ b/source/tests/pt_expt/test_change_bias.py @@ -123,6 +123,12 @@ class TestChangeBias(unittest.TestCase): @classmethod def setUpClass(cls) -> None: + from .conftest import ( + _pop_device_contexts, + ) + + _pop_device_contexts() + data_dir = os.path.join(EXAMPLE_DIR, "data") if not os.path.isdir(data_dir): raise unittest.SkipTest(f"Example data not found: {data_dir}")