From b8c0859cf8b090a2bb83ba99ec49d389f6cb1e73 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 18 Jun 2025 14:13:32 +0800 Subject: [PATCH 01/27] feat(pt): add default_fparam "numb_fparam": 2, "default_fparam": [0.0, 1.0], --- deepmd/entrypoints/test.py | 8 ++++-- deepmd/infer/deep_eval.py | 6 +++++ deepmd/pt/infer/deep_eval.py | 3 +++ .../pt/model/atomic_model/dp_atomic_model.py | 3 +++ deepmd/pt/model/model/make_model.py | 4 +++ deepmd/pt/model/task/ener.py | 2 ++ deepmd/pt/model/task/fitting.py | 27 ++++++++++++++++++- deepmd/pt/model/task/invar_fitting.py | 2 ++ deepmd/pt/train/training.py | 8 ++++-- deepmd/utils/argcheck.py | 1 + 10 files changed, 59 insertions(+), 5 deletions(-) diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index db605b0de1..5b22d16be4 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -298,7 +298,11 @@ def test_ener( data.add("atom_ener", 1, atomic=True, must=True, high_prec=False) if dp.get_dim_fparam() > 0: data.add( - "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False + "fparam", + dp.get_dim_fparam(), + atomic=False, + must=not dp.has_default_fparam(), + high_prec=False, ) if dp.get_dim_aparam() > 0: data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False) @@ -334,7 +338,7 @@ def test_ener( atype = test_data["type"][:numb_test].reshape([numb_test, -1]) else: atype = test_data["type"][0] - if dp.get_dim_fparam() > 0: + if dp.get_dim_fparam() > 0 and test_data["find_fparam"] != 0.0: fparam = test_data["fparam"][:numb_test] else: fparam = None diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index ee61abe58c..881a2f899f 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -160,6 +160,9 @@ def get_type_map(self) -> list[str]: def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this DP.""" + def has_default_fparam(self) -> bool: + return 
False + @abstractmethod def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this DP.""" @@ -370,6 +373,9 @@ def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this DP.""" return self.deep_eval.get_dim_fparam() + def has_default_fparam(self) -> bool: + return self.deep_eval.has_default_fparam() + def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this DP.""" return self.deep_eval.get_dim_aparam() diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index c4d5d028ce..07b9176c99 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -183,6 +183,9 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this DP.""" return self.dp.model["Default"].get_dim_aparam() + def has_default_fparam(self) -> bool: + return self.dp.model["Default"].has_default_fparam() + def get_intensive(self) -> bool: return self.dp.model["Default"].get_intensive() diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 5a5655b72c..cde6cae9a8 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -308,6 +308,9 @@ def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" return self.fitting_net.get_dim_fparam() + def has_default_fparam(self) -> bool: + return self.fitting_net.has_default_fparam() + def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.fitting_net.get_dim_aparam() diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index c32abaa095..0b90c2cc44 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -522,6 +522,10 @@ def get_dim_fparam(self) -> int: """Get the number (dimension) of frame 
parameters of this atomic model.""" return self.atomic_model.get_dim_fparam() + @torch.jit.export + def has_default_fparam(self) -> bool: + return self.atomic_model.has_default_fparam() + @torch.jit.export def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 07351b33f6..5e993fec1b 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -56,6 +56,7 @@ def __init__( mixed_types: bool = True, seed: Optional[Union[int, list[int]]] = None, type_map: Optional[list[str]] = None, + default_fparam: Optional[list] = None, **kwargs, ) -> None: super().__init__( @@ -74,6 +75,7 @@ def __init__( mixed_types=mixed_types, seed=seed, type_map=type_map, + default_fparam=default_fparam, **kwargs, ) diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 0865b61f52..054dce01eb 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -227,6 +227,7 @@ def __init__( remove_vaccum_contribution: Optional[list[bool]] = None, type_map: Optional[list[str]] = None, use_aparam_as_mask: bool = False, + default_fparam: Optional[list] = None, **kwargs, ) -> None: super().__init__() @@ -238,6 +239,7 @@ def __init__( self.resnet_dt = resnet_dt self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam + self.default_fparam = default_fparam self.dim_case_embd = dim_case_embd self.activation_function = activation_function self.precision = precision @@ -299,6 +301,20 @@ def __init__( else: self.case_embd = None + if self.default_fparam is not None: + if self.numb_fparam > 0: + assert ( + len(self.default_fparam) == self.numb_fparam + ), "default_fparam length mismatch!" 
+ self.register_buffer( + "default_fparam_tensor", + torch.tensor( + np.array(self.default_fparam), dtype=self.prec, device=device + ), + ) + else: + self.default_fparam_tensor = None + in_dim = ( self.dim_descrpt + self.numb_fparam @@ -415,6 +431,9 @@ def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this atomic model.""" return self.numb_fparam + def has_default_fparam(self) -> bool: + return self.default_fparam is not None + def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.numb_aparam @@ -509,6 +528,13 @@ def _forward_common( ): # cast the input to internal precsion xx = descriptor.to(self.prec) + nf, nloc, nd = xx.shape + + if self.numb_fparam > 0 and fparam is None: + # use default fparam + assert self.default_fparam_tensor is not None + fparam = torch.tile(self.default_fparam_tensor.unsqueeze(0), [nf, 1]) + fparam = fparam.to(self.prec) if fparam is not None else None aparam = aparam.to(self.prec) if aparam is not None else None @@ -521,7 +547,6 @@ def _forward_common( xx_zeros = torch.zeros_like(xx) else: xx_zeros = None - nf, nloc, nd = xx.shape net_dim_out = self._net_out_dim() if nd != self.dim_descrpt: diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index b1599eac60..f9ab2265f8 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -103,6 +103,7 @@ def __init__( atom_ener: Optional[list[Optional[torch.Tensor]]] = None, type_map: Optional[list[str]] = None, use_aparam_as_mask: bool = False, + default_fparam: Optional[list] = None, **kwargs, ) -> None: self.dim_out = dim_out @@ -128,6 +129,7 @@ def __init__( else [x is not None for x in atom_ener], type_map=type_map, use_aparam_as_mask=use_aparam_as_mask, + default_fparam=default_fparam, **kwargs, ) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 193dcd8cb9..09103e283e 100644 --- 
a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -1126,7 +1126,8 @@ def get_data(self, is_train=True, task_key="Default"): label_dict = {} for item_key in batch_data: if item_key in input_keys: - input_dict[item_key] = batch_data[item_key] + if item_key != "fparam" or batch_data["find_fparam"] != 0.0: + input_dict[item_key] = batch_data[item_key] else: if item_key not in ["sid", "fid"]: label_dict[item_key] = batch_data[item_key] @@ -1205,7 +1206,10 @@ def get_additional_data_requirement(_model): if _model.get_dim_fparam() > 0: fparam_requirement_items = [ DataRequirementItem( - "fparam", _model.get_dim_fparam(), atomic=False, must=True + "fparam", + _model.get_dim_fparam(), + atomic=False, + must=not _model.has_default_fparam(), ) ] additional_data_requirement += fparam_requirement_items diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index fb911550dd..6d7285593e 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1773,6 +1773,7 @@ def fitting_ener(): return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument("default_fparam", list, optional=True, default=None), Argument( "dim_case_embd", int, From 28be7f6da3eecb03d5886943085d138202bb8290 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 11 Jul 2025 19:42:52 +0800 Subject: [PATCH 02/27] Update stat.py --- deepmd/pt/utils/stat.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index cf6892b49d..182cba6ed6 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -59,6 +59,14 @@ def make_stat_input(datasets, dataloaders, nbatches): except StopIteration: iterator = iter(dataloaders[i]) stat_data = next(iterator) + if ( + "find_fparam" in stat_data + and "fparam" in stat_data + and stat_data["find_fparam"] == 0.0 + ): + # for model using 
default fparam + stat_data.pop("fparam") + stat_data.pop("find_fparam") for dd in stat_data: if stat_data[dd] is None: sys_stat[dd] = None From 503ec287875a92b6bde665ac824423b77e605c13 Mon Sep 17 00:00:00 2001 From: Chenqqian Zhang <100290172+Chengqian-Zhang@users.noreply.github.com> Date: Thu, 24 Jul 2025 18:19:04 +0800 Subject: [PATCH 03/27] Write fparam/aparam statistic to stat_file (#47) * Add fparam/aparam stat * Add fparam/aparam stat in share_fitting * Add fparam default value if default_fparam is not None * Add model_prob in share_fitting_params * Add protection when share_params of fitting net --- .../pt/model/atomic_model/dp_atomic_model.py | 6 +- deepmd/pt/model/model/make_model.py | 5 + deepmd/pt/model/task/fitting.py | 263 ++++++++++++++---- deepmd/pt/train/training.py | 36 ++- deepmd/pt/train/wrapper.py | 5 +- deepmd/utils/env_mat_stat.py | 7 + 6 files changed, 253 insertions(+), 69 deletions(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index cde6cae9a8..e19670cf90 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -6,6 +6,7 @@ ) import torch +import numpy as np from deepmd.dpmodel import ( FittingOutputDef, @@ -300,7 +301,7 @@ def wrapped_sampler(): self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path) self.fitting_net.compute_input_stats( - wrapped_sampler, protection=self.data_stat_protect + wrapped_sampler, protection=self.data_stat_protect, stat_file_path=stat_file_path ) self.compute_or_load_out_stat(wrapped_sampler, stat_file_path) @@ -311,6 +312,9 @@ def get_dim_fparam(self) -> int: def has_default_fparam(self) -> bool: return self.fitting_net.has_default_fparam() + def get_default_fparam(self) -> Optional[np.array]: + return self.fitting_net.get_default_fparam() + def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return 
self.fitting_net.get_dim_aparam() diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index 0b90c2cc44..63cf7db6fe 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -4,6 +4,7 @@ ) import torch +import numpy as np from deepmd.dpmodel import ( ModelOutputDef, @@ -526,6 +527,10 @@ def get_dim_fparam(self) -> int: def has_default_fparam(self) -> bool: return self.atomic_model.has_default_fparam() + @torch.jit.export + def get_default_fparam(self) -> Optional[np.array]: + return self.atomic_model.get_default_fparam() + @torch.jit.export def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 054dce01eb..4c5c3a02da 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -7,6 +7,7 @@ Callable, Optional, Union, + List, ) import numpy as np @@ -40,6 +41,12 @@ get_index_between_two_maps, map_atom_exclude_types, ) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) dtype = env.GLOBAL_PT_FLOAT_PRECISION device = env.DEVICE @@ -55,7 +62,7 @@ def __new__(cls, *args, **kwargs): return BaseFitting.__new__(BaseFitting, *args, **kwargs) return super().__new__(cls) - def share_params(self, base_class, shared_level, resume=False) -> None: + def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False) -> None: """ Share the parameters of self to the base_class with shared_level during multitask training. 
If not start from checkpoint (resume is False), @@ -66,16 +73,145 @@ def share_params(self, base_class, shared_level, resume=False) -> None: ) if shared_level == 0: # only not share the bias_atom_e and the case_embd + # link fparam buffers + if self.numb_fparam > 0: + if not resume: + base_fparam = base_class.stats["fparam"] + assert len(base_fparam) == self.numb_fparam + for ii in range(self.numb_fparam): + base_fparam[ii] += self.get_stats()["fparam"][ii] * model_prob + fparam_avg = np.array([ii.compute_avg() for ii in base_fparam]) + fparam_std = np.array([ii.compute_std(protection=protection) for ii in base_fparam]) + fparam_inv_std = 1.0 / fparam_std + base_class.fparam_avg.copy_( + torch.tensor( + fparam_avg, device=env.DEVICE, dtype=base_class.fparam_avg.dtype + ) + ) + base_class.fparam_inv_std.copy_( + torch.tensor( + fparam_inv_std, device=env.DEVICE, dtype=base_class.fparam_inv_std.dtype + ) + ) + self.fparam_avg = base_class.fparam_avg + self.fparam_inv_std = base_class.fparam_inv_std + + # link aparam buffers + if self.numb_aparam > 0: + if not resume: + base_aparam = base_class.stats["aparam"] + assert len(base_aparam) == self.numb_aparam + for ii in range(self.numb_aparam): + base_aparam[ii] += self.get_stats()["aparam"][ii] * model_prob + aparam_avg = np.array([ii.compute_avg() for ii in base_aparam]) + aparam_std = np.array([ii.compute_std(protection=protection) for ii in base_aparam]) + aparam_inv_std = 1.0 / aparam_std + base_class.aparam_avg.copy_( + torch.tensor( + aparam_avg, device=env.DEVICE, dtype=base_class.aparam_avg.dtype + ) + ) + base_class.aparam_inv_std.copy_( + torch.tensor( + aparam_inv_std, device=env.DEVICE, dtype=base_class.aparam_inv_std.dtype + ) + ) + self.aparam_avg = base_class.aparam_avg + self.aparam_inv_std = base_class.aparam_inv_std + # the following will successfully link all the params except buffers, which need manually link. 
for item in self._modules: self._modules[item] = base_class._modules[item] else: raise NotImplementedError + def save_to_file_fparam( + self, + stat_file_path: DPPath, + ) -> None: + """Save the statistics of fparam. + + Parameters + ---------- + path : DPPath + The path to save the statistics of fparam. + """ + assert stat_file_path is not None + stat_file_path.mkdir(exist_ok=True, parents=True) + if len(self.stats) == 0: + raise ValueError("The statistics hasn't been computed.") + fp = stat_file_path / "fparam" + _fparam_stat = [] + for ii in range(self.numb_fparam): + _tmp_stat = self.stats["fparam"][ii] + _fparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum]) + _fparam_stat = np.array(_fparam_stat) + fp.save_numpy(_fparam_stat) + log.info(f"Save fparam stats to {fp}.") + + def save_to_file_aparam( + self, + stat_file_path: DPPath, + ) -> None: + """Save the statistics of aparam. + + Parameters + ---------- + path : DPPath + The path to save the statistics of aparam. + """ + assert stat_file_path is not None + stat_file_path.mkdir(exist_ok=True, parents=True) + if len(self.stats) == 0: + raise ValueError("The statistics hasn't been computed.") + fp = stat_file_path / "aparam" + _aparam_stat = [] + for ii in range(self.numb_aparam): + _tmp_stat = self.stats["aparam"][ii] + _aparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum]) + _aparam_stat = np.array(_aparam_stat) + fp.save_numpy(_aparam_stat) + log.info(f"Save aparam stats to {fp}.") + + def restore_fparam_from_file(self, stat_file_path: DPPath) -> None: + """Load the statistics of fparam. + + Parameters + ---------- + path : DPPath + The path to load the statistics of fparam. 
+ """ + fp = stat_file_path / "fparam" + arr = fp.load_numpy() + assert arr.shape == (self.numb_fparam, 3) + _fparam_stat = [] + for ii in range(self.numb_fparam): + _fparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2])) + self.stats["fparam"] = _fparam_stat + log.info(f"Load fparam stats from {fp}.") + + def restore_aparam_from_file(self, stat_file_path: DPPath) -> None: + """Load the statistics of aparam. + + Parameters + ---------- + path : DPPath + The path to load the statistics of aparam. + """ + fp = stat_file_path / "aparam" + arr = fp.load_numpy() + assert arr.shape == (self.numb_aparam, 3) + _aparam_stat = [] + for ii in range(self.numb_aparam): + _aparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2])) + self.stats["aparam"] = _aparam_stat + log.info(f"Load aparam stats from {fp}.") + def compute_input_stats( self, merged: Union[Callable[[], list[dict]], list[dict]], protection: float = 1e-2, + stat_file_path: Optional[DPPath] = None, ) -> None: """ Compute the input statistics (e.g. mean and stddev) for the fittings from packed data. @@ -91,67 +227,89 @@ def compute_input_stats( the lazy function helps by only sampling once. protection : float Divided-by-zero protection + stat_file_path : Optional[DPPath] + The path to the stat file. 
""" if self.numb_fparam == 0 and self.numb_aparam == 0: # skip data statistics + self.stats = None return - if callable(merged): - sampled = merged() - else: - sampled = merged + + self.stats = {} + # stat fparam if self.numb_fparam > 0: - cat_data = torch.cat([frame["fparam"] for frame in sampled], dim=0) - cat_data = torch.reshape(cat_data, [-1, self.numb_fparam]) - fparam_avg = torch.mean(cat_data, dim=0) - fparam_std = torch.std(cat_data, dim=0, unbiased=False) - fparam_std = torch.where( - fparam_std < protection, - torch.tensor( - protection, dtype=fparam_std.dtype, device=fparam_std.device - ), - fparam_std, - ) + if stat_file_path is not None and stat_file_path.is_dir(): + self.restore_fparam_from_file(stat_file_path) + else: + sampled = merged() if callable(merged) else merged + self.stats["fparam"] = [] + cat_data = to_numpy_array(torch.cat([frame["fparam"] for frame in sampled], dim=0)) + cat_data = np.reshape(cat_data, [-1, self.numb_fparam]) + sumv = np.sum(cat_data, axis=0) + sumv2 = np.sum(cat_data * cat_data, axis=0) + sumn = cat_data.shape[0] + for ii in range(self.numb_fparam): + self.stats["fparam"].append( + StatItem( + number=sumn, + sum=sumv[ii], + squared_sum=sumv2[ii], + ) + ) + if stat_file_path is not None: + self.save_to_file_fparam(stat_file_path) + + fparam_avg = np.array([ii.compute_avg() for ii in self.stats["fparam"]]) + fparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["fparam"]]) fparam_inv_std = 1.0 / fparam_std - self.fparam_avg.copy_( - torch.tensor(fparam_avg, device=env.DEVICE, dtype=self.fparam_avg.dtype) - ) - self.fparam_inv_std.copy_( - torch.tensor( - fparam_inv_std, device=env.DEVICE, dtype=self.fparam_inv_std.dtype - ) - ) + log.info(f"fparam_avg is {fparam_avg}, fparam_inv_std is {fparam_inv_std}") + self.fparam_avg.copy_(to_torch_tensor(fparam_avg)) + self.fparam_inv_std.copy_(to_torch_tensor(fparam_inv_std)) + # stat aparam if self.numb_aparam > 0: - sys_sumv = [] - sys_sumv2 = [] - 
sys_sumn = [] - for ss_ in [frame["aparam"] for frame in sampled]: - ss = torch.reshape(ss_, [-1, self.numb_aparam]) - sys_sumv.append(torch.sum(ss, dim=0)) - sys_sumv2.append(torch.sum(ss * ss, dim=0)) - sys_sumn.append(ss.shape[0]) - sumv = torch.sum(torch.stack(sys_sumv), dim=0) - sumv2 = torch.sum(torch.stack(sys_sumv2), dim=0) - sumn = sum(sys_sumn) - aparam_avg = sumv / sumn - aparam_std = torch.sqrt(sumv2 / sumn - (sumv / sumn) ** 2) - aparam_std = torch.where( - aparam_std < protection, - torch.tensor( - protection, dtype=aparam_std.dtype, device=aparam_std.device - ), - aparam_std, - ) + if stat_file_path is not None and stat_file_path.is_dir(): + self.restore_aparam_from_file(stat_file_path) + else: + sampled = merged() if callable(merged) else merged + self.stats["aparam"] = [] + sys_sumv = [] + sys_sumv2 = [] + sys_sumn = [] + for ss_ in [frame["aparam"] for frame in sampled]: + ss = np.reshape(to_numpy_array(ss_), [-1, self.numb_aparam]) + sys_sumv.append(np.sum(ss, axis=0)) + sys_sumv2.append(np.sum(ss * ss, axis=0)) + sys_sumn.append(ss.shape[0]) + sumv = np.sum(np.stack(sys_sumv), axis=0) + sumv2 = np.sum(np.stack(sys_sumv2), axis=0) + sumn = sum(sys_sumn) + for ii in range(self.numb_aparam): + self.stats["aparam"].append( + StatItem( + number=sumn, + sum=sumv[ii], + squared_sum=sumv2[ii], + ) + ) + if stat_file_path is not None: + self.save_to_file_aparam(stat_file_path) + + aparam_avg = np.array([ii.compute_avg() for ii in self.stats["aparam"]]) + aparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["aparam"]]) aparam_inv_std = 1.0 / aparam_std - self.aparam_avg.copy_( - torch.tensor(aparam_avg, device=env.DEVICE, dtype=self.aparam_avg.dtype) - ) - self.aparam_inv_std.copy_( - torch.tensor( - aparam_inv_std, device=env.DEVICE, dtype=self.aparam_inv_std.dtype - ) + log.info(f"aparam_avg is {aparam_avg}, aparam_inv_std is {aparam_inv_std}") + self.aparam_avg.copy_(to_torch_tensor(aparam_avg)) + 
self.aparam_inv_std.copy_(to_torch_tensor(aparam_inv_std)) + + def get_stats(self) -> dict[str, List[StatItem]]: + """Get the statistics of the fitting_net.""" + if self.stats is None: + raise RuntimeError( + "The statistics of fitting net has not been computed." ) + return self.stats class GeneralFitting(Fitting): @@ -434,6 +592,9 @@ def get_dim_fparam(self) -> int: def has_default_fparam(self) -> bool: return self.default_fparam is not None + def get_default_fparam(self) -> Optional[np.array]: + return self.default_fparam_tensor.cpu().numpy() + def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.numb_aparam diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 09103e283e..a0f4ca3f43 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -583,11 +583,30 @@ def single_model_finetune( frz_model = torch.jit.load(init_frz_model, map_location=DEVICE) self.model.load_state_dict(frz_model.state_dict()) + # Get model prob for multi-task + if self.multi_task: + self.model_prob = np.array([0.0 for key in self.model_keys]) + if training_params.get("model_prob", None) is not None: + model_prob = training_params["model_prob"] + for ii, model_key in enumerate(self.model_keys): + if model_key in model_prob: + self.model_prob[ii] += float(model_prob[model_key]) + else: + for ii, model_key in enumerate(self.model_keys): + self.model_prob[ii] += float(len(self.training_data[model_key])) + sum_prob = np.sum(self.model_prob) + assert sum_prob > 0.0, "Sum of model prob must be larger than 0!" + self.model_prob = self.model_prob / sum_prob + # Multi-task share params if shared_links is not None: + _data_stat_protect = np.array([model_params["model_dict"][ii].get("data_stat_protect", 1e-2) for ii in model_params["model_dict"]]) + assert np.allclose(_data_stat_protect, _data_stat_protect[0]), f"Model key 'data_stat_protect' must be the same in each branch when multitask!" 
self.wrapper.share_params( shared_links, resume=(resuming and not self.finetune_update_stat) or self.rank != 0, + model_key_prob_map = dict(zip(self.model_keys, self.model_prob)), + data_stat_protect = _data_stat_protect[0] ) if dist.is_available() and dist.is_initialized(): @@ -637,21 +656,6 @@ def warm_up_linear(step, warmup_steps): else: raise ValueError(f"Not supported optimizer type '{self.opt_type}'") - # Get model prob for multi-task - if self.multi_task: - self.model_prob = np.array([0.0 for key in self.model_keys]) - if training_params.get("model_prob", None) is not None: - model_prob = training_params["model_prob"] - for ii, model_key in enumerate(self.model_keys): - if model_key in model_prob: - self.model_prob[ii] += float(model_prob[model_key]) - else: - for ii, model_key in enumerate(self.model_keys): - self.model_prob[ii] += float(len(self.training_data[model_key])) - sum_prob = np.sum(self.model_prob) - assert sum_prob > 0.0, "Sum of model prob must be larger than 0!" - self.model_prob = self.model_prob / sum_prob - # Tensorboard self.enable_tensorboard = training_params.get("tensorboard", False) self.tensorboard_log_dir = training_params.get("tensorboard_log_dir", "log") @@ -1204,12 +1208,14 @@ def print_on_training( def get_additional_data_requirement(_model): additional_data_requirement = [] if _model.get_dim_fparam() > 0: + _fparam_default = _model.get_default_fparam() if _model.has_default_fparam() else 0.0 fparam_requirement_items = [ DataRequirementItem( "fparam", _model.get_dim_fparam(), atomic=False, must=not _model.has_default_fparam(), + default=_fparam_default, ) ] additional_data_requirement += fparam_requirement_items diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 9a2cbff295..4ac86d43d6 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -59,7 +59,7 @@ def __init__( self.loss[task_key] = loss[task_key] self.inference_only = self.loss is None - def share_params(self, shared_links, 
resume=False) -> None: + def share_params(self, shared_links, model_key_prob_map, data_stat_protect=1e-2, resume=False) -> None: """ Share the parameters of classes following rules defined in shared_links during multitask training. If not start from checkpoint (resume is False), @@ -129,8 +129,9 @@ def share_params(self, shared_links, resume=False) -> None: link_class = self.model[ model_key_link ].atomic_model.__getattr__(class_type_link) + frac_prob = model_key_prob_map[model_key_link]/model_key_prob_map[model_key_base] link_class.share_params( - base_class, shared_level_link, resume=resume + base_class, shared_level_link, model_prob=frac_prob, protection=data_stat_protect, resume=resume ) log.warning( f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!" diff --git a/deepmd/utils/env_mat_stat.py b/deepmd/utils/env_mat_stat.py index ecc0b7b62f..3fa4d7d410 100644 --- a/deepmd/utils/env_mat_stat.py +++ b/deepmd/utils/env_mat_stat.py @@ -48,6 +48,13 @@ def __add__(self, other: "StatItem") -> "StatItem": squared_sum=self.squared_sum + other.squared_sum, ) + def __mul__(self, scalar: float) -> "StatItem": + return StatItem( + number=self.number * scalar, + sum=self.sum * scalar, + squared_sum=self.squared_sum * scalar, + ) + def compute_avg(self, default: float = 0) -> float: """Compute the average of the environment matrix. 
From 82646e9ff458bcc039c7a40c070d854e9c83b37c Mon Sep 17 00:00:00 2001 From: Chenqqian Zhang <100290172+Chengqian-Zhang@users.noreply.github.com> Date: Wed, 30 Jul 2025 17:52:52 +0800 Subject: [PATCH 04/27] delete torch.jit.export of get_default_fparam (#51) --- deepmd/pt/model/atomic_model/dp_atomic_model.py | 2 +- deepmd/pt/model/model/make_model.py | 3 +-- deepmd/pt/model/task/fitting.py | 4 ++-- deepmd/pt/train/training.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index e19670cf90..5bf61a01ab 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -312,7 +312,7 @@ def get_dim_fparam(self) -> int: def has_default_fparam(self) -> bool: return self.fitting_net.has_default_fparam() - def get_default_fparam(self) -> Optional[np.array]: + def get_default_fparam(self) -> Optional[torch.Tensor]: return self.fitting_net.get_default_fparam() def get_dim_aparam(self) -> int: diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index 63cf7db6fe..d58261a481 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -527,8 +527,7 @@ def get_dim_fparam(self) -> int: def has_default_fparam(self) -> bool: return self.atomic_model.has_default_fparam() - @torch.jit.export - def get_default_fparam(self) -> Optional[np.array]: + def get_default_fparam(self) -> Optional[torch.Tensor]: return self.atomic_model.get_default_fparam() @torch.jit.export diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 4c5c3a02da..35c26c376c 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -592,8 +592,8 @@ def get_dim_fparam(self) -> int: def has_default_fparam(self) -> bool: return self.default_fparam is not None - def get_default_fparam(self) -> Optional[np.array]: - return 
self.default_fparam_tensor.cpu().numpy() + def get_default_fparam(self) -> Optional[torch.Tensor]: + return self.default_fparam_tensor def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index a0f4ca3f43..589fe620ee 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -1208,7 +1208,7 @@ def print_on_training( def get_additional_data_requirement(_model): additional_data_requirement = [] if _model.get_dim_fparam() > 0: - _fparam_default = _model.get_default_fparam() if _model.has_default_fparam() else 0.0 + _fparam_default = _model.get_default_fparam().cpu().numpy() if _model.has_default_fparam() else 0.0 fparam_requirement_items = [ DataRequirementItem( "fparam", From 13ee082661e03d13b4d460bec936cc838edd06fd Mon Sep 17 00:00:00 2001 From: anyangml Date: Tue, 21 Oct 2025 02:31:49 +0000 Subject: [PATCH 05/27] add edge readout --- .../descriptor/make_base_descriptor.py | 7 + deepmd/dpmodel/fitting/make_base_fitting.py | 3 + deepmd/dpmodel/utils/learning_rate.py | 33 ++++ .../pt/model/atomic_model/dp_atomic_model.py | 32 +++- deepmd/pt/model/descriptor/dpa1.py | 7 + deepmd/pt/model/descriptor/dpa3.py | 7 + deepmd/pt/model/descriptor/repflows.py | 18 ++ deepmd/pt/model/model/__init__.py | 6 +- deepmd/pt/model/task/ener.py | 162 ++++++++++++++++++ deepmd/pt/model/task/invar_fitting.py | 2 + deepmd/pt/train/training.py | 14 +- deepmd/pt/utils/learning_rate.py | 2 + deepmd/utils/argcheck.py | 109 +++++++++++- deepmd/utils/path.py | 2 +- 14 files changed, 386 insertions(+), 18 deletions(-) diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py index f45e85e516..97cc7abf65 100644 --- a/deepmd/dpmodel/descriptor/make_base_descriptor.py +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -148,6 +148,13 @@ def compute_input_stats( """Update mean and stddev for 
descriptor elements.""" raise NotImplementedError + def get_norm_fact(self) -> list[float]: + """Returns the norm factor.""" + raise NotImplementedError + + def get_additional_output_for_fitting(self): + raise NotImplementedError + def enable_compression( self, min_nbor_dist: float, diff --git a/deepmd/dpmodel/fitting/make_base_fitting.py b/deepmd/dpmodel/fitting/make_base_fitting.py index 201b5e27d1..ccd10a8e11 100644 --- a/deepmd/dpmodel/fitting/make_base_fitting.py +++ b/deepmd/dpmodel/fitting/make_base_fitting.py @@ -67,6 +67,9 @@ def compute_output_stats(self, merged) -> NoReturn: """Update the output bias for fitting net.""" raise NotImplementedError + def need_additional_input(self) -> bool: + return False + @abstractmethod def get_type_map(self) -> list[str]: """Get the name to each type of atoms.""" diff --git a/deepmd/dpmodel/utils/learning_rate.py b/deepmd/dpmodel/utils/learning_rate.py index 90c18fca22..a19069f938 100644 --- a/deepmd/dpmodel/utils/learning_rate.py +++ b/deepmd/dpmodel/utils/learning_rate.py @@ -51,3 +51,36 @@ def value(self, step) -> np.float64: if step_lr < self.min_lr: step_lr = self.min_lr return step_lr + +class LearningRateWSD: + def __init__( + self, + start_lr, + stop_lr, + stop_steps, + decay_mode="85:10:5", # stable-decay-stable + **kwargs, + ) -> None: + self.start_lr = start_lr + self.stop_lr = stop_lr + self.stop_steps = stop_steps + self.decay_mode = [float(ii) for ii in decay_mode.split(":")] + assert len(self.decay_mode) == 3 + self.decay_start_rate = self.decay_mode[0] / sum(self.decay_mode) + self.decay_end_rate = (self.decay_mode[0] + self.decay_mode[1]) / sum( + self.decay_mode + ) + def value(self, step) -> np.float64: + if step < self.decay_start_rate * self.stop_steps: + return self.start_lr + elif step >= self.decay_end_rate * self.stop_steps: + return self.stop_lr + else: + # linear decay + decay_rate = (self.start_lr - self.stop_lr) / ( + self.decay_end_rate * self.stop_steps + - self.decay_start_rate * 
self.stop_steps + ) + return self.start_lr - decay_rate * ( + step - self.decay_start_rate * self.stop_steps + ) \ No newline at end of file diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 5bf61a01ab..e2be1eb097 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -247,15 +247,29 @@ def forward_atomic( if self.enable_eval_descriptor_hook: self.eval_descriptor_list.append(descriptor.detach()) # energy, force - fit_ret = self.fitting_net( - descriptor, - atype, - gr=rot_mat, - g2=g2, - h2=h2, - fparam=fparam, - aparam=aparam, - ) + if not self.fitting_net.need_additional_input(): + fit_ret = self.fitting_net( + descriptor, + atype, + gr=rot_mat, + g2=g2, + h2=h2, + fparam=fparam, + aparam=aparam, + ) + else: + add_input = self.descriptor.get_additional_output_for_fitting() + fit_ret = self.fitting_net( + descriptor, + atype, + gr=rot_mat, + g2=g2, + h2=h2, + fparam=fparam, + aparam=aparam, + sw=sw, + edge_index=add_input.get("edge_index", None), + ) return fit_ret def get_out_bias(self) -> torch.Tensor: diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 9c1e144f48..97b1e29da3 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -353,6 +353,13 @@ def get_dim_out(self) -> int: def get_dim_emb(self) -> int: return self.se_atten.dim_emb + + def get_norm_fact(self) -> list[float]: + """Returns the norm factor.""" + return [float(self.get_nnei())] + + def get_additional_output_for_fitting(self) -> dict[str, Optional[torch.Tensor]]: + return {} def mixed_types(self) -> bool: """If true, the descriptor diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py index dd2da9a3c8..df8016b0a1 100644 --- a/deepmd/pt/model/descriptor/dpa3.py +++ b/deepmd/pt/model/descriptor/dpa3.py @@ -245,6 +245,13 @@ def get_dim_out(self) -> int: def 
get_dim_emb(self) -> int: """Returns the embedding dimension of this descriptor.""" return self.repflows.dim_emb + + def get_norm_fact(self) -> list[float]: + """Returns the norm factor.""" + return self.repflows.get_norm_fact() + + def get_additional_output_for_fitting(self): + return self.repflows.get_additional_output_for_fitting() def mixed_types(self) -> bool: """If true, the descriptor diff --git a/deepmd/pt/model/descriptor/repflows.py b/deepmd/pt/model/descriptor/repflows.py index 0bcca7a132..0d453c8aa0 100644 --- a/deepmd/pt/model/descriptor/repflows.py +++ b/deepmd/pt/model/descriptor/repflows.py @@ -254,6 +254,8 @@ def __init__( self.use_exp_switch = use_exp_switch self.use_dynamic_sel = use_dynamic_sel self.sel_reduce_factor = sel_reduce_factor + self.dynamic_e_sel = self.nnei / self.sel_reduce_factor + self.dynamic_a_sel = self.a_sel / self.sel_reduce_factor if self.use_dynamic_sel and not self.smooth_edge_update: raise NotImplementedError( "smooth_edge_update must be True when use_dynamic_sel is True!" 
@@ -321,6 +323,7 @@ def __init__( ) ) self.layers = torch.nn.ModuleList(layers) + self.additional_output_for_fitting: dict[str, Optional[torch.Tensor]] = {} wanted_shape = (self.ntypes, self.nnei, 4) mean = torch.zeros(wanted_shape, dtype=self.prec, device=env.DEVICE) @@ -330,6 +333,8 @@ def __init__( self.register_buffer("mean", mean) self.register_buffer("stddev", stddev) self.stats = None + + additional_output_for_fitting: dict[str, Optional[torch.Tensor]] def get_rcut(self) -> float: """Returns the cut-off radius.""" @@ -362,6 +367,17 @@ def get_dim_in(self) -> int: def get_dim_emb(self) -> int: """Returns the embedding dimension e_dim.""" return self.e_dim + + def get_additional_output_for_fitting(self): + return self.additional_output_for_fitting + + def get_norm_fact(self) -> list[float]: + """Returns the norm factor.""" + return [ + float(self.dynamic_e_sel if self.use_dynamic_sel else self.nnei), + # float(self.dynamic_a_sel if self.use_dynamic_sel else self.a_sel), + ] + def __setitem__(self, key, value) -> None: if key in ("avg", "data_avg", "davg"): @@ -535,10 +551,12 @@ def forward( angle_input = angle_input[a_nlist_mask] # n_angle x 1 a_sw = (a_sw[:, :, :, None] * a_sw[:, :, None, :])[a_nlist_mask] + self.additional_output_for_fitting["edge_index"] = edge_index else: # avoid jit assertion edge_index = torch.zeros([2, 1], device=nlist.device, dtype=nlist.dtype) angle_index = torch.zeros([3, 1], device=nlist.device, dtype=nlist.dtype) + self.additional_output_for_fitting["edge_index"] = None # get edge and angle embedding # nb x nloc x nnei x e_dim [OR] n_edge x e_dim if not self.edge_init_use_dist: diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py index 8d451f087f..d78a1ea0b5 100644 --- a/deepmd/pt/model/model/__init__.py +++ b/deepmd/pt/model/model/__init__.py @@ -90,8 +90,10 @@ def _get_standard_model_components(model_params, ntypes): fitting_net["ntypes"] = descriptor.get_ntypes() fitting_net["type_map"] = 
copy.deepcopy(model_params["type_map"]) fitting_net["mixed_types"] = descriptor.mixed_types() - if fitting_net["type"] in ["dipole", "polar"]: + if fitting_net["type"] in ["dipole", "polar", "ener_readout"]: fitting_net["embedding_width"] = descriptor.get_dim_emb() + if fitting_net["type"] in ["ener_readout"]: + fitting_net["norm_fact"] = descriptor.get_norm_fact() fitting_net["dim_descrpt"] = descriptor.get_dim_out() grad_force = "direct" not in fitting_net["type"] if not grad_force: @@ -262,7 +264,7 @@ def get_standard_model(model_params): modelcls = PolarModel elif fitting_net_type == "dos": modelcls = DOSModel - elif fitting_net_type in ["ener", "direct_force_ener"]: + elif fitting_net_type in ["ener", "direct_force_ener", "ener_readout"]: modelcls = EnergyModel elif fitting_net_type == "property": modelcls = PropertyModel diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 5e993fec1b..3968e21cdc 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -13,9 +13,19 @@ OutputVariableDef, fitting_check_output, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pt.model.network.mlp import ( + FittingNet, + NetworkCollection, +) from deepmd.pt.model.network.network import ( ResidualDeep, ) +from deepmd.pt.model.network.utils import ( + aggregate, +) from deepmd.pt.model.task.fitting import ( Fitting, GeneralFitting, @@ -259,3 +269,155 @@ def forward( "energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION), "dforce": vec_out, } + + +@Fitting.register("ener_readout") +@fitting_check_output +class EnergyFittingNetReadout(InvarFitting): + def __init__( + self, + ntypes: int, + dim_descrpt: int, + neuron: list[int] = [128, 128, 128], + bias_atom_e: Optional[torch.Tensor] = None, + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + dim_case_embd: int = 0, + embedding_width: int = 128, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + mixed_types: bool = 
True, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, + norm_fact: list[float] = [120.0], + add_edge_readout: bool = True, + slim_edge_readout: bool = False, + **kwargs, + ) -> None: + """Construct a fitting net for energy. + + Args: + - ntypes: Element count. + - embedding_width: Embedding width per atom. + - neuron: Number of neurons in each hidden layers of the fitting net. + - bias_atom_e: Average energy per atom for each element. + - resnet_dt: Using time-step in the ResNet construction. + """ + self.add_edge_readout = add_edge_readout + super().__init__( + "energy", + ntypes, + dim_descrpt, + 1, + neuron=neuron, + bias_atom_e=bias_atom_e, + resnet_dt=resnet_dt, + numb_fparam=numb_fparam, + numb_aparam=numb_aparam, + dim_case_embd=dim_case_embd, + activation_function=activation_function, + precision=precision, + mixed_types=mixed_types, + seed=seed, + type_map=type_map, + **kwargs, + ) + + # embedding for edge readout + self.embedding_width = embedding_width + self.slim_edge_readout = slim_edge_readout + self.norm_e_fact = norm_fact[0] + + if self.add_edge_readout: + self.edge_embed = NetworkCollection( + 1 if not self.mixed_types else 0, + self.ntypes, + network_type="fitting_network", + networks=[ + FittingNet( + self.embedding_width, + 1, + self.neuron if not self.slim_edge_readout else self.neuron[:1], + self.activation_function, + self.resnet_dt, + self.precision, + bias_out=True, + seed=child_seed(self.seed + 100, ii), + ) + for ii in range(self.ntypes if not self.mixed_types else 1) + ], + ) + else: + self.edge_embed = None + + # set trainable + for param in self.parameters(): + param.requires_grad = self.trainable + + # make jit happy with torch 2.0.0 + exclude_types: list[int] + + def need_additional_input(self) -> bool: + return True + + def serialize(self) -> dict: + raise NotImplementedError + + @classmethod + def deserialize(cls, data: dict) -> "EnergyFittingNetReadout": + raise NotImplementedError + + def 
forward( + self, + descriptor: torch.Tensor, + atype: torch.Tensor, + gr: Optional[torch.Tensor] = None, + g2: Optional[torch.Tensor] = None, + h2: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + sw: Optional[torch.Tensor] = None, + edge_index: Optional[torch.Tensor] = None, + ): + """Based on embedding net output, calculate total energy. + + Args: + - inputs: Embedding matrix. Its shape is [nframes, natoms[0], self.dim_descrpt]. + - natoms: Tell atom count and element count. Its shape is [2+self.ntypes]. + + Returns + ------- + - `torch.Tensor`: Total energy with shape [nframes, natoms[0]]. + """ + out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[ + self.var_name + ] + nf, nloc, _ = descriptor.shape + + if self.add_edge_readout: + assert g2 is not None + assert sw is not None + assert self.edge_embed is not None + # nf x nloc x nnei x d [OR] nedge x d + edge_feature = g2 + # nf x nloc x nnei x 1 [OR] nedge x 1 + edge_atomic_contrib = self.edge_embed.networks[0](edge_feature) + # nf x nloc x nnei x 1 [OR] nedge x 1 + edge_atomic_contrib = edge_atomic_contrib * sw.unsqueeze(-1) + if edge_index is not None: + # use dynamic sel + n2e_index, n_ext2e_index = edge_index[0], edge_index[1] + # nf x nloc x 1 + edge_energy = aggregate( + edge_atomic_contrib, + n2e_index, + average=False, + num_owner=nf * nloc, + ).reshape(nf, nloc, 1) + else: + # nf x nloc x 1 + edge_energy = torch.sum(edge_atomic_contrib, dim=-2) + # energy + out = out + edge_energy / self.norm_e_fact + return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)} \ No newline at end of file diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index f9ab2265f8..ede0315e87 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -172,6 +172,8 @@ def forward( h2: Optional[torch.Tensor] = None, fparam: Optional[torch.Tensor] = None, aparam: 
Optional[torch.Tensor] = None, + sw: Optional[torch.Tensor] = None, + edge_index: Optional[torch.Tensor] = None, ): """Based on embedding net output, alculate total energy. diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 589fe620ee..e389252dc9 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -61,6 +61,7 @@ ) from deepmd.pt.utils.learning_rate import ( LearningRateExp, + LearningRateWSD ) from deepmd.pt.utils.stat import ( make_stat_input, @@ -258,12 +259,15 @@ def get_sample(): return get_sample def get_lr(lr_params): - assert lr_params.get("type", "exp") == "exp", ( - "Only learning rate `exp` is supported!" - ) + lr_type = lr_params.get("type", "exp") lr_params["stop_steps"] = self.num_steps - self.warmup_steps - lr_exp = LearningRateExp(**lr_params) - return lr_exp + if lr_type == "exp": + lr_schedule = LearningRateExp(**lr_params) + elif lr_type == "wsd": + lr_schedule = LearningRateWSD(**lr_params) + else: + raise ValueError(f"Not supported learning rate type '{lr_type}'") + return lr_schedule # Optimizer if self.multi_task and training_params.get("optim_dict", None) is not None: diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py index 3502434bc0..ed3fa8e519 100644 --- a/deepmd/pt/utils/learning_rate.py +++ b/deepmd/pt/utils/learning_rate.py @@ -1,8 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from deepmd.dpmodel.utils.learning_rate import ( LearningRateExp, + LearningRateWSD, ) __all__ = [ "LearningRateExp", + "LearningRateWSD", ] diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 6d7285593e..7ef7d02741 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -2443,13 +2443,24 @@ def learning_rate_exp(): ] return args +def learning_rate_wsd(): + doc_start_lr = "The learning rate at the start of the training." + doc_stop_lr = "The desired learning rate at the end of the training. 
" + + args = [ + Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr), + Argument("stop_lr", float, optional=True, default=1e-5, doc=doc_stop_lr), + Argument("decay_mode", str, optional=True, default="85:10:5"), + ] + return args def learning_rate_variant_type_args(): doc_lr = "The type of the learning rate." return Variant( "type", - [Argument("exp", dict, learning_rate_exp())], + [Argument("exp", dict, learning_rate_exp()), + Argument("wsd", dict, learning_rate_wsd())], optional=True, default_tag="exp", doc=doc_lr, @@ -2766,6 +2777,102 @@ def loss_ener_spin(): ] +@fitting_args_plugin.register("ener_readout") +def fitting_ener_readout(): + doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." + doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." + doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' + doc_trainable = f"Whether the parameters in the fitting net are trainable. 
This option can be\n\n\ +- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ +- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1." + doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." + doc_seed = "Random seed for parameter initialization of the fitting net" + doc_atom_ener = "Specify the atomic energy in vacuum for each type" + doc_layer_name = ( + "The name of the each layer. The length of this list should be equal to n_neuron + 1. " + "If two layers, either in the same fitting or different fittings, " + "have the same name, they will share the same neural network parameters. " + "The shape of these layers should be the same. " + "If null is given for a layer, parameters will not be shared." + ) + doc_use_aparam_as_mask = ( + "Whether to use the aparam as a mask in input." + "If True, the aparam will not be used in fitting net for embedding." + "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True." 
+ ) + + return [ + Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), + Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "dim_case_embd", + int, + optional=True, + default=0, + doc=doc_only_pt_supported + doc_dim_case_embd, + ), + Argument( + "neuron", + list[int], + optional=True, + default=[120, 120, 120], + alias=["n_neuron"], + doc=doc_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument( + "trainable", + [list[bool], bool], + optional=True, + default=True, + doc=doc_trainable, + ), + Argument( + "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "atom_ener", + list[Optional[float]], + optional=True, + default=[], + doc=doc_atom_ener, + ), + Argument("layer_name", list[str], optional=True, doc=doc_layer_name), + Argument( + "use_aparam_as_mask", + bool, + optional=True, + default=False, + doc=doc_use_aparam_as_mask, + ), + Argument( + "add_edge_readout", + bool, + optional=True, + default=True, + ), + Argument( + "slim_edge_readout", + bool, + optional=True, + default=False, + ), + ] + + @loss_args_plugin.register("dos") def loss_dos(): doc_start_pref_dos = start_pref("Density of State (DOS)") diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py index 87a44aa70d..57a6708d76 100644 --- a/deepmd/utils/path.py +++ b/deepmd/utils/path.py @@ -329,7 +329,7 @@ def _load_h5py(cls, path: str, mode: str = "r") -> h5py.File: # this method has cache to avoid duplicated # loading from different DPH5Path # However the file will be never closed? 
- return h5py.File(path, mode) + return h5py.File(path, mode, locking=False) def load_numpy(self) -> np.ndarray: """Load NumPy array. From e14804d5019da294e49cbbda567b35ebde726289 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 15 Aug 2025 18:49:09 +0800 Subject: [PATCH 06/27] add add_chg_spin_ebd --- .../pt/model/atomic_model/dp_atomic_model.py | 6 ++- deepmd/pt/model/descriptor/dpa3.py | 48 ++++++++++++++++++- deepmd/utils/argcheck.py | 14 +++++- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index e2be1eb097..b90aa113ce 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -6,7 +6,6 @@ ) import torch -import numpy as np from deepmd.dpmodel import ( FittingOutputDef, @@ -242,6 +241,7 @@ def forward_atomic( nlist, mapping=mapping, comm_dict=comm_dict, + fparam=fparam, ) assert descriptor is not None if self.enable_eval_descriptor_hook: @@ -315,7 +315,9 @@ def wrapped_sampler(): self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path) self.fitting_net.compute_input_stats( - wrapped_sampler, protection=self.data_stat_protect, stat_file_path=stat_file_path + wrapped_sampler, + protection=self.data_stat_protect, + stat_file_path=stat_file_path, ) self.compute_or_load_out_stat(wrapped_sampler, stat_file_path) diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py index df8016b0a1..3e532a61ae 100644 --- a/deepmd/pt/model/descriptor/dpa3.py +++ b/deepmd/pt/model/descriptor/dpa3.py @@ -31,6 +31,7 @@ UpdateSel, ) from deepmd.pt.utils.utils import ( + ActivationFn, to_numpy_array, ) from deepmd.utils.data_system import ( @@ -119,6 +120,7 @@ def __init__( use_tebd_bias: bool = False, use_loc_mapping: bool = True, type_map: Optional[list[str]] = None, + add_chg_spin_ebd: bool = False, ) -> None: super().__init__() @@ 
-170,8 +172,10 @@ def init_subclass_params(sub_data, sub_class): precision=precision, seed=child_seed(seed, 1), ) + self.act = ActivationFn(activation_function) self.use_econf_tebd = use_econf_tebd + self.add_chg_spin_ebd = add_chg_spin_ebd self.use_loc_mapping = use_loc_mapping self.use_tebd_bias = use_tebd_bias self.type_map = type_map @@ -188,6 +192,33 @@ def init_subclass_params(sub_data, sub_class): self.concat_output_tebd = concat_output_tebd self.precision = precision self.prec = PRECISION_DICT[self.precision] + + if self.add_chg_spin_ebd: + # -100 ~ 100 is a conservative bound + self.chg_embedding = TypeEmbedNet( + 200, + self.tebd_dim, + precision=precision, + seed=child_seed(seed, 3), + ) + # 100 is a conservative upper bound + self.spin_embedding = TypeEmbedNet( + 100, + self.tebd_dim, + precision=precision, + seed=child_seed(seed, 4), + ) + self.mix_cs_mlp = MLPLayer( + 2 * self.tebd_dim, + self.tebd_dim, + precision=precision, + seed=child_seed(seed, 3), + ) + else: + self.chg_embedding = None + self.spin_embedding = None + self.mix_cs_mlp = None + self.exclude_types = exclude_types self.env_protection = env_protection self.trainable = trainable @@ -245,7 +276,7 @@ def get_dim_out(self) -> int: def get_dim_emb(self) -> int: """Returns the embedding dimension of this descriptor.""" return self.repflows.dim_emb - + def get_norm_fact(self) -> list[float]: """Returns the norm factor.""" return self.repflows.get_norm_fact() @@ -457,6 +488,7 @@ def forward( nlist: torch.Tensor, mapping: Optional[torch.Tensor] = None, comm_dict: Optional[dict[str, torch.Tensor]] = None, + fparam: Optional[torch.Tensor] = None, ): """Compute the descriptor. 
@@ -500,6 +532,20 @@ def forward( node_ebd_ext = self.type_embedding(extended_atype[:, :nloc]) else: node_ebd_ext = self.type_embedding(extended_atype) + + if self.add_chg_spin_ebd: + assert fparam is not None + assert self.chg_embedding is not None + assert self.spin_embedding is not None + charge = fparam[:, 0].to(dtype=torch.int64) + 100 + spin = fparam[:, 1].to(dtype=torch.int64) + chg_ebd = self.chg_embedding(charge) + spin_ebd = self.spin_embedding(spin) + sys_cs_embd = self.act( + self.mix_cs_mlp(torch.cat((chg_ebd, spin_ebd), dim=-1)) + ) + node_ebd_ext = node_ebd_ext + sys_cs_embd.unsqueeze(1) + node_ebd_inp = node_ebd_ext[:, :nloc, :] # repflows node_ebd, edge_ebd, h2, rot_mat, sw = self.repflows( diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 7ef7d02741..d9e6798167 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1387,6 +1387,12 @@ def descrpt_dpa3_args(): default=False, doc=doc_concat_output_tebd, ), + Argument( + "add_chg_spin_ebd", + bool, + optional=True, + default=False, + ), Argument( "activation_function", str, @@ -2443,6 +2449,7 @@ def learning_rate_exp(): ] return args + def learning_rate_wsd(): doc_start_lr = "The learning rate at the start of the training." doc_stop_lr = "The desired learning rate at the end of the training. " @@ -2454,13 +2461,16 @@ def learning_rate_wsd(): ] return args + def learning_rate_variant_type_args(): doc_lr = "The type of the learning rate." 
return Variant( "type", - [Argument("exp", dict, learning_rate_exp()), - Argument("wsd", dict, learning_rate_wsd())], + [ + Argument("exp", dict, learning_rate_exp()), + Argument("wsd", dict, learning_rate_wsd()), + ], optional=True, default_tag="exp", doc=doc_lr, From 58d8c10d8b0ae1c8dda40eee65edf7cf880a4029 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 22 Oct 2025 20:00:41 +0800 Subject: [PATCH 07/27] update add_chg_spin_ebd for default fparam --- deepmd/pt/model/atomic_model/dp_atomic_model.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index b90aa113ce..20ecac3195 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -235,13 +235,24 @@ def forward_atomic( atype = extended_atype[:, :nloc] if self.do_grad_r() or self.do_grad_c(): extended_coord.requires_grad_(True) + + if self.fitting_net.get_dim_fparam() > 0 and fparam is None: + # use default fparam + default_fparam_tensor = self.fitting_net.get_default_fparam() + assert default_fparam_tensor is not None + fparam_input_for_des = torch.tile( + default_fparam_tensor.unsqueeze(0), [nframes, 1] + ) + else: + fparam_input_for_des = fparam + descriptor, rot_mat, g2, h2, sw = self.descriptor( extended_coord, extended_atype, nlist, mapping=mapping, comm_dict=comm_dict, - fparam=fparam, + fparam=fparam_input_for_des, ) assert descriptor is not None if self.enable_eval_descriptor_hook: From c98b1fc1b50e0e552c4ceb22ed9a585ebd16bbec Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Sun, 26 Oct 2025 15:55:18 +0800 Subject: [PATCH 08/27] fix multitask --- deepmd/pt/model/descriptor/dpa3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py index 3e532a61ae..9010638608 100644 --- 
a/deepmd/pt/model/descriptor/dpa3.py +++ b/deepmd/pt/model/descriptor/dpa3.py @@ -322,6 +322,9 @@ def share_params(self, base_class, shared_level, resume=False) -> None: # share all parameters in type_embedding, repflow if shared_level == 0: self._modules["type_embedding"] = base_class._modules["type_embedding"] + for kk in ["chg_embedding", "spin_embedding", "mix_cs_mlp"]: + if kk in self._modules: + self._modules[kk] = base_class._modules[kk] self.repflows.share_params(base_class.repflows, 0, resume=resume) # shared_level: 1 # share all parameters in type_embedding From 82286fd5359448315d54e1e775ebc52c0f69c789 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:50:10 +0800 Subject: [PATCH 09/27] add update_use_layernorm --- deepmd/dpmodel/descriptor/dpa3.py | 2 ++ deepmd/pt/model/descriptor/dpa3.py | 1 + deepmd/pt/model/descriptor/repflow_layer.py | 21 +++++++++++++++++++++ deepmd/pt/model/descriptor/repflows.py | 3 +++ deepmd/utils/argcheck.py | 6 ++++++ 5 files changed, 33 insertions(+) diff --git a/deepmd/dpmodel/descriptor/dpa3.py b/deepmd/dpmodel/descriptor/dpa3.py index 79fa6c8b68..1464b40391 100644 --- a/deepmd/dpmodel/descriptor/dpa3.py +++ b/deepmd/dpmodel/descriptor/dpa3.py @@ -177,6 +177,7 @@ def __init__( use_exp_switch: bool = False, use_dynamic_sel: bool = False, sel_reduce_factor: float = 10.0, + update_use_layernorm: bool = False, ) -> None: self.n_dim = n_dim self.e_dim = e_dim @@ -207,6 +208,7 @@ def __init__( self.use_exp_switch = use_exp_switch self.use_dynamic_sel = use_dynamic_sel self.sel_reduce_factor = sel_reduce_factor + self.update_use_layernorm = update_use_layernorm def __getitem__(self, key): if hasattr(self, key): diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py index 9010638608..f824e0842b 100644 --- a/deepmd/pt/model/descriptor/dpa3.py +++ b/deepmd/pt/model/descriptor/dpa3.py @@ -167,6 +167,7 @@ def init_subclass_params(sub_data, sub_class): 
use_dynamic_sel=self.repflow_args.use_dynamic_sel, sel_reduce_factor=self.repflow_args.sel_reduce_factor, use_loc_mapping=use_loc_mapping, + update_use_layernorm=self.repflow_args.update_use_layernorm, exclude_types=exclude_types, env_protection=env_protection, precision=precision, diff --git a/deepmd/pt/model/descriptor/repflow_layer.py b/deepmd/pt/model/descriptor/repflow_layer.py index 36e738b8b2..ba3158e72b 100644 --- a/deepmd/pt/model/descriptor/repflow_layer.py +++ b/deepmd/pt/model/descriptor/repflow_layer.py @@ -58,6 +58,7 @@ def __init__( use_dynamic_sel: bool = False, sel_reduce_factor: float = 10.0, smooth_edge_update: bool = False, + update_use_layernorm: bool = False, activation_function: str = "silu", update_style: str = "res_residual", update_residual: float = 0.1, @@ -96,6 +97,7 @@ def __init__( self.update_style = update_style self.update_residual = update_residual self.update_residual_init = update_residual_init + self.update_use_layernorm = update_use_layernorm self.a_compress_e_rate = a_compress_e_rate self.a_compress_use_split = a_compress_use_split self.precision = precision @@ -194,6 +196,17 @@ def __init__( ) ) + if self.update_use_layernorm: + self.node_layernorm = torch.nn.LayerNorm(self.n_dim) + self.edge_layernorm = torch.nn.LayerNorm(self.e_dim) + self.angle_layernorm = ( + torch.nn.LayerNorm(self.a_dim) if self.update_angle else None + ) + else: + self.node_layernorm = None + self.edge_layernorm = None + self.angle_layernorm = None + if self.update_angle: self.angle_dim = self.a_dim if self.a_compress_rate == 0: @@ -1117,6 +1130,14 @@ def forward( # update angle_ebd a_updated = self.list_update(a_update_list, "angle") + if self.update_use_layernorm: + assert self.node_layernorm is not None + n_updated = self.node_layernorm(n_updated) + assert self.edge_layernorm is not None + e_updated = self.edge_layernorm(e_updated) + if self.update_angle: + assert self.angle_layernorm is not None + a_updated = self.angle_layernorm(a_updated) return 
n_updated, e_updated, a_updated @torch.jit.export diff --git a/deepmd/pt/model/descriptor/repflows.py b/deepmd/pt/model/descriptor/repflows.py index 0d453c8aa0..d122f4834d 100644 --- a/deepmd/pt/model/descriptor/repflows.py +++ b/deepmd/pt/model/descriptor/repflows.py @@ -217,6 +217,7 @@ def __init__( use_dynamic_sel: bool = False, sel_reduce_factor: float = 10.0, use_loc_mapping: bool = True, + update_use_layernorm: bool = False, optim_update: bool = True, seed: Optional[Union[int, list[int]]] = None, ) -> None: @@ -283,6 +284,7 @@ def __init__( self.precision = precision self.epsilon = 1e-4 self.seed = seed + self.update_use_layernorm = update_use_layernorm self.edge_embd = MLPLayer( 1, self.e_dim, precision=precision, seed=child_seed(seed, 0) @@ -319,6 +321,7 @@ def __init__( use_dynamic_sel=self.use_dynamic_sel, sel_reduce_factor=self.sel_reduce_factor, smooth_edge_update=self.smooth_edge_update, + update_use_layernorm=self.update_use_layernorm, seed=child_seed(child_seed(seed, 1), ii), ) ) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index d9e6798167..4cd43909d6 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1671,6 +1671,12 @@ def dpa3_repflow_args(): default=10.0, doc=doc_sel_reduce_factor, ), + Argument( + "update_use_layernorm", + bool, + optional=True, + default=False, + ), ] From 7c2287e6c8b44af8448a85373eed294a0c79a5b5 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:09:47 +0800 Subject: [PATCH 10/27] add GatedMLP --- deepmd/dpmodel/descriptor/dpa3.py | 4 + deepmd/pt/model/descriptor/dpa3.py | 3 + deepmd/pt/model/descriptor/repflow_layer.py | 51 ++++++++++--- deepmd/pt/model/descriptor/repflows.py | 11 ++- deepmd/pt/model/network/mlp.py | 83 +++++++++++++++++++++ deepmd/utils/argcheck.py | 12 +++ 6 files changed, 149 insertions(+), 15 deletions(-) diff --git a/deepmd/dpmodel/descriptor/dpa3.py b/deepmd/dpmodel/descriptor/dpa3.py index 
1464b40391..b66f78c04c 100644 --- a/deepmd/dpmodel/descriptor/dpa3.py +++ b/deepmd/dpmodel/descriptor/dpa3.py @@ -178,6 +178,8 @@ def __init__( use_dynamic_sel: bool = False, sel_reduce_factor: float = 10.0, update_use_layernorm: bool = False, + use_gated_mlp: bool = False, + gated_mlp_norm: str = "none", ) -> None: self.n_dim = n_dim self.e_dim = e_dim @@ -209,6 +211,8 @@ def __init__( self.use_dynamic_sel = use_dynamic_sel self.sel_reduce_factor = sel_reduce_factor self.update_use_layernorm = update_use_layernorm + self.use_gated_mlp = use_gated_mlp + self.gated_mlp_norm = gated_mlp_norm def __getitem__(self, key): if hasattr(self, key): diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py index f824e0842b..2924f7fc0f 100644 --- a/deepmd/pt/model/descriptor/dpa3.py +++ b/deepmd/pt/model/descriptor/dpa3.py @@ -167,7 +167,10 @@ def init_subclass_params(sub_data, sub_class): use_dynamic_sel=self.repflow_args.use_dynamic_sel, sel_reduce_factor=self.repflow_args.sel_reduce_factor, use_loc_mapping=use_loc_mapping, + # the following are newly added params update_use_layernorm=self.repflow_args.update_use_layernorm, + use_gated_mlp=self.repflow_args.use_gated_mlp, + gated_mlp_norm=self.repflow_args.gated_mlp_norm, exclude_types=exclude_types, env_protection=env_protection, precision=precision, diff --git a/deepmd/pt/model/descriptor/repflow_layer.py b/deepmd/pt/model/descriptor/repflow_layer.py index ba3158e72b..4d52f26a59 100644 --- a/deepmd/pt/model/descriptor/repflow_layer.py +++ b/deepmd/pt/model/descriptor/repflow_layer.py @@ -17,6 +17,7 @@ get_residual, ) from deepmd.pt.model.network.mlp import ( + GatedMLP, MLPLayer, ) from deepmd.pt.model.network.utils import ( @@ -59,6 +60,8 @@ def __init__( sel_reduce_factor: float = 10.0, smooth_edge_update: bool = False, update_use_layernorm: bool = False, + use_gated_mlp: bool = False, + gated_mlp_norm: str = "none", activation_function: str = "silu", update_style: str = "res_residual", 
update_residual: float = 0.1, @@ -98,6 +101,10 @@ def __init__( self.update_residual = update_residual self.update_residual_init = update_residual_init self.update_use_layernorm = update_use_layernorm + self.use_gated_mlp = use_gated_mlp + if self.use_gated_mlp: + assert not optim_update, "Gated MLP does not support optim update!" + self.gated_mlp_norm = gated_mlp_norm self.a_compress_e_rate = a_compress_e_rate self.a_compress_use_split = a_compress_use_split self.precision = precision @@ -160,12 +167,22 @@ def __init__( ) # node edge message - self.node_edge_linear = MLPLayer( - self.edge_info_dim, - self.n_multi_edge_message * n_dim, - precision=precision, - seed=child_seed(seed, 4), - ) + if not self.use_gated_mlp: + self.node_edge_linear = MLPLayer( + self.edge_info_dim, + self.n_multi_edge_message * n_dim, + precision=precision, + seed=child_seed(seed, 4), + ) + else: + self.node_edge_linear = GatedMLP( + self.edge_info_dim, + self.n_multi_edge_message * n_dim, + activation_function=self.activation_function, + norm=self.gated_mlp_norm, + precision=precision, + seed=child_seed(seed, 4), + ) if self.update_style == "res_residual": for head_index in range(self.n_multi_edge_message): self.n_residual.append( @@ -245,12 +262,22 @@ def __init__( self.a_compress_e_linear = None # edge angle message - self.edge_angle_linear1 = MLPLayer( - self.angle_dim, - self.e_dim, - precision=precision, - seed=child_seed(seed, 10), - ) + if not self.use_gated_mlp: + self.edge_angle_linear1 = MLPLayer( + self.angle_dim, + self.e_dim, + precision=precision, + seed=child_seed(seed, 10), + ) + else: + self.edge_angle_linear1 = GatedMLP( + self.angle_dim, + self.e_dim, + activation_function=self.activation_function, + norm=self.gated_mlp_norm, + precision=precision, + seed=child_seed(seed, 10), + ) self.edge_angle_linear2 = MLPLayer( self.e_dim, self.e_dim, diff --git a/deepmd/pt/model/descriptor/repflows.py b/deepmd/pt/model/descriptor/repflows.py index d122f4834d..ec375a3acd 100644 
--- a/deepmd/pt/model/descriptor/repflows.py +++ b/deepmd/pt/model/descriptor/repflows.py @@ -218,6 +218,8 @@ def __init__( sel_reduce_factor: float = 10.0, use_loc_mapping: bool = True, update_use_layernorm: bool = False, + use_gated_mlp: bool = False, + gated_mlp_norm: str = "none", optim_update: bool = True, seed: Optional[Union[int, list[int]]] = None, ) -> None: @@ -285,6 +287,8 @@ def __init__( self.epsilon = 1e-4 self.seed = seed self.update_use_layernorm = update_use_layernorm + self.use_gated_mlp = use_gated_mlp + self.gated_mlp_norm = gated_mlp_norm self.edge_embd = MLPLayer( 1, self.e_dim, precision=precision, seed=child_seed(seed, 0) @@ -322,6 +326,8 @@ def __init__( sel_reduce_factor=self.sel_reduce_factor, smooth_edge_update=self.smooth_edge_update, update_use_layernorm=self.update_use_layernorm, + use_gated_mlp=self.use_gated_mlp, + gated_mlp_norm=self.gated_mlp_norm, seed=child_seed(child_seed(seed, 1), ii), ) ) @@ -336,7 +342,7 @@ def __init__( self.register_buffer("mean", mean) self.register_buffer("stddev", stddev) self.stats = None - + additional_output_for_fitting: dict[str, Optional[torch.Tensor]] def get_rcut(self) -> float: @@ -370,7 +376,7 @@ def get_dim_in(self) -> int: def get_dim_emb(self) -> int: """Returns the embedding dimension e_dim.""" return self.e_dim - + def get_additional_output_for_fitting(self): return self.additional_output_for_fitting @@ -381,7 +387,6 @@ def get_norm_fact(self) -> list[float]: # float(self.dynamic_a_sel if self.use_dynamic_sel else self.a_sel), ] - def __setitem__(self, key, value) -> None: if key in ("avg", "data_avg", "davg"): self.mean = value diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index 22675d6163..463158d7e5 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -275,6 +275,89 @@ def check_load_param(ss): return obj +class GatedMLP(nn.Module): + """Gated MLP + similar model structure is used in CGCNN and M3GNet. 
+ """ + + def __init__( + self, + input_dim: int, + output_dim: int, + *, + activation_function: Optional[str] = None, + norm: str = "batch", + bias: bool = True, + precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + """Initialize a gated MLP. + + Args: + input_dim (int): the input dimension + output_dim (int): the output dimension + activation_function (str, optional): The name of the activation function to use in + the gated MLP. Must be one of "relu", "silu", "tanh", or "gelu". + Default = "silu" + norm (str, optional): The name of the normalization layer to use on the + updated atom features. Must be one of "batch", "layer", or None. + Default = "batch" + bias (bool): whether to use bias in each Linear layers. + Default = True + """ + super().__init__() + self.mlp_core = MLPLayer( + input_dim, + output_dim, + bias=bias, + precision=precision, + seed=seed, + ) + self.mlp_gate = MLPLayer( + input_dim, + output_dim, + bias=bias, + precision=precision, + seed=seed, + ) + # for jit + self.matrix = self.mlp_core.matrix + self.bias = self.mlp_core.bias + self.act = ActivationFn(activation_function) + self.sigmoid = nn.Sigmoid() + self.norm1 = find_normalization(name=norm, dim=output_dim) + self.norm2 = find_normalization(name=norm, dim=output_dim) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Performs a forward pass through the MLP. 
+ + Args: + x (Tensor): a tensor of shape (batch_size, input_dim) + + Returns + ------- + Tensor: a tensor of shape (batch_size, output_dim) + """ + if self.norm1 is None: + core = self.act(self.mlp_core(x)) + gate = self.sigmoid(self.mlp_gate(x)) + else: + core = self.act(self.norm1(self.mlp_core(x))) + gate = self.sigmoid(self.norm2(self.mlp_gate(x))) + return core * gate + + +def find_normalization(name: str, dim: int | None = None) -> nn.Module | None: + """Return an normalization function using name.""" + if name is None: + return None + return { + "batch": nn.BatchNorm1d(dim), + "layer": nn.LayerNorm(dim), + "none": None, + }.get(name.lower(), None) + + MLP_ = make_multilayer_network(MLPLayer, nn.Module) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 4cd43909d6..2da6dd349a 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1677,6 +1677,18 @@ def dpa3_repflow_args(): optional=True, default=False, ), + Argument( + "use_gated_mlp", + bool, + optional=True, + default=False, + ), + Argument( + "gated_mlp_norm", + str, + optional=True, + default="none", + ), ] From 1404623a4b3395e940483eedd78a6fb0bf147688 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 5 Nov 2025 18:01:58 +0800 Subject: [PATCH 11/27] add add_case_embd --- .../pt/model/atomic_model/dp_atomic_model.py | 6 ++++ deepmd/pt/model/descriptor/dpa3.py | 36 +++++++++++++++++-- deepmd/pt/model/model/__init__.py | 4 +++ deepmd/pt/model/task/fitting.py | 8 +++++ deepmd/utils/argcheck.py | 12 +++++++ 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index 20ecac3195..dda10b82a8 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -246,6 +246,11 @@ def forward_atomic( else: fparam_input_for_des = fparam + if self.fitting_net.get_dim_case_embd() > 0: + 
case_embd_input_for_des = self.fitting_net.get_case_embd() + else: + case_embd_input_for_des = None + descriptor, rot_mat, g2, h2, sw = self.descriptor( extended_coord, extended_atype, @@ -253,6 +258,7 @@ def forward_atomic( mapping=mapping, comm_dict=comm_dict, fparam=fparam_input_for_des, + case_embd=case_embd_input_for_des, ) assert descriptor is not None if self.enable_eval_descriptor_hook: diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py index 2924f7fc0f..5134177392 100644 --- a/deepmd/pt/model/descriptor/dpa3.py +++ b/deepmd/pt/model/descriptor/dpa3.py @@ -121,6 +121,8 @@ def __init__( use_loc_mapping: bool = True, type_map: Optional[list[str]] = None, add_chg_spin_ebd: bool = False, + add_case_embd: bool = False, + dim_case_embd: int = 0, ) -> None: super().__init__() @@ -180,6 +182,8 @@ def init_subclass_params(sub_data, sub_class): self.use_econf_tebd = use_econf_tebd self.add_chg_spin_ebd = add_chg_spin_ebd + self.add_case_embd = add_case_embd + self.dim_case_embd = dim_case_embd self.use_loc_mapping = use_loc_mapping self.use_tebd_bias = use_tebd_bias self.type_map = type_map @@ -216,13 +220,25 @@ def init_subclass_params(sub_data, sub_class): 2 * self.tebd_dim, self.tebd_dim, precision=precision, - seed=child_seed(seed, 3), + seed=child_seed(seed, 5), ) else: self.chg_embedding = None self.spin_embedding = None self.mix_cs_mlp = None + if self.add_case_embd: + assert self.dim_case_embd > 0 + self.case_embd_mlp = MLPLayer( + self.dim_case_embd, + self.tebd_dim, + precision=precision, + bias=False, + seed=child_seed(seed, 6), + ) + else: + self.case_embd_mlp = None + self.exclude_types = exclude_types self.env_protection = env_protection self.trainable = trainable @@ -326,7 +342,12 @@ def share_params(self, base_class, shared_level, resume=False) -> None: # share all parameters in type_embedding, repflow if shared_level == 0: self._modules["type_embedding"] = base_class._modules["type_embedding"] - for kk in 
["chg_embedding", "spin_embedding", "mix_cs_mlp"]: + for kk in [ + "chg_embedding", + "spin_embedding", + "mix_cs_mlp", + "case_embd_mlp", + ]: if kk in self._modules: self._modules[kk] = base_class._modules[kk] self.repflows.share_params(base_class.repflows, 0, resume=resume) @@ -496,6 +517,7 @@ def forward( mapping: Optional[torch.Tensor] = None, comm_dict: Optional[dict[str, torch.Tensor]] = None, fparam: Optional[torch.Tensor] = None, + case_embd: Optional[torch.Tensor] = None, ): """Compute the descriptor. @@ -511,6 +533,10 @@ def forward( The index mapping, mapps extended region index to local region. comm_dict The data needed for communication for parallel inference. + fparam + The frame-level parameters. shape: nf x nfparam + case_embd + The case (dataset) embedding for multitask training with shared fitting. shape: nf x dim_case_embd Returns ------- @@ -553,6 +579,12 @@ def forward( ) node_ebd_ext = node_ebd_ext + sys_cs_embd.unsqueeze(1) + if self.add_case_embd: + assert case_embd is not None + assert self.case_embd_mlp is not None + case_embd_out = self.case_embd_mlp(case_embd) + node_ebd_ext = node_ebd_ext + case_embd_out.unsqueeze(0).unsqueeze(0) + node_ebd_inp = node_ebd_ext[:, :nloc, :] # repflows node_ebd, edge_ebd, h2, rot_mat, sw = self.repflows( diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py index d78a1ea0b5..d8980ba4c7 100644 --- a/deepmd/pt/model/model/__init__.py +++ b/deepmd/pt/model/model/__init__.py @@ -83,6 +83,10 @@ def _get_standard_model_components(model_params, ntypes): # descriptor model_params["descriptor"]["ntypes"] = ntypes model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"]) + # explicitly add dim_case_embd if using case embedding + if model_params["descriptor"].get("type", "se_e2_a") in ["dpa3"]: + dim_case_embd = model_params.get("fitting_net", {}).get("dim_case_embd", 0) + model_params["descriptor"]["dim_case_embd"] = dim_case_embd descriptor = 
BaseDescriptor(**model_params["descriptor"]) # fitting fitting_net = model_params.get("fitting_net", {}) diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 35c26c376c..4da177c5a1 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -599,6 +599,14 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.numb_aparam + def get_dim_case_embd(self) -> int: + """Get the number (dimension) of dataset embedding.""" + return self.dim_case_embd + + def get_case_embd(self) -> Optional[torch.Tensor]: + """Get the dataset embedding.""" + return self.case_embd + # make jit happy exclude_types: list[int] diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 2da6dd349a..a94200fbec 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1376,6 +1376,10 @@ def descrpt_dpa3_args(): "Whether to use local atom index mapping in training or non-parallel inference. " "When True, local indexing and mapping are applied to neighbor lists and embeddings during descriptor computation." ) + doc_add_chg_spin_ebd = ( + "Whether to use charge and spin embedding in the type embedding." + ) + doc_add_case_embd = "Whether to use case (dataset) embedding in the type embedding." 
return [ # doc_repflow args Argument("repflow", dict, dpa3_repflow_args(), doc=doc_repflow), @@ -1392,6 +1396,14 @@ def descrpt_dpa3_args(): bool, optional=True, default=False, + doc=doc_add_chg_spin_ebd, + ), + Argument( + "add_case_embd", + bool, + optional=True, + default=False, + doc=doc_add_case_embd, ), Argument( "activation_function", From 219266cf6d2ba81dfd1b60551cb4b53b68f9d6f5 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 19 Nov 2025 17:15:16 +0800 Subject: [PATCH 12/27] Update training.py --- deepmd/pt/train/training.py | 67 +++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index e389252dc9..126465f42f 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -61,7 +61,7 @@ ) from deepmd.pt.utils.learning_rate import ( LearningRateExp, - LearningRateWSD + LearningRateWSD, ) from deepmd.pt.utils.stat import ( make_stat_input, @@ -345,14 +345,14 @@ def get_lr(lr_params): self.validation_data, self.valid_numb_batch, ) = get_data_loader(training_data, validation_data, training_params) - training_data.print_summary( - "training", to_numpy_array(self.training_dataloader.sampler.weights) - ) - if validation_data is not None: - validation_data.print_summary( - "validation", - to_numpy_array(self.validation_dataloader.sampler.weights), - ) + # training_data.print_summary( + # "training", to_numpy_array(self.training_dataloader.sampler.weights) + # ) + # if validation_data is not None: + # validation_data.print_summary( + # "validation", + # to_numpy_array(self.validation_dataloader.sampler.weights), + # ) else: ( self.training_dataloader, @@ -388,20 +388,20 @@ def get_lr(lr_params): training_params["data_dict"][model_key], ) - training_data[model_key].print_summary( - f"training in {model_key}", - to_numpy_array(self.training_dataloader[model_key].sampler.weights), - ) - if ( - validation_data 
is not None - and validation_data[model_key] is not None - ): - validation_data[model_key].print_summary( - f"validation in {model_key}", - to_numpy_array( - self.validation_dataloader[model_key].sampler.weights - ), - ) + # training_data[model_key].print_summary( + # f"training in {model_key}", + # to_numpy_array(self.training_dataloader[model_key].sampler.weights), + # ) + # if ( + # validation_data is not None + # and validation_data[model_key] is not None + # ): + # validation_data[model_key].print_summary( + # f"validation in {model_key}", + # to_numpy_array( + # self.validation_dataloader[model_key].sampler.weights + # ), + # ) # Learning rate self.warmup_steps = training_params.get("warmup_steps", 0) @@ -604,13 +604,20 @@ def single_model_finetune( # Multi-task share params if shared_links is not None: - _data_stat_protect = np.array([model_params["model_dict"][ii].get("data_stat_protect", 1e-2) for ii in model_params["model_dict"]]) - assert np.allclose(_data_stat_protect, _data_stat_protect[0]), f"Model key 'data_stat_protect' must be the same in each branch when multitask!" + _data_stat_protect = np.array( + [ + model_params["model_dict"][ii].get("data_stat_protect", 1e-2) + for ii in model_params["model_dict"] + ] + ) + assert np.allclose(_data_stat_protect, _data_stat_protect[0]), ( + "Model key 'data_stat_protect' must be the same in each branch when multitask!" 
+ ) self.wrapper.share_params( shared_links, resume=(resuming and not self.finetune_update_stat) or self.rank != 0, - model_key_prob_map = dict(zip(self.model_keys, self.model_prob)), - data_stat_protect = _data_stat_protect[0] + model_key_prob_map=dict(zip(self.model_keys, self.model_prob)), + data_stat_protect=_data_stat_protect[0], ) if dist.is_available() and dist.is_initialized(): @@ -1212,7 +1219,11 @@ def print_on_training( def get_additional_data_requirement(_model): additional_data_requirement = [] if _model.get_dim_fparam() > 0: - _fparam_default = _model.get_default_fparam().cpu().numpy() if _model.has_default_fparam() else 0.0 + _fparam_default = ( + _model.get_default_fparam().cpu().numpy() + if _model.has_default_fparam() + else 0.0 + ) fparam_requirement_items = [ DataRequirementItem( "fparam", From e2777c03c40e9b487f0f20fab29318af52c50be3 Mon Sep 17 00:00:00 2001 From: Chun Cai Date: Fri, 29 Aug 2025 15:17:34 +0800 Subject: [PATCH 13/27] feat: handle masked forces in test (#4893) ## Summary by CodeRabbit - New Features - Added per-atom weighting for force evaluation: computes and reports weighted MAE/RMSE alongside unweighted metrics, includes weighted metrics in system-average summaries, logs weighted force metrics, and safely handles zero-weight cases. Also propagates the per-atom weight field into reporting. - Tests - Added end-to-end tests validating weighted vs unweighted force MAE/RMSE and verifying evaluator outputs when using per-atom weight masks. 
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- deepmd/entrypoints/test.py | 24 +++++++- source/tests/pt/test_dp_test.py | 99 +++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index 5b22d16be4..69b1704471 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -291,6 +291,7 @@ def test_ener( data.add("energy", 1, atomic=False, must=False, high_prec=True) data.add("force", 3, atomic=True, must=False, high_prec=False) + data.add("atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3) data.add("virial", 9, atomic=False, must=False, high_prec=False) if dp.has_efield: data.add("efield", 3, atomic=True, must=True, high_prec=False) @@ -317,6 +318,7 @@ def test_ener( find_force = test_data.get("find_force") find_virial = test_data.get("find_virial") find_force_mag = test_data.get("find_force_mag") + find_atom_pref = test_data.get("find_atom_pref") mixed_type = data.mixed_type natoms = len(test_data["type"][0]) nframes = test_data["box"].shape[0] @@ -423,6 +425,16 @@ def test_ener( diff_f = force - test_data["force"][:numb_test] mae_f = mae(diff_f) rmse_f = rmse(diff_f) + size_f = diff_f.size + if find_atom_pref == 1: + atom_weight = test_data["atom_pref"][:numb_test] + weight_sum = np.sum(atom_weight) + if weight_sum > 0: + mae_fw = np.sum(np.abs(diff_f) * atom_weight) / weight_sum + rmse_fw = np.sqrt(np.sum(diff_f * diff_f * atom_weight) / weight_sum) + else: + mae_fw = 0.0 + rmse_fw = 0.0 diff_v = virial - test_data["virial"][:numb_test] mae_v = mae(diff_v) rmse_v = rmse(diff_v) @@ -457,8 +469,13 @@ def test_ener( if not out_put_spin and find_force == 1: log.info(f"Force MAE : {mae_f:e} eV/A") log.info(f"Force RMSE : {rmse_f:e} eV/A") - dict_to_return["mae_f"] = (mae_f, force.size) - dict_to_return["rmse_f"] = (rmse_f, force.size) + dict_to_return["mae_f"] = (mae_f, size_f) + 
dict_to_return["rmse_f"] = (rmse_f, size_f) + if find_atom_pref == 1: + log.info(f"Force weighted MAE : {mae_fw:e} eV/A") + log.info(f"Force weighted RMSE: {rmse_fw:e} eV/A") + dict_to_return["mae_fw"] = (mae_fw, weight_sum) + dict_to_return["rmse_fw"] = (rmse_fw, weight_sum) if out_put_spin and find_force == 1: log.info(f"Force atom MAE : {mae_fr:e} eV/A") log.info(f"Force atom RMSE : {rmse_fr:e} eV/A") @@ -604,6 +621,9 @@ def print_ener_sys_avg(avg: dict[str, float]) -> None: if "rmse_f" in avg: log.info(f"Force MAE : {avg['mae_f']:e} eV/A") log.info(f"Force RMSE : {avg['rmse_f']:e} eV/A") + if "rmse_fw" in avg: + log.info(f"Force weighted MAE : {avg['mae_fw']:e} eV/A") + log.info(f"Force weighted RMSE: {avg['rmse_fw']:e} eV/A") else: log.info(f"Force atom MAE : {avg['mae_fr']:e} eV/A") log.info(f"Force spin MAE : {avg['mae_fm']:e} eV/uB") diff --git a/source/tests/pt/test_dp_test.py b/source/tests/pt/test_dp_test.py index c2915c7ee7..085bff88de 100644 --- a/source/tests/pt/test_dp_test.py +++ b/source/tests/pt/test_dp_test.py @@ -15,12 +15,19 @@ import torch from deepmd.entrypoints.test import test as dp_test +from deepmd.entrypoints.test import test_ener as dp_test_ener +from deepmd.infer.deep_eval import ( + DeepEval, +) from deepmd.pt.entrypoints.main import ( get_trainer, ) from deepmd.pt.utils.utils import ( to_numpy_array, ) +from deepmd.utils.data import ( + DeepmdData, +) from .model.test_permutation import ( model_property, @@ -140,6 +147,98 @@ def setUp(self) -> None: json.dump(self.config, fp, indent=4) +class TestDPTestForceWeight(DPTest, unittest.TestCase): + def setUp(self) -> None: + self.detail_file = "test_dp_test_force_weight_detail" + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + system_dir = self._prepare_weighted_system() + data_file = [system_dir] + 
self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.system_dir = system_dir + self.input_json = "test_dp_test_force_weight.json" + with open(self.input_json, "w") as fp: + json.dump(self.config, fp, indent=4) + + def _prepare_weighted_system(self) -> str: + src = Path(__file__).parent / "water/data/single" + tmp_dir = tempfile.mkdtemp() + shutil.copytree(src, tmp_dir, dirs_exist_ok=True) + set_dir = Path(tmp_dir) / "set.000" + forces = np.load(set_dir / "force.npy") + forces[0, :3] += 1.0 + forces[0, -3:] += 10.0 + np.save(set_dir / "force.npy", forces) + natoms = forces.shape[1] // 3 + atom_pref = np.ones((forces.shape[0], natoms), dtype=forces.dtype) + atom_pref[:, 0] = 2.0 + atom_pref[:, -1] = 0.0 + np.save(set_dir / "atom_pref.npy", atom_pref) + return tmp_dir + + def test_force_weight(self) -> None: + trainer = get_trainer(deepcopy(self.config)) + with torch.device("cpu"): + trainer.get_data(is_train=False) + model = torch.jit.script(trainer.model) + tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth") + torch.jit.save(model, tmp_model.name) + dp = DeepEval(tmp_model.name) + data = DeepmdData( + self.system_dir, + set_prefix="set", + shuffle_test=False, + type_map=dp.get_type_map(), + sort_atoms=False, + ) + err = dp_test_ener( + dp, + data, + self.system_dir, + numb_test=1, + detail_file=None, + has_atom_ener=False, + ) + test_data = data.get_test() + coord = test_data["coord"].reshape([1, -1]) + box = test_data["box"][:1] + atype = test_data["type"][0] + ret = dp.eval( + coord, + box, + atype, + fparam=None, + aparam=None, + atomic=False, + efield=None, + mixed_type=False, + spin=None, + ) + force_pred = ret[1].reshape([1, -1]) + force_true = test_data["force"][:1] + weight = test_data["atom_pref"][:1] + diff = force_pred - force_true + mae_unweighted = np.sum(np.abs(diff)) / diff.size + rmse_unweighted = 
np.sqrt(np.sum(diff * diff) / diff.size) + denom = weight.sum() + mae_weighted = np.sum(np.abs(diff) * weight) / denom + rmse_weighted = np.sqrt(np.sum(diff * diff * weight) / denom) + np.testing.assert_allclose(err["mae_f"][0], mae_unweighted) + np.testing.assert_allclose(err["rmse_f"][0], rmse_unweighted) + np.testing.assert_allclose(err["mae_fw"][0], mae_weighted) + np.testing.assert_allclose(err["rmse_fw"][0], rmse_weighted) + os.unlink(tmp_model.name) + + def tearDown(self) -> None: + super().tearDown() + shutil.rmtree(self.system_dir) + + class TestDPTestPropertySeA(unittest.TestCase): def setUp(self) -> None: self.detail_file = "test_dp_test_property_detail" From 7f547b880c460aa081d2d1843355eab977e12987 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 19 Nov 2025 17:54:31 +0800 Subject: [PATCH 14/27] add use_default_pf --- deepmd/pt/loss/ener.py | 9 +++++++-- deepmd/utils/argcheck.py | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 10e2bf9971..75efd0277f 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -54,6 +54,7 @@ def __init__( use_l1_all: bool = False, inference=False, use_huber=False, + use_default_pf=False, huber_delta=0.01, **kwargs, ) -> None: @@ -131,6 +132,7 @@ def __init__( self.limit_pref_pf = limit_pref_pf self.start_pref_gf = start_pref_gf self.limit_pref_gf = limit_pref_gf + self.use_default_pf = use_default_pf self.relative_f = relative_f self.enable_atom_ener_coeff = enable_atom_ener_coeff self.numb_generalized_coord = numb_generalized_coord @@ -301,7 +303,9 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): if self.has_pf and "atom_pref" in label: atom_pref = label["atom_pref"] - find_atom_pref = label.get("find_atom_pref", 0.0) + find_atom_pref = ( + label.get("find_atom_pref", 0.0) if not self.use_default_pf else 1.0 + ) pref_pf = pref_pf * find_atom_pref atom_pref_reshape 
= atom_pref.reshape(-1) l2_pref_force_loss = (torch.square(diff_f) * atom_pref_reshape).mean() @@ -410,7 +414,7 @@ def label_requirement(self) -> list[DataRequirementItem]: high_prec=True, ) ) - if self.has_f: + if self.has_f or self.has_pf or self.relative_f is not None or self.has_gf: label_requirement.append( DataRequirementItem( "force", @@ -449,6 +453,7 @@ def label_requirement(self) -> list[DataRequirementItem]: must=False, high_prec=False, repeat=3, + default=1.0, ) ) if self.has_gf > 0: diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index a94200fbec..7f448fa1ee 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -2659,6 +2659,12 @@ def loss_ener(): default=0.00, doc=doc_limit_pref_pf, ), + Argument( + "use_default_pf", + bool, + optional=True, + default=False, + ), Argument("relative_f", [float, None], optional=True, doc=doc_relative_f), Argument( "enable_atom_ener_coeff", From 773fb32259514bd0cc610b7b22cea761131b0528 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 30 May 2025 21:54:47 +0800 Subject: [PATCH 15/27] add init from direct model --- deepmd/pt/train/training.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 126465f42f..12a891cf32 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -541,8 +541,30 @@ def collect_single_finetune_params( state_dict["_extra_state"] = self.wrapper.state_dict()[ "_extra_state" ] - - self.wrapper.load_state_dict(state_dict) + try: + self.wrapper.load_state_dict(state_dict) + except RuntimeError as e: + # init from direct fitting + rm_list = [] + for kk in state_dict: + # delete direct heads + if ( + "fitting_net.force_embed." 
in kk + or "fitting_net.noise_embed" in kk + ): + rm_list.append(kk) + for kk in rm_list: + state_dict.pop(kk) + state_dict["_extra_state"] = self.wrapper.state_dict()[ + "_extra_state" + ] + out_shape_list = [ + "model.Default.atomic_model.out_bias", + "model.Default.atomic_model.out_std", + ] + for kk in out_shape_list: + state_dict[kk] = state_dict[kk][:1, :, :1] + self.wrapper.load_state_dict(state_dict) # change bias for fine-tuning if finetune_model is not None: From a18fd7299026425b8fd5b3018c302fbf792b6166 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Sat, 31 May 2025 13:30:03 +0800 Subject: [PATCH 16/27] Update training.py --- deepmd/pt/train/training.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 12a891cf32..7e0761915f 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -541,6 +541,9 @@ def collect_single_finetune_params( state_dict["_extra_state"] = self.wrapper.state_dict()[ "_extra_state" ] + old_model_params = self.wrapper.state_dict()["_extra_state"][ + "model_params" + ] try: self.wrapper.load_state_dict(state_dict) except RuntimeError as e: @@ -555,9 +558,7 @@ def collect_single_finetune_params( rm_list.append(kk) for kk in rm_list: state_dict.pop(kk) - state_dict["_extra_state"] = self.wrapper.state_dict()[ - "_extra_state" - ] + state_dict["_extra_state"]["model_params"] = old_model_params out_shape_list = [ "model.Default.atomic_model.out_bias", "model.Default.atomic_model.out_std", From 32b94312631b2dceff189bd0d3f2ed13f1062a77 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 4 Sep 2025 19:53:30 +0800 Subject: [PATCH 17/27] add full default fparam --- .../dpmodel/atomic_model/base_atomic_model.py | 4 + .../dpmodel/atomic_model/dp_atomic_model.py | 4 + deepmd/dpmodel/fitting/dipole_fitting.py | 7 +- deepmd/dpmodel/fitting/dos_fitting.py | 4 +- 
deepmd/dpmodel/fitting/ener_fitting.py | 4 +- deepmd/dpmodel/fitting/general_fitting.py | 33 +++++- deepmd/dpmodel/fitting/invar_fitting.py | 7 +- .../dpmodel/fitting/polarizability_fitting.py | 9 +- deepmd/dpmodel/fitting/property_fitting.py | 9 +- deepmd/dpmodel/infer/deep_eval.py | 4 + deepmd/dpmodel/model/make_model.py | 4 + deepmd/infer/deep_eval.py | 2 + deepmd/jax/fitting/fitting.py | 1 + deepmd/pd/model/task/ener.py | 2 +- deepmd/pd/model/task/fitting.py | 9 +- deepmd/pd/model/task/invar_fitting.py | 2 +- deepmd/pt/infer/deep_eval.py | 7 +- .../model/atomic_model/base_atomic_model.py | 4 + .../pt/model/atomic_model/dp_atomic_model.py | 1 + deepmd/pt/model/model/make_model.py | 2 +- deepmd/pt/model/task/dipole.py | 10 +- deepmd/pt/model/task/dos.py | 4 +- deepmd/pt/model/task/ener.py | 9 +- deepmd/pt/model/task/fitting.py | 101 ++++++++++++------ deepmd/pt/model/task/invar_fitting.py | 10 +- deepmd/pt/model/task/polarizability.py | 13 ++- deepmd/pt/model/task/property.py | 9 +- 27 files changed, 211 insertions(+), 64 deletions(-) diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py index eb95886598..1158d278e5 100644 --- a/deepmd/dpmodel/atomic_model/base_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py @@ -88,6 +88,10 @@ def get_type_map(self) -> list[str]: """Get the type map.""" return self.type_map + def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" + return False + def reinit_atom_exclude( self, exclude_types: list[int] = [], diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py index 2fa072cc78..8bae07dcad 100644 --- a/deepmd/dpmodel/atomic_model/dp_atomic_model.py +++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py @@ -233,6 +233,10 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return 
self.fitting.get_dim_aparam() + def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" + return self.fitting.has_default_fparam() + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index fcaea43338..f49c148377 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -84,6 +84,9 @@ class DipoleFitting(GeneralFitting): Only reducible variable are differentiable. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + default_fparam: list[float], optional + The default frame parameter. If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. """ def __init__( @@ -110,6 +113,7 @@ def __init__( c_differentiable: bool = True, type_map: Optional[list[str]] = None, seed: Optional[Union[int, list[int]]] = None, + default_fparam: Optional[list[float]] = None, ) -> None: if tot_ener_zero: raise NotImplementedError("tot_ener_zero is not implemented") @@ -144,6 +148,7 @@ def __init__( exclude_types=exclude_types, type_map=type_map, seed=seed, + default_fparam=default_fparam, ) def _net_out_dim(self): @@ -161,7 +166,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) var_name = data.pop("var_name", None) assert var_name == "dipole" return super().deserialize(data) diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py index 2f6df77eac..4bc34b8abf 100644 --- a/deepmd/dpmodel/fitting/dos_fitting.py +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -46,6 +46,7 @@ def __init__( exclude_types: list[int] = [], type_map: 
Optional[list[str]] = None, seed: Optional[Union[int, list[int]]] = None, + default_fparam: Optional[list] = None, ) -> None: if bias_dos is not None: self.bias_dos = bias_dos @@ -70,12 +71,13 @@ def __init__( exclude_types=exclude_types, type_map=type_map, seed=seed, + default_fparam=default_fparam, ) @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data["numb_dos"] = data.pop("dim_out") data.pop("tot_ener_zero", None) data.pop("var_name", None) diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py index 6435b6468f..794c074485 100644 --- a/deepmd/dpmodel/fitting/ener_fitting.py +++ b/deepmd/dpmodel/fitting/ener_fitting.py @@ -46,6 +46,7 @@ def __init__( exclude_types: list[int] = [], type_map: Optional[list[str]] = None, seed: Optional[Union[int, list[int]]] = None, + default_fparam: Optional[list] = None, ) -> None: super().__init__( var_name="energy", @@ -70,12 +71,13 @@ def __init__( exclude_types=exclude_types, type_map=type_map, seed=seed, + default_fparam=default_fparam, ) @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index c6c51725bb..66bcd7007d 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -94,6 +94,9 @@ class GeneralFitting(NativeOP, BaseFitting): A list of strings. Give the name to each type of atoms. seed: Optional[Union[int, list[int]]] Random seed for initializing the network parameters. + default_fparam: list[float], optional + The default frame parameter. 
If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. """ def __init__( @@ -120,6 +123,7 @@ def __init__( remove_vaccum_contribution: Optional[list[bool]] = None, type_map: Optional[list[str]] = None, seed: Optional[Union[int, list[int]]] = None, + default_fparam: Optional[list[float]] = None, ) -> None: self.var_name = var_name self.ntypes = ntypes @@ -129,6 +133,7 @@ def __init__( self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam self.dim_case_embd = dim_case_embd + self.default_fparam = default_fparam self.rcond = rcond self.tot_ener_zero = tot_ener_zero self.trainable = trainable @@ -177,6 +182,15 @@ def __init__( self.case_embd = np.zeros(self.dim_case_embd, dtype=self.prec) else: self.case_embd = None + + if self.default_fparam is not None: + if self.numb_fparam > 0: + assert len(self.default_fparam) == self.numb_fparam, ( + "default_fparam length mismatch!" + ) + self.default_fparam_tensor = np.array(self.default_fparam, dtype=self.prec) + else: + self.default_fparam_tensor = None # init networks in_dim = ( self.dim_descrpt @@ -216,6 +230,10 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.numb_aparam + def has_default_fparam(self) -> bool: + """Check if the fitting has default frame parameters.""" + return self.default_fparam is not None + def get_sel_type(self) -> list[int]: """Get the selected atom types of this model. 
@@ -273,6 +291,8 @@ def __setitem__(self, key, value) -> None: self.case_embd = value elif key in ["scale"]: self.scale = value + elif key in ["default_fparam_tensor"]: + self.default_fparam_tensor = value else: raise KeyError(key) @@ -291,6 +311,8 @@ def __getitem__(self, key): return self.case_embd elif key in ["scale"]: return self.scale + elif key in ["default_fparam_tensor"]: + return self.default_fparam_tensor else: raise KeyError(key) @@ -305,7 +327,7 @@ def serialize(self) -> dict: """Serialize the fitting to dict.""" return { "@class": "Fitting", - "@version": 3, + "@version": 4, "var_name": self.var_name, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, @@ -314,6 +336,7 @@ def serialize(self) -> dict: "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, "dim_case_embd": self.dim_case_embd, + "default_fparam": self.default_fparam, "rcond": self.rcond, "activation_function": self.activation_function, "precision": self.precision, @@ -402,6 +425,14 @@ def _call_common( xx_zeros = xp.zeros_like(xx) else: xx_zeros = None + + if self.numb_fparam > 0 and fparam is None: + # use default fparam + assert self.default_fparam_tensor is not None + fparam = xp.tile( + xp.reshape(self.default_fparam_tensor, (1, self.numb_fparam)), (nf, 1) + ) + # check fparam dim, concate to input descriptor if self.numb_fparam > 0: assert fparam is not None, "fparam should not be None" diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py index b5d3a02d86..9e97eac22a 100644 --- a/deepmd/dpmodel/fitting/invar_fitting.py +++ b/deepmd/dpmodel/fitting/invar_fitting.py @@ -110,6 +110,9 @@ class InvarFitting(GeneralFitting): Atomic contributions of the excluded atom types are set zero. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + default_fparam: list[float], optional + The default frame parameter. 
If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. """ @@ -138,6 +141,7 @@ def __init__( exclude_types: list[int] = [], type_map: Optional[list[str]] = None, seed: Optional[Union[int, list[int]]] = None, + default_fparam: Optional[list[float]] = None, ) -> None: if tot_ener_zero: raise NotImplementedError("tot_ener_zero is not implemented") @@ -173,6 +177,7 @@ def __init__( else [x is not None for x in atom_ener], type_map=type_map, seed=seed, + default_fparam=default_fparam, ) def serialize(self) -> dict: @@ -185,7 +190,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) return super().deserialize(data) def _net_out_dim(self): diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index bfc337a177..cc20e4c932 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -90,6 +90,9 @@ class PolarFitting(GeneralFitting): Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + default_fparam: list[float], optional + The default frame parameter. If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. 
""" def __init__( @@ -117,6 +120,7 @@ def __init__( shift_diag: bool = True, type_map: Optional[list[str]] = None, seed: Optional[Union[int, list[int]]] = None, + default_fparam: Optional[list[float]] = None, ) -> None: if tot_ener_zero: raise NotImplementedError("tot_ener_zero is not implemented") @@ -164,6 +168,7 @@ def __init__( exclude_types=exclude_types, type_map=type_map, seed=seed, + default_fparam=default_fparam, ) def _net_out_dim(self): @@ -189,7 +194,7 @@ def __getitem__(self, key): def serialize(self) -> dict: data = super().serialize() data["type"] = "polar" - data["@version"] = 4 + data["@version"] = 5 data["embedding_width"] = self.embedding_width data["fit_diag"] = self.fit_diag data["shift_diag"] = self.shift_diag @@ -200,7 +205,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 4, 1) + check_version_compatibility(data.pop("@version", 1), 5, 1) var_name = data.pop("var_name", None) assert var_name == "polar" return super().deserialize(data) diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py index 6d0aa3546f..944b1f7958 100644 --- a/deepmd/dpmodel/fitting/property_fitting.py +++ b/deepmd/dpmodel/fitting/property_fitting.py @@ -61,6 +61,9 @@ class PropertyFittingNet(InvarFitting): Atomic contributions of the excluded atom types are set zero. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + default_fparam: list[float], optional + The default frame parameter. If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. 
""" def __init__( @@ -83,6 +86,7 @@ def __init__( mixed_types: bool = True, exclude_types: list[int] = [], type_map: Optional[list[str]] = None, + default_fparam: Optional[list] = None, # not used seed: Optional[int] = None, ) -> None: @@ -106,12 +110,13 @@ def __init__( mixed_types=mixed_types, exclude_types=exclude_types, type_map=type_map, + default_fparam=default_fparam, ) @classmethod def deserialize(cls, data: dict) -> "PropertyFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version"), 4, 1) + check_version_compatibility(data.pop("@version"), 5, 1) data.pop("dim_out") data["property_name"] = data.pop("var_name") data.pop("tot_ener_zero") @@ -131,6 +136,6 @@ def serialize(self) -> dict: "task_dim": self.task_dim, "intensive": self.intensive, } - dd["@version"] = 4 + dd["@version"] = 5 return dd diff --git a/deepmd/dpmodel/infer/deep_eval.py b/deepmd/dpmodel/infer/deep_eval.py index 91fa0ac2ac..1aea58ef33 100644 --- a/deepmd/dpmodel/infer/deep_eval.py +++ b/deepmd/dpmodel/infer/deep_eval.py @@ -120,6 +120,10 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this DP.""" return self.dp.get_dim_aparam() + def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" + return self.dp.has_default_fparam() + @property def model_type(self) -> type["DeepEvalWrapper"]: """The the evaluator of the model type.""" diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index ec0b986394..95ed7adbb7 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -563,6 +563,10 @@ def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this atomic model.""" return self.atomic_model.get_dim_aparam() + def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" + return self.atomic_model.has_default_fparam() + def get_sel_type(self) -> list[int]: """Get the 
selected atom types of this model. diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 881a2f899f..9079e0b8fe 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -161,6 +161,7 @@ def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this DP.""" def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" return False @abstractmethod @@ -374,6 +375,7 @@ def get_dim_fparam(self) -> int: return self.deep_eval.get_dim_fparam() def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" return self.deep_eval.has_default_fparam() def get_dim_aparam(self) -> int: diff --git a/deepmd/jax/fitting/fitting.py b/deepmd/jax/fitting/fitting.py index d62681490c..e69bded640 100644 --- a/deepmd/jax/fitting/fitting.py +++ b/deepmd/jax/fitting/fitting.py @@ -35,6 +35,7 @@ def setattr_for_general_fitting(name: str, value: Any) -> Any: "fparam_inv_std", "aparam_avg", "aparam_inv_std", + "default_fparam_tensor", }: value = to_jax_array(value) if value is not None: diff --git a/deepmd/pd/model/task/ener.py b/deepmd/pd/model/task/ener.py index 789ef75066..738990b2d8 100644 --- a/deepmd/pd/model/task/ener.py +++ b/deepmd/pd/model/task/ener.py @@ -72,7 +72,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) diff --git a/deepmd/pd/model/task/fitting.py b/deepmd/pd/model/task/fitting.py index a478c12f97..e7b11b35bc 100644 --- a/deepmd/pd/model/task/fitting.py +++ b/deepmd/pd/model/task/fitting.py @@ -95,6 +95,10 @@ class GeneralFitting(Fitting): Number of frame parameters. numb_aparam : int Number of atomic parameters. + default_fparam: list[float], optional + The default frame parameter. 
If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. + This parameter is not supported in PaddlePaddle. dim_case_embd : int Dimension of case specific embedding. activation_function : str @@ -145,6 +149,7 @@ def __init__( remove_vaccum_contribution: Optional[list[bool]] = None, type_map: Optional[list[str]] = None, use_aparam_as_mask: bool = False, + default_fparam: Optional[list[float]] = None, **kwargs, ) -> None: super().__init__() @@ -157,6 +162,7 @@ def __init__( self.numb_fparam = numb_fparam self.numb_aparam = numb_aparam self.dim_case_embd = dim_case_embd + self.default_fparam = default_fparam self.activation_function = activation_function self.precision = precision self.prec = PRECISION_DICT[self.precision] @@ -282,7 +288,7 @@ def serialize(self) -> dict: """Serialize the fitting to dict.""" return { "@class": "Fitting", - "@version": 3, + "@version": 4, "var_name": self.var_name, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, @@ -291,6 +297,7 @@ def serialize(self) -> dict: "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, "dim_case_embd": self.dim_case_embd, + "default_fparam": self.default_fparam, "activation_function": self.activation_function, "precision": self.precision, "mixed_types": self.mixed_types, diff --git a/deepmd/pd/model/task/invar_fitting.py b/deepmd/pd/model/task/invar_fitting.py index b92c862dc8..176acdeb20 100644 --- a/deepmd/pd/model/task/invar_fitting.py +++ b/deepmd/pd/model/task/invar_fitting.py @@ -147,7 +147,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) return super().deserialize(data) def output_def(self) -> FittingOutputDef: diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py 
index 07b9176c99..eba8314e4f 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -184,7 +184,12 @@ def get_dim_aparam(self) -> int: return self.dp.model["Default"].get_dim_aparam() def has_default_fparam(self) -> bool: - return self.dp.model["Default"].has_default_fparam() + """Check if the model has default frame parameters.""" + try: + return self.dp.model["Default"].has_default_fparam() + except AttributeError: + # for compatibility with old models + return False def get_intensive(self) -> bool: return self.dp.model["Default"].get_intensive() diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py index 56af5f4f43..37bab083d0 100644 --- a/deepmd/pt/model/atomic_model/base_atomic_model.py +++ b/deepmd/pt/model/atomic_model/base_atomic_model.py @@ -135,6 +135,10 @@ def get_intensive(self) -> bool: """Whether the fitting property is intensive.""" return False + def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" + return False + def reinit_atom_exclude( self, exclude_types: list[int] = [], diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index dda10b82a8..cee7aaf2f9 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -343,6 +343,7 @@ def get_dim_fparam(self) -> int: return self.fitting_net.get_dim_fparam() def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" return self.fitting_net.has_default_fparam() def get_default_fparam(self) -> Optional[torch.Tensor]: diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index d58261a481..d06f60f3f1 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -4,7 +4,6 @@ ) import torch -import numpy as np from deepmd.dpmodel import ( ModelOutputDef, @@ -525,6 +524,7 
@@ def get_dim_fparam(self) -> int: @torch.jit.export def has_default_fparam(self) -> bool: + """Check if the model has default frame parameters.""" return self.atomic_model.has_default_fparam() def get_default_fparam(self) -> Optional[torch.Tensor]: diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 65b64220ae..0fe6a1352d 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Any, Callable, Optional, Union, @@ -72,6 +73,9 @@ class DipoleFittingNet(GeneralFitting): Only reducible variable are differentiable. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + default_fparam: list[float], optional + The default frame parameter. If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. """ def __init__( @@ -93,7 +97,8 @@ def __init__( r_differentiable: bool = True, c_differentiable: bool = True, type_map: Optional[list[str]] = None, - **kwargs, + default_fparam: Optional[list] = None, + **kwargs: Any, ) -> None: self.embedding_width = embedding_width self.r_differentiable = r_differentiable @@ -114,6 +119,7 @@ def __init__( seed=seed, exclude_types=exclude_types, type_map=type_map, + default_fparam=default_fparam, **kwargs, ) @@ -132,7 +138,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data.pop("var_name", None) return super().deserialize(data) diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py index 568ef81c92..afbed5f748 100644 --- a/deepmd/pt/model/task/dos.py +++ b/deepmd/pt/model/task/dos.py @@ -57,6 +57,7 @@ def __init__( exclude_types: list[int] = [], 
mixed_types: bool = True, type_map: Optional[list[str]] = None, + default_fparam: Optional[list] = None, ) -> None: if bias_dos is not None: self.bias_dos = bias_dos @@ -83,6 +84,7 @@ def __init__( exclude_types=exclude_types, trainable=trainable, type_map=type_map, + default_fparam=default_fparam, ) def output_def(self) -> FittingOutputDef: @@ -101,7 +103,7 @@ def output_def(self) -> FittingOutputDef: @classmethod def deserialize(cls, data: dict) -> "DOSFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data.pop("@class", None) data.pop("var_name", None) data.pop("tot_ener_zero", None) diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 3968e21cdc..fb2c9111f7 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Any, Optional, Union, ) @@ -67,7 +68,7 @@ def __init__( seed: Optional[Union[int, list[int]]] = None, type_map: Optional[list[str]] = None, default_fparam: Optional[list] = None, - **kwargs, + **kwargs: Any, ) -> None: super().__init__( "energy", @@ -92,7 +93,7 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data.pop("var_name") data.pop("dim_out") return super().deserialize(data) @@ -270,7 +271,7 @@ def forward( "dforce": vec_out, } - + @Fitting.register("ener_readout") @fitting_check_output class EnergyFittingNetReadout(InvarFitting): @@ -420,4 +421,4 @@ def forward( edge_energy = torch.sum(edge_atomic_contrib, dim=-2) # energy out = out + edge_energy / self.norm_e_fact - return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)} \ No newline at end of file + return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)} diff --git 
a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 4da177c5a1..12efb7c1f6 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -4,10 +4,10 @@ abstractmethod, ) from typing import ( + Any, Callable, Optional, Union, - List, ) import numpy as np @@ -37,6 +37,9 @@ to_numpy_array, to_torch_tensor, ) +from deepmd.utils.env_mat_stat import ( + StatItem, +) from deepmd.utils.finetune import ( get_index_between_two_maps, map_atom_exclude_types, @@ -44,9 +47,6 @@ from deepmd.utils.path import ( DPPath, ) -from deepmd.utils.env_mat_stat import ( - StatItem, -) dtype = env.GLOBAL_PT_FLOAT_PRECISION device = env.DEVICE @@ -62,7 +62,9 @@ def __new__(cls, *args, **kwargs): return BaseFitting.__new__(BaseFitting, *args, **kwargs) return super().__new__(cls) - def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False) -> None: + def share_params( + self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False + ) -> None: """ Share the parameters of self to the base_class with shared_level during multitask training. 
If not start from checkpoint (resume is False), @@ -81,16 +83,22 @@ def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2 for ii in range(self.numb_fparam): base_fparam[ii] += self.get_stats()["fparam"][ii] * model_prob fparam_avg = np.array([ii.compute_avg() for ii in base_fparam]) - fparam_std = np.array([ii.compute_std(protection=protection) for ii in base_fparam]) + fparam_std = np.array( + [ii.compute_std(protection=protection) for ii in base_fparam] + ) fparam_inv_std = 1.0 / fparam_std base_class.fparam_avg.copy_( torch.tensor( - fparam_avg, device=env.DEVICE, dtype=base_class.fparam_avg.dtype + fparam_avg, + device=env.DEVICE, + dtype=base_class.fparam_avg.dtype, ) ) base_class.fparam_inv_std.copy_( torch.tensor( - fparam_inv_std, device=env.DEVICE, dtype=base_class.fparam_inv_std.dtype + fparam_inv_std, + device=env.DEVICE, + dtype=base_class.fparam_inv_std.dtype, ) ) self.fparam_avg = base_class.fparam_avg @@ -104,18 +112,24 @@ def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2 for ii in range(self.numb_aparam): base_aparam[ii] += self.get_stats()["aparam"][ii] * model_prob aparam_avg = np.array([ii.compute_avg() for ii in base_aparam]) - aparam_std = np.array([ii.compute_std(protection=protection) for ii in base_aparam]) + aparam_std = np.array( + [ii.compute_std(protection=protection) for ii in base_aparam] + ) aparam_inv_std = 1.0 / aparam_std base_class.aparam_avg.copy_( torch.tensor( - aparam_avg, device=env.DEVICE, dtype=base_class.aparam_avg.dtype + aparam_avg, + device=env.DEVICE, + dtype=base_class.aparam_avg.dtype, ) ) base_class.aparam_inv_std.copy_( torch.tensor( - aparam_inv_std, device=env.DEVICE, dtype=base_class.aparam_inv_std.dtype + aparam_inv_std, + device=env.DEVICE, + dtype=base_class.aparam_inv_std.dtype, ) - ) + ) self.aparam_avg = base_class.aparam_avg self.aparam_inv_std = base_class.aparam_inv_std @@ -133,7 +147,7 @@ def save_to_file_fparam( Parameters ---------- - path : 
DPPath + stat_file_path : DPPath The path to save the statistics of fparam. """ assert stat_file_path is not None @@ -144,7 +158,9 @@ def save_to_file_fparam( _fparam_stat = [] for ii in range(self.numb_fparam): _tmp_stat = self.stats["fparam"][ii] - _fparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum]) + _fparam_stat.append( + [_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum] + ) _fparam_stat = np.array(_fparam_stat) fp.save_numpy(_fparam_stat) log.info(f"Save fparam stats to {fp}.") @@ -157,7 +173,7 @@ def save_to_file_aparam( Parameters ---------- - path : DPPath + stat_file_path : DPPath The path to save the statistics of aparam. """ assert stat_file_path is not None @@ -168,7 +184,9 @@ def save_to_file_aparam( _aparam_stat = [] for ii in range(self.numb_aparam): _tmp_stat = self.stats["aparam"][ii] - _aparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum]) + _aparam_stat.append( + [_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum] + ) _aparam_stat = np.array(_aparam_stat) fp.save_numpy(_aparam_stat) log.info(f"Save aparam stats to {fp}.") @@ -178,7 +196,7 @@ def restore_fparam_from_file(self, stat_file_path: DPPath) -> None: Parameters ---------- - path : DPPath + stat_file_path : DPPath The path to load the statistics of fparam. """ fp = stat_file_path / "fparam" @@ -186,7 +204,9 @@ def restore_fparam_from_file(self, stat_file_path: DPPath) -> None: assert arr.shape == (self.numb_fparam, 3) _fparam_stat = [] for ii in range(self.numb_fparam): - _fparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2])) + _fparam_stat.append( + StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2]) + ) self.stats["fparam"] = _fparam_stat log.info(f"Load fparam stats from {fp}.") @@ -195,7 +215,7 @@ def restore_aparam_from_file(self, stat_file_path: DPPath) -> None: Parameters ---------- - path : DPPath + stat_file_path : DPPath The path to load the statistics of aparam. 
""" fp = stat_file_path / "aparam" @@ -203,7 +223,9 @@ def restore_aparam_from_file(self, stat_file_path: DPPath) -> None: assert arr.shape == (self.numb_aparam, 3) _aparam_stat = [] for ii in range(self.numb_aparam): - _aparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2])) + _aparam_stat.append( + StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2]) + ) self.stats["aparam"] = _aparam_stat log.info(f"Load aparam stats from {fp}.") @@ -244,7 +266,9 @@ def compute_input_stats( else: sampled = merged() if callable(merged) else merged self.stats["fparam"] = [] - cat_data = to_numpy_array(torch.cat([frame["fparam"] for frame in sampled], dim=0)) + cat_data = to_numpy_array( + torch.cat([frame["fparam"] for frame in sampled], dim=0) + ) cat_data = np.reshape(cat_data, [-1, self.numb_fparam]) sumv = np.sum(cat_data, axis=0) sumv2 = np.sum(cat_data * cat_data, axis=0) @@ -261,7 +285,9 @@ def compute_input_stats( self.save_to_file_fparam(stat_file_path) fparam_avg = np.array([ii.compute_avg() for ii in self.stats["fparam"]]) - fparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["fparam"]]) + fparam_std = np.array( + [ii.compute_std(protection=protection) for ii in self.stats["fparam"]] + ) fparam_inv_std = 1.0 / fparam_std log.info(f"fparam_avg is {fparam_avg}, fparam_inv_std is {fparam_inv_std}") self.fparam_avg.copy_(to_torch_tensor(fparam_avg)) @@ -297,18 +323,18 @@ def compute_input_stats( self.save_to_file_aparam(stat_file_path) aparam_avg = np.array([ii.compute_avg() for ii in self.stats["aparam"]]) - aparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["aparam"]]) + aparam_std = np.array( + [ii.compute_std(protection=protection) for ii in self.stats["aparam"]] + ) aparam_inv_std = 1.0 / aparam_std log.info(f"aparam_avg is {aparam_avg}, aparam_inv_std is {aparam_inv_std}") self.aparam_avg.copy_(to_torch_tensor(aparam_avg)) 
self.aparam_inv_std.copy_(to_torch_tensor(aparam_inv_std)) - def get_stats(self) -> dict[str, List[StatItem]]: + def get_stats(self) -> dict[str, list[StatItem]]: """Get the statistics of the fitting_net.""" if self.stats is None: - raise RuntimeError( - "The statistics of fitting net has not been computed." - ) + raise RuntimeError("The statistics of fitting net has not been computed.") return self.stats @@ -362,6 +388,9 @@ class GeneralFitting(Fitting): A list of strings. Give the name to each type of atoms. use_aparam_as_mask: bool If True, the aparam will not be used in fitting net for embedding. + default_fparam: list[float], optional + The default frame parameter. If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. """ def __init__( @@ -385,8 +414,8 @@ def __init__( remove_vaccum_contribution: Optional[list[bool]] = None, type_map: Optional[list[str]] = None, use_aparam_as_mask: bool = False, - default_fparam: Optional[list] = None, - **kwargs, + default_fparam: Optional[list[float]] = None, + **kwargs: Any, ) -> None: super().__init__() self.var_name = var_name @@ -461,9 +490,9 @@ def __init__( if self.default_fparam is not None: if self.numb_fparam > 0: - assert ( - len(self.default_fparam) == self.numb_fparam - ), "default_fparam length mismatch!" + assert len(self.default_fparam) == self.numb_fparam, ( + "default_fparam length mismatch!" 
+ ) self.register_buffer( "default_fparam_tensor", torch.tensor( @@ -537,7 +566,7 @@ def serialize(self) -> dict: """Serialize the fitting to dict.""" return { "@class": "Fitting", - "@version": 3, + "@version": 4, "var_name": self.var_name, "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, @@ -546,6 +575,7 @@ def serialize(self) -> dict: "numb_fparam": self.numb_fparam, "numb_aparam": self.numb_aparam, "dim_case_embd": self.dim_case_embd, + "default_fparam": self.default_fparam, "activation_function": self.activation_function, "precision": self.precision, "mixed_types": self.mixed_types, @@ -590,6 +620,7 @@ def get_dim_fparam(self) -> int: return self.numb_fparam def has_default_fparam(self) -> bool: + """Check if the fitting has default frame parameters.""" return self.default_fparam is not None def get_default_fparam(self) -> Optional[torch.Tensor]: @@ -653,6 +684,8 @@ def __setitem__(self, key, value) -> None: self.case_embd = value elif key in ["scale"]: self.scale = value + elif key in ["default_fparam_tensor"]: + self.default_fparam_tensor = value else: raise KeyError(key) @@ -671,6 +704,8 @@ def __getitem__(self, key): return self.case_embd elif key in ["scale"]: return self.scale + elif key in ["default_fparam_tensor"]: + return self.default_fparam_tensor else: raise KeyError(key) diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index ede0315e87..d8391f9fcf 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Any, Optional, Union, ) @@ -80,6 +81,9 @@ class InvarFitting(GeneralFitting): A list of strings. Give the name to each type of atoms. use_aparam_as_mask: bool If True, the aparam will not be used in fitting net for embedding. + default_fparam: list[float], optional + The default frame parameter. 
If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. """ def __init__( @@ -103,8 +107,8 @@ def __init__( atom_ener: Optional[list[Optional[torch.Tensor]]] = None, type_map: Optional[list[str]] = None, use_aparam_as_mask: bool = False, - default_fparam: Optional[list] = None, - **kwargs, + default_fparam: Optional[list[float]] = None, + **kwargs: Any, ) -> None: self.dim_out = dim_out self.atom_ener = atom_ener @@ -147,7 +151,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) return super().deserialize(data) def output_def(self) -> FittingOutputDef: diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index a326802918..282891e7a5 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Any, Optional, Union, ) @@ -75,7 +76,9 @@ class PolarFittingNet(GeneralFitting): Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. - + default_fparam: list[float], optional + The default frame parameter. If set, when `fparam.npy` files are not included in the data system, + this value will be used as the default value for the frame parameter in the fitting net. 
""" def __init__( @@ -98,7 +101,8 @@ def __init__( scale: Optional[Union[list[float], float]] = None, shift_diag: bool = True, type_map: Optional[list[str]] = None, - **kwargs, + default_fparam: Optional[list] = None, + **kwargs: Any, ) -> None: self.embedding_width = embedding_width self.fit_diag = fit_diag @@ -139,6 +143,7 @@ def __init__( seed=seed, exclude_types=exclude_types, type_map=type_map, + default_fparam=default_fparam, **kwargs, ) @@ -195,7 +200,7 @@ def change_type_map( def serialize(self) -> dict: data = super().serialize() data["type"] = "polar" - data["@version"] = 4 + data["@version"] = 5 data["embedding_width"] = self.embedding_width data["fit_diag"] = self.fit_diag data["shift_diag"] = self.shift_diag @@ -206,7 +211,7 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, data: dict) -> "GeneralFitting": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 4, 1) + check_version_compatibility(data.pop("@version", 1), 5, 1) data.pop("var_name", None) return super().deserialize(data) diff --git a/deepmd/pt/model/task/property.py b/deepmd/pt/model/task/property.py index 5ef0cd0233..c2440b7de3 100644 --- a/deepmd/pt/model/task/property.py +++ b/deepmd/pt/model/task/property.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Any, Optional, Union, ) @@ -91,7 +92,8 @@ def __init__( mixed_types: bool = True, trainable: Union[bool, list[bool]] = True, seed: Optional[int] = None, - **kwargs, + default_fparam: Optional[list] = None, + **kwargs: Any, ) -> None: self.task_dim = task_dim self.intensive = intensive @@ -111,6 +113,7 @@ def __init__( mixed_types=mixed_types, trainable=trainable, seed=seed, + default_fparam=default_fparam, **kwargs, ) @@ -135,7 +138,7 @@ def get_intensive(self) -> bool: @classmethod def deserialize(cls, data: dict) -> "PropertyFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 4, 1) + 
check_version_compatibility(data.pop("@version", 1), 5, 1) data.pop("dim_out") data["property_name"] = data.pop("var_name") obj = super().deserialize(data) @@ -150,7 +153,7 @@ def serialize(self) -> dict: "task_dim": self.task_dim, "intensive": self.intensive, } - dd["@version"] = 4 + dd["@version"] = 5 return dd From c471471929c0c3b71fb116621cfdfd309c8a2149 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 4 Sep 2025 19:53:30 +0800 Subject: [PATCH 18/27] add argcheck for default_fparam --- deepmd/dpmodel/fitting/property_fitting.py | 18 ++++++++++ deepmd/utils/argcheck.py | 40 ++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py index 944b1f7958..dbd415bde1 100644 --- a/deepmd/dpmodel/fitting/property_fitting.py +++ b/deepmd/dpmodel/fitting/property_fitting.py @@ -12,6 +12,10 @@ from deepmd.dpmodel.fitting.invar_fitting import ( InvarFitting, ) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) from deepmd.utils.version import ( check_version_compatibility, ) @@ -113,6 +117,20 @@ def __init__( default_fparam=default_fparam, ) + def output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + self.var_name, + [self.dim_out], + reducible=True, + r_differentiable=False, + c_differentiable=False, + intensive=self.intensive, + ), + ] + ) + @classmethod def deserialize(cls, data: dict) -> "PropertyFittingNet": data = data.copy() diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 7f448fa1ee..8b0f101d3c 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1782,6 +1782,7 @@ def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant: def fitting_ener(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." 
doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' @@ -1810,6 +1811,13 @@ def fitting_ener(): Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), Argument("default_fparam", list, optional=True, default=None), + Argument( + "default_fparam", + list[float], + optional=True, + default=None, + doc=doc_only_pt_supported + doc_default_fparam, + ), Argument( "dim_case_embd", int, @@ -1867,6 +1875,7 @@ def fitting_ener(): def fitting_dos(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_default_fparam = "The default frame parameter. 
If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' @@ -1884,6 +1893,13 @@ def fitting_dos(): return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "default_fparam", + list[float], + optional=True, + default=None, + doc=doc_only_pt_supported + doc_default_fparam, + ), Argument( "dim_case_embd", int, @@ -1922,6 +1938,7 @@ def fitting_dos(): def fitting_property(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." 
doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built" doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' @@ -1937,6 +1954,13 @@ def fitting_property(): return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "default_fparam", + list[float], + optional=True, + default=None, + doc=doc_only_pt_supported + doc_default_fparam, + ), Argument( "dim_case_embd", int, @@ -1984,6 +2008,7 @@ def fitting_property(): def fitting_polar(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. 
Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' @@ -2013,6 +2038,13 @@ def fitting_polar(): default=0, doc=doc_only_pt_supported + doc_numb_aparam, ), + Argument( + "default_fparam", + list[float], + optional=True, + default=None, + doc=doc_only_pt_supported + doc_default_fparam, + ), Argument( "dim_case_embd", int, @@ -2062,6 +2094,7 @@ def fitting_polar(): def fitting_dipole(): doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' 
@@ -2084,6 +2117,13 @@ def fitting_dipole(): default=0, doc=doc_only_pt_supported + doc_numb_aparam, ), + Argument( + "default_fparam", + list[float], + optional=True, + default=None, + doc=doc_only_pt_supported + doc_default_fparam, + ), Argument( "dim_case_embd", int, From b37360a7bb0028a62e9c47b98d6bb9f55d25b57c Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 22 Dec 2025 16:46:15 +0800 Subject: [PATCH 19/27] Update argcheck.py --- deepmd/utils/argcheck.py | 1 - 1 file changed, 1 deletion(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 8b0f101d3c..709c0daaec 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1810,7 +1810,6 @@ def fitting_ener(): return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), - Argument("default_fparam", list, optional=True, default=None), Argument( "default_fparam", list[float], From 6c9026085e1a56cbfd9109bee6209c71e2f6918a Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Tue, 16 Dec 2025 23:13:33 +0800 Subject: [PATCH 20/27] add f_use_norm --- deepmd/pt/loss/ener.py | 28 +++++++++++++++++++++------- deepmd/utils/argcheck.py | 6 ++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 75efd0277f..e41b45fa5a 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -52,11 +52,11 @@ def __init__( limit_pref_gf: float = 0.0, numb_generalized_coord: int = 0, use_l1_all: bool = False, - inference=False, - use_huber=False, - use_default_pf=False, - huber_delta=0.01, - **kwargs, + inference: bool = False, + use_huber: bool = False, + use_default_pf: bool = False, + f_use_norm: bool = False, + huber_delta: float = 0.01, ) -> None: r"""Construct a layer to compute loss on energy, force and virial. 
@@ -144,6 +144,9 @@ def __init__( self.inference = inference self.use_huber = use_huber self.huber_delta = huber_delta + self.f_use_norm = f_use_norm + if self.f_use_norm: + assert self.use_huber, "f_use_norm can only be True when use_huber is True." if self.use_huber and ( self.has_pf or self.has_gf or self.relative_f is not None ): @@ -278,9 +281,20 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): if not self.use_huber: loss += (pref_f * l2_force_loss).to(GLOBAL_PT_FLOAT_PRECISION) else: + if not self.f_use_norm: + huber_f_input1 = force_pred.reshape(-1) + huber_f_input2 = force_label.reshape(-1) + else: + huber_f_input1 = torch.linalg.vector_norm( + (force_label - force_pred).reshape(-1, 3), + ord=2, + dim=1, + keepdim=True, + ) # l2 norm mae + huber_f_input2 = torch.zeros_like(huber_f_input1) l_huber_loss = custom_huber_loss( - force_pred.reshape(-1), - force_label.reshape(-1), + huber_f_input1, + huber_f_input2, delta=self.huber_delta, ) loss += pref_f * l_huber_loss diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 709c0daaec..9b8ec058a8 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -2740,6 +2740,12 @@ def loss_ener(): default=False, doc=doc_use_huber, ), + Argument( + "f_use_norm", + bool, + optional=True, + default=False, + ), Argument( "huber_delta", float, From c39433bc89d2bbf6f46a972157b78280f16baf9c Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Thu, 18 Dec 2025 18:53:21 +0800 Subject: [PATCH 21/27] new mae --- deepmd/pt/loss/ener.py | 70 ++++++++++++++++++++++++++-------------- deepmd/utils/argcheck.py | 6 ++++ 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index e41b45fa5a..be9138b606 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -146,7 +146,9 @@ def __init__( self.huber_delta = huber_delta self.f_use_norm = f_use_norm if self.f_use_norm: - assert 
self.use_huber, "f_use_norm can only be True when use_huber is True." + assert self.use_huber or self.use_l1_all, ( + "f_use_norm can only be True when use_huber or use_l1_all is True." + ) if self.use_huber and ( self.has_pf or self.has_gf or self.relative_f is not None ): @@ -233,15 +235,11 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): l1_ener_loss = F.l1_loss( energy_pred.reshape(-1), energy_label.reshape(-1), - reduction="sum", + reduction="mean", ) - loss += pref_e * l1_ener_loss + loss += atom_norm * (pref_e * l1_ener_loss) more_loss["mae_e"] = self.display_if_exist( - F.l1_loss( - energy_pred.reshape(-1), - energy_label.reshape(-1), - reduction="mean", - ).detach(), + l1_ener_loss.detach() * atom_norm, find_energy, ) # more_loss['log_keys'].append('rmse_e') @@ -303,11 +301,21 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): rmse_f.detach(), find_force ) else: - l1_force_loss = F.l1_loss(force_label, force_pred, reduction="none") + l1_force_loss = F.l1_loss( + force_label.reshape(-1), + force_pred.reshape(-1), + reduction="mean", + ) more_loss["mae_f"] = self.display_if_exist( - l1_force_loss.mean().detach(), find_force + l1_force_loss.detach(), find_force ) - l1_force_loss = l1_force_loss.sum(-1).mean(-1).sum() + if self.f_use_norm: + l1_force_loss = torch.linalg.vector_norm( + (force_label - force_pred).reshape(-1, 3), + ord=2, + dim=1, + keepdim=True, + ).mean() # l2 norm mae loss += (pref_f * l1_force_loss).to(GLOBAL_PT_FLOAT_PRECISION) if mae: mae_f = torch.mean(torch.abs(diff_f)) @@ -362,22 +370,36 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): find_virial = label.get("find_virial", 0.0) pref_v = pref_v * find_virial diff_v = label["virial"] - model_pred["virial"].reshape(-1, 9) - l2_virial_loss = torch.mean(torch.square(diff_v)) - if not self.inference: - more_loss["l2_virial_loss"] = self.display_if_exist( - l2_virial_loss.detach(), find_virial + if 
not self.use_l1_all: + l2_virial_loss = torch.mean(torch.square(diff_v)) + if not self.inference: + more_loss["l2_virial_loss"] = self.display_if_exist( + l2_virial_loss.detach(), find_virial + ) + if not self.use_huber: + loss += atom_norm * (pref_v * l2_virial_loss) + else: + l_huber_loss = custom_huber_loss( + atom_norm * model_pred["virial"].reshape(-1), + atom_norm * label["virial"].reshape(-1), + delta=self.huber_delta, + ) + loss += pref_v * l_huber_loss + rmse_v = l2_virial_loss.sqrt() * atom_norm + more_loss["rmse_v"] = self.display_if_exist( + rmse_v.detach(), find_virial ) - if not self.use_huber: - loss += atom_norm * (pref_v * l2_virial_loss) else: - l_huber_loss = custom_huber_loss( - atom_norm * model_pred["virial"].reshape(-1), - atom_norm * label["virial"].reshape(-1), - delta=self.huber_delta, + l1_virial_loss = F.l1_loss( + label["virial"].reshape(-1), + model_pred["virial"].reshape(-1), + reduction="mean", + ) + loss += atom_norm * (pref_v * l1_virial_loss) + more_loss["mae_v"] = self.display_if_exist( + l1_virial_loss.detach() * atom_norm, + find_virial, ) - loss += pref_v * l_huber_loss - rmse_v = l2_virial_loss.sqrt() * atom_norm - more_loss["rmse_v"] = self.display_if_exist(rmse_v.detach(), find_virial) if mae: mae_v = torch.mean(torch.abs(diff_v)) * atom_norm more_loss["mae_v"] = self.display_if_exist(mae_v.detach(), find_virial) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 9b8ec058a8..bbb0c01a4c 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -2746,6 +2746,12 @@ def loss_ener(): optional=True, default=False, ), + Argument( + "use_l1_all", + bool, + optional=True, + default=False, + ), Argument( "huber_delta", float, From 4e9e5ea3bdae5403391da1b8773c457a5363dc6e Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 22 Dec 2025 17:25:22 +0800 Subject: [PATCH 22/27] Update ener.py --- deepmd/pt/loss/ener.py | 44 +++++++++++++++++++++++++++++++++--------- 1 
file changed, 35 insertions(+), 9 deletions(-) diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index be9138b606..367d5c9092 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -330,16 +330,42 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): ) pref_pf = pref_pf * find_atom_pref atom_pref_reshape = atom_pref.reshape(-1) - l2_pref_force_loss = (torch.square(diff_f) * atom_pref_reshape).mean() - if not self.inference: - more_loss["l2_pref_force_loss"] = self.display_if_exist( - l2_pref_force_loss.detach(), find_atom_pref + if not self.use_l1_all: + l2_pref_force_loss = ( + torch.square(diff_f) * atom_pref_reshape + ).mean() + if not self.inference: + more_loss["l2_pref_force_loss"] = self.display_if_exist( + l2_pref_force_loss.detach(), find_atom_pref + ) + if not self.use_huber: + loss += (pref_pf * l2_pref_force_loss).to( + GLOBAL_PT_FLOAT_PRECISION + ) + else: + l_huber_loss = custom_huber_loss( + (atom_pref * force_pred).reshape(-1), + (atom_pref * force_label).reshape(-1), + delta=self.huber_delta, + ) + loss += pref_pf * l_huber_loss + rmse_pf = l2_pref_force_loss.sqrt() + more_loss["rmse_pf"] = self.display_if_exist( + rmse_pf.detach(), find_atom_pref ) - loss += (pref_pf * l2_pref_force_loss).to(GLOBAL_PT_FLOAT_PRECISION) - rmse_pf = l2_pref_force_loss.sqrt() - more_loss["rmse_pf"] = self.display_if_exist( - rmse_pf.detach(), find_atom_pref - ) + else: + l1_pref_force_loss = (torch.abs(diff_f) * atom_pref_reshape).mean() + more_loss["mae_f"] = self.display_if_exist( + l1_pref_force_loss.detach(), find_atom_pref + ) + if self.f_use_norm: + l1_pref_force_loss = torch.linalg.vector_norm( + (diff_f * atom_pref_reshape).reshape(-1, 3), + ord=2, + dim=1, + keepdim=True, + ).mean() # l2 norm mae + loss += (pref_pf * l1_pref_force_loss).to(GLOBAL_PT_FLOAT_PRECISION) if self.has_gf and "drdq" in label: drdq = label["drdq"] From 5808b2e19a8e37f97d03219e1802c0c34284b693 Mon Sep 17 00:00:00 2001 From: Duo 
<50307526+iProzd@users.noreply.github.com> Date: Mon, 22 Dec 2025 17:44:35 +0800 Subject: [PATCH 23/27] Update ener.py --- deepmd/pt/loss/ener.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 367d5c9092..00a352424e 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Any, Optional, ) @@ -57,6 +58,7 @@ def __init__( use_default_pf: bool = False, f_use_norm: bool = False, huber_delta: float = 0.01, + **kwargs: Any, ) -> None: r"""Construct a layer to compute loss on energy, force and virial. From ac5fbf92749b4e8bddd4e428a0760001ca14f6a8 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Mon, 17 Nov 2025 15:56:21 +0800 Subject: [PATCH 24/27] fix fitting fparam stat --- .../pt/model/atomic_model/dp_atomic_model.py | 9 ++++++++ deepmd/pt/model/task/fitting.py | 21 ++++++++++++++----- deepmd/pt/train/wrapper.py | 20 +++++++++++++++--- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py index cee7aaf2f9..832c2ee9f6 100644 --- a/deepmd/pt/model/atomic_model/dp_atomic_model.py +++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py @@ -328,6 +328,15 @@ def wrapped_sampler(): atom_exclude_types = self.atom_excl.get_exclude_types() for sample in sampled: sample["atom_exclude_types"] = list(atom_exclude_types) + if ( + "find_fparam" not in sampled[0] + and "fparam" not in sampled[0] + and self.has_default_fparam() + ): + default_fparam = self.get_default_fparam() + for sample in sampled: + nframe = sample["atype"].shape[0] + sample["fparam"] = default_fparam.repeat(nframe, 1) return sampled self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path) diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 12efb7c1f6..7ed071c771 100644 --- 
a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -63,7 +63,12 @@ def __new__(cls, *args, **kwargs): return super().__new__(cls) def share_params( - self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False + self, + base_class: "Fitting", + shared_level: int, + model_prob: float = 1.0, + protection: float = 1e-2, + resume: bool = False, ) -> None: """ Share the parameters of self to the base_class with shared_level during multitask training. @@ -132,7 +137,6 @@ def share_params( ) self.aparam_avg = base_class.aparam_avg self.aparam_inv_std = base_class.aparam_inv_std - # the following will successfully link all the params except buffers, which need manually link. for item in self._modules: self._modules[item] = base_class._modules[item] @@ -261,7 +265,11 @@ def compute_input_stats( # stat fparam if self.numb_fparam > 0: - if stat_file_path is not None and stat_file_path.is_dir(): + if ( + stat_file_path is not None + and stat_file_path.is_dir() + and (stat_file_path / "fparam").is_file() + ): self.restore_fparam_from_file(stat_file_path) else: sampled = merged() if callable(merged) else merged @@ -292,10 +300,13 @@ def compute_input_stats( log.info(f"fparam_avg is {fparam_avg}, fparam_inv_std is {fparam_inv_std}") self.fparam_avg.copy_(to_torch_tensor(fparam_avg)) self.fparam_inv_std.copy_(to_torch_tensor(fparam_inv_std)) - # stat aparam if self.numb_aparam > 0: - if stat_file_path is not None and stat_file_path.is_dir(): + if ( + stat_file_path is not None + and stat_file_path.is_dir() + and (stat_file_path / "aparam").is_file() + ): self.restore_aparam_from_file(stat_file_path) else: sampled = merged() if callable(merged) else merged diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py index 4ac86d43d6..cdef1e8533 100644 --- a/deepmd/pt/train/wrapper.py +++ b/deepmd/pt/train/wrapper.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging from typing import ( + Any, Optional, 
Union, ) @@ -59,7 +60,13 @@ def __init__( self.loss[task_key] = loss[task_key] self.inference_only = self.loss is None - def share_params(self, shared_links, model_key_prob_map, data_stat_protect=1e-2, resume=False) -> None: + def share_params( + self, + shared_links: dict[str, Any], + model_key_prob_map: dict, + data_stat_protect: float = 1e-2, + resume: bool = False, + ) -> None: """ Share the parameters of classes following rules defined in shared_links during multitask training. If not start from checkpoint (resume is False), @@ -129,9 +136,16 @@ def share_params(self, shared_links, model_key_prob_map, data_stat_protect=1e-2, link_class = self.model[ model_key_link ].atomic_model.__getattr__(class_type_link) - frac_prob = model_key_prob_map[model_key_link]/model_key_prob_map[model_key_base] + frac_prob = ( + model_key_prob_map[model_key_link] + / model_key_prob_map[model_key_base] + ) link_class.share_params( - base_class, shared_level_link, model_prob=frac_prob, protection=data_stat_protect, resume=resume + base_class, + shared_level_link, + model_prob=frac_prob, + protection=data_stat_protect, + resume=resume, ) log.warning( f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!" 
From 4cc677d6adf4fa1fd6202fbc6008bbd6bd0fe21f Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 16 Jan 2026 20:38:06 +0800 Subject: [PATCH 25/27] fix huber with atom_pref --- deepmd/pt/loss/ener.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py index 00a352424e..6ea48318da 100644 --- a/deepmd/pt/loss/ener.py +++ b/deepmd/pt/loss/ener.py @@ -346,8 +346,8 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False): ) else: l_huber_loss = custom_huber_loss( - (atom_pref * force_pred).reshape(-1), - (atom_pref * force_label).reshape(-1), + atom_pref_reshape * force_pred.reshape(-1), + atom_pref_reshape * force_label.reshape(-1), delta=self.huber_delta, ) loss += pref_pf * l_huber_loss From b4c8b6062eb6caee37a4e77ab45fdc4e673c8bd4 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Wed, 25 Mar 2026 16:50:27 +0800 Subject: [PATCH 26/27] fix has_default_fparam when dos or property --- deepmd/entrypoints/test.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index 69b1704471..a472387eab 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -678,7 +678,11 @@ def test_dos( if dp.get_dim_fparam() > 0: data.add( - "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False + "fparam", + dp.get_dim_fparam(), + atomic=False, + must=not dp.has_default_fparam(), + high_prec=False, ) if dp.get_dim_aparam() > 0: data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False) @@ -846,7 +850,11 @@ def test_property( if dp.get_dim_fparam() > 0: data.add( - "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False + "fparam", + dp.get_dim_fparam(), + atomic=False, + must=not dp.has_default_fparam(), + high_prec=False, ) if dp.get_dim_aparam() > 0: data.add("aparam", 
dp.get_dim_aparam(), atomic=True, must=True, high_prec=False) From ad98d0dff07954a4c720184a2dd91b532b7a88c2 Mon Sep 17 00:00:00 2001 From: Yuxiang Liu Date: Wed, 25 Mar 2026 09:21:58 +0000 Subject: [PATCH 27/27] feat: Add softmax to property for cooh nframes=3 --- deepmd/infer/deep_property.py | 11 +++++++++++ deepmd/pt/loss/property.py | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/deepmd/infer/deep_property.py b/deepmd/infer/deep_property.py index 5944491cc0..ddd76828bf 100644 --- a/deepmd/infer/deep_property.py +++ b/deepmd/infer/deep_property.py @@ -139,6 +139,17 @@ def eval( atomic_property = results[self.get_var_name()].reshape( nframes, natoms, self.get_task_dim() ) + # --- softmax-weighted averaging over frames (minimal) --- + print(f"Nframes == {nframes}") + if nframes != 3: + raise RuntimeError(f"Expected nframes == 3, got {nframes}") + scores = property.mean(axis=1) # (3,) + # If you want to favor *smaller* values (e.g., energies), use: scores = -scores + w = np.exp(scores - scores.max()); w /= w.sum() # (3,) + avg = (w[:, None] * property).sum(axis=0, keepdims=True) # (1, D) + property[:] = np.repeat(avg, nframes, axis=0) # (3, D) + # -------------------------------------------------------- + property = results[f"{self.get_var_name()}_redu"].reshape( nframes, self.get_task_dim() ) diff --git a/deepmd/pt/loss/property.py b/deepmd/pt/loss/property.py index 9d42c81b45..a801d9de23 100644 --- a/deepmd/pt/loss/property.py +++ b/deepmd/pt/loss/property.py @@ -91,6 +91,27 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False """ model_pred = model(**input_dict) var_name = self.var_name + + # ---- Softmax-weighted averaging over the batch added by YL---- + # model_pred[var_name]: (nbz, task_dim) + # 1) get a scalar score per sample (mean over task_dim) + # (If you want to favor smaller values, use `score_per_sample = -model_pred[var_name].mean(dim=1)`.) 
+ score_per_sample = model_pred[var_name].mean(dim=1) # (nbz,) + weights = F.softmax(score_per_sample, dim=0) # (nbz,) + # 2) weighted average vector (1, task_dim) + avg_vec = (weights.unsqueeze(1) * model_pred[var_name]).sum(dim=0, keepdim=True) + # 3) replace all predictions with the averaged vector (broadcast over batch) + model_pred[var_name] = avg_vec.expand_as(model_pred[var_name]) + # ---------------------------------------------------- + + nbz = model_pred[var_name].shape[0] + #=======Raise error when nbz!=3======= + if nbz != 3: + raise RuntimeError( + f"[PropertyLoss] Expected batch size nbz == 3 for softmax-avg, got nbz == {nbz}. " + "Ensure your DataLoader yields triples (batch_size=3, drop_last=True)." + ) + nbz = model_pred[var_name].shape[0] assert model_pred[var_name].shape == (nbz, self.task_dim) assert label[var_name].shape == (nbz, self.task_dim)