From b8c0859cf8b090a2bb83ba99ec49d389f6cb1e73 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 18 Jun 2025 14:13:32 +0800
Subject: [PATCH 01/27] feat(pt): add default_fparam

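Add an optional `default_fparam` list to the PyTorch fitting net. When a
model has `numb_fparam > 0` and no `fparam` is supplied, the default is
tiled over the frames and used instead, so `fparam` is no longer a
mandatory data item for such models.

Example fitting-net settings:

      "numb_fparam": 2,
      "default_fparam": [0.0, 1.0],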
---
 deepmd/entrypoints/test.py                    |  8 ++++--
 deepmd/infer/deep_eval.py                     |  6 +++++
 deepmd/pt/infer/deep_eval.py                  |  3 +++
 .../pt/model/atomic_model/dp_atomic_model.py  |  3 +++
 deepmd/pt/model/model/make_model.py           |  4 +++
 deepmd/pt/model/task/ener.py                  |  2 ++
 deepmd/pt/model/task/fitting.py               | 27 ++++++++++++++++++-
 deepmd/pt/model/task/invar_fitting.py         |  2 ++
 deepmd/pt/train/training.py                   |  8 ++++--
 deepmd/utils/argcheck.py                      |  1 +
 10 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py
index db605b0de1..5b22d16be4 100644
--- a/deepmd/entrypoints/test.py
+++ b/deepmd/entrypoints/test.py
@@ -298,7 +298,11 @@ def test_ener(
         data.add("atom_ener", 1, atomic=True, must=True, high_prec=False)
     if dp.get_dim_fparam() > 0:
         data.add(
-            "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False
+            "fparam",
+            dp.get_dim_fparam(),
+            atomic=False,
+            must=not dp.has_default_fparam(),
+            high_prec=False,
         )
     if dp.get_dim_aparam() > 0:
         data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False)
@@ -334,7 +338,7 @@ def test_ener(
         atype = test_data["type"][:numb_test].reshape([numb_test, -1])
     else:
         atype = test_data["type"][0]
-    if dp.get_dim_fparam() > 0:
+    if dp.get_dim_fparam() > 0 and test_data["find_fparam"] != 0.0:
         fparam = test_data["fparam"][:numb_test]
     else:
         fparam = None
diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
index ee61abe58c..881a2f899f 100644
--- a/deepmd/infer/deep_eval.py
+++ b/deepmd/infer/deep_eval.py
@@ -160,6 +160,9 @@ def get_type_map(self) -> list[str]:
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this DP."""
 
+    def has_default_fparam(self) -> bool:
+        return False
+
     @abstractmethod
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this DP."""
@@ -370,6 +373,9 @@ def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this DP."""
         return self.deep_eval.get_dim_fparam()
 
+    def has_default_fparam(self) -> bool:
+        return self.deep_eval.has_default_fparam()
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this DP."""
         return self.deep_eval.get_dim_aparam()
diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py
index c4d5d028ce..07b9176c99 100644
--- a/deepmd/pt/infer/deep_eval.py
+++ b/deepmd/pt/infer/deep_eval.py
@@ -183,6 +183,9 @@ def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this DP."""
         return self.dp.model["Default"].get_dim_aparam()
 
+    def has_default_fparam(self) -> bool:
+        return self.dp.model["Default"].has_default_fparam()
+
     def get_intensive(self) -> bool:
         return self.dp.model["Default"].get_intensive()
 
diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index 5a5655b72c..cde6cae9a8 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -308,6 +308,9 @@ def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this atomic model."""
         return self.fitting_net.get_dim_fparam()
 
+    def has_default_fparam(self) -> bool:
+        return self.fitting_net.has_default_fparam()
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.fitting_net.get_dim_aparam()
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
index c32abaa095..0b90c2cc44 100644
--- a/deepmd/pt/model/model/make_model.py
+++ b/deepmd/pt/model/model/make_model.py
@@ -522,6 +522,10 @@ def get_dim_fparam(self) -> int:
             """Get the number (dimension) of frame parameters of this atomic model."""
             return self.atomic_model.get_dim_fparam()
 
+        @torch.jit.export
+        def has_default_fparam(self) -> bool:
+            return self.atomic_model.has_default_fparam()
+
         @torch.jit.export
         def get_dim_aparam(self) -> int:
             """Get the number (dimension) of atomic parameters of this atomic model."""
diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py
index 07351b33f6..5e993fec1b 100644
--- a/deepmd/pt/model/task/ener.py
+++ b/deepmd/pt/model/task/ener.py
@@ -56,6 +56,7 @@ def __init__(
         mixed_types: bool = True,
         seed: Optional[Union[int, list[int]]] = None,
         type_map: Optional[list[str]] = None,
+        default_fparam: Optional[list] = None,
         **kwargs,
     ) -> None:
         super().__init__(
@@ -74,6 +75,7 @@ def __init__(
             mixed_types=mixed_types,
             seed=seed,
             type_map=type_map,
+            default_fparam=default_fparam,
             **kwargs,
         )
 
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 0865b61f52..054dce01eb 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -227,6 +227,7 @@ def __init__(
         remove_vaccum_contribution: Optional[list[bool]] = None,
         type_map: Optional[list[str]] = None,
         use_aparam_as_mask: bool = False,
+        default_fparam: Optional[list] = None,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -238,6 +239,7 @@ def __init__(
         self.resnet_dt = resnet_dt
         self.numb_fparam = numb_fparam
         self.numb_aparam = numb_aparam
+        self.default_fparam = default_fparam
         self.dim_case_embd = dim_case_embd
         self.activation_function = activation_function
         self.precision = precision
@@ -299,6 +301,20 @@ def __init__(
         else:
             self.case_embd = None
 
+        if self.default_fparam is not None:
+            if self.numb_fparam > 0:
+                assert (
+                    len(self.default_fparam) == self.numb_fparam
+                ), "default_fparam length mismatch!"
+            self.register_buffer(
+                "default_fparam_tensor",
+                torch.tensor(
+                    np.array(self.default_fparam), dtype=self.prec, device=device
+                ),
+            )
+        else:
+            self.default_fparam_tensor = None
+
         in_dim = (
             self.dim_descrpt
             + self.numb_fparam
@@ -415,6 +431,9 @@ def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this atomic model."""
         return self.numb_fparam
 
+    def has_default_fparam(self) -> bool:
+        return self.default_fparam is not None
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.numb_aparam
@@ -509,6 +528,13 @@ def _forward_common(
     ):
         # cast the input to internal precsion
         xx = descriptor.to(self.prec)
+        nf, nloc, nd = xx.shape
+
+        if self.numb_fparam > 0 and fparam is None:
+            # use default fparam
+            assert self.default_fparam_tensor is not None
+            fparam = torch.tile(self.default_fparam_tensor.unsqueeze(0), [nf, 1])
+
         fparam = fparam.to(self.prec) if fparam is not None else None
         aparam = aparam.to(self.prec) if aparam is not None else None
 
@@ -521,7 +547,6 @@ def _forward_common(
             xx_zeros = torch.zeros_like(xx)
         else:
             xx_zeros = None
-        nf, nloc, nd = xx.shape
         net_dim_out = self._net_out_dim()
 
         if nd != self.dim_descrpt:
diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py
index b1599eac60..f9ab2265f8 100644
--- a/deepmd/pt/model/task/invar_fitting.py
+++ b/deepmd/pt/model/task/invar_fitting.py
@@ -103,6 +103,7 @@ def __init__(
         atom_ener: Optional[list[Optional[torch.Tensor]]] = None,
         type_map: Optional[list[str]] = None,
         use_aparam_as_mask: bool = False,
+        default_fparam: Optional[list] = None,
         **kwargs,
     ) -> None:
         self.dim_out = dim_out
@@ -128,6 +129,7 @@ def __init__(
             else [x is not None for x in atom_ener],
             type_map=type_map,
             use_aparam_as_mask=use_aparam_as_mask,
+            default_fparam=default_fparam,
             **kwargs,
         )
 
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 193dcd8cb9..09103e283e 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -1126,7 +1126,8 @@ def get_data(self, is_train=True, task_key="Default"):
         label_dict = {}
         for item_key in batch_data:
             if item_key in input_keys:
-                input_dict[item_key] = batch_data[item_key]
+                if item_key != "fparam" or batch_data["find_fparam"] != 0.0:
+                    input_dict[item_key] = batch_data[item_key]
             else:
                 if item_key not in ["sid", "fid"]:
                     label_dict[item_key] = batch_data[item_key]
@@ -1205,7 +1206,10 @@ def get_additional_data_requirement(_model):
     if _model.get_dim_fparam() > 0:
         fparam_requirement_items = [
             DataRequirementItem(
-                "fparam", _model.get_dim_fparam(), atomic=False, must=True
+                "fparam",
+                _model.get_dim_fparam(),
+                atomic=False,
+                must=not _model.has_default_fparam(),
             )
         ]
         additional_data_requirement += fparam_requirement_items
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index fb911550dd..6d7285593e 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1773,6 +1773,7 @@ def fitting_ener():
     return [
         Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
         Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument("default_fparam", list, optional=True, default=None),
         Argument(
             "dim_case_embd",
             int,

From 28be7f6da3eecb03d5886943085d138202bb8290 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 11 Jul 2025 19:42:52 +0800
Subject: [PATCH 02/27] Update stat.py: drop fparam from stat data when not found

---
 deepmd/pt/utils/stat.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py
index cf6892b49d..182cba6ed6 100644
--- a/deepmd/pt/utils/stat.py
+++ b/deepmd/pt/utils/stat.py
@@ -59,6 +59,14 @@ def make_stat_input(datasets, dataloaders, nbatches):
                 except StopIteration:
                     iterator = iter(dataloaders[i])
                     stat_data = next(iterator)
+                if (
+                    "find_fparam" in stat_data
+                    and "fparam" in stat_data
+                    and stat_data["find_fparam"] == 0.0
+                ):
+                    # for model using default fparam
+                    stat_data.pop("fparam")
+                    stat_data.pop("find_fparam")
                 for dd in stat_data:
                     if stat_data[dd] is None:
                         sys_stat[dd] = None

From 503ec287875a92b6bde665ac824423b77e605c13 Mon Sep 17 00:00:00 2001
From: Chenqqian Zhang <100290172+Chengqian-Zhang@users.noreply.github.com>
Date: Thu, 24 Jul 2025 18:19:04 +0800
Subject: [PATCH 03/27] Write fparam/aparam statistic to stat_file (#47)

* Add fparam/aparam stat

* Add fparam/aparam stat in share_fitting

* Add fparam default value if default_fparam is not None

* Add model_prob in share_fitting_params

* Add protection argument to share_params of the fitting net
---
 .../pt/model/atomic_model/dp_atomic_model.py  |   6 +-
 deepmd/pt/model/model/make_model.py           |   5 +
 deepmd/pt/model/task/fitting.py               | 263 ++++++++++++++----
 deepmd/pt/train/training.py                   |  36 ++-
 deepmd/pt/train/wrapper.py                    |   5 +-
 deepmd/utils/env_mat_stat.py                  |   7 +
 6 files changed, 253 insertions(+), 69 deletions(-)

diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index cde6cae9a8..e19670cf90 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -6,6 +6,7 @@
 )
 
 import torch
+import numpy as np
 
 from deepmd.dpmodel import (
     FittingOutputDef,
@@ -300,7 +301,7 @@ def wrapped_sampler():
 
         self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
         self.fitting_net.compute_input_stats(
-            wrapped_sampler, protection=self.data_stat_protect
+            wrapped_sampler, protection=self.data_stat_protect, stat_file_path=stat_file_path
         )
         self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
@@ -311,6 +312,9 @@ def get_dim_fparam(self) -> int:
     def has_default_fparam(self) -> bool:
         return self.fitting_net.has_default_fparam()
 
+    def get_default_fparam(self) -> Optional[np.array]:
+        return self.fitting_net.get_default_fparam()
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.fitting_net.get_dim_aparam()
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
index 0b90c2cc44..63cf7db6fe 100644
--- a/deepmd/pt/model/model/make_model.py
+++ b/deepmd/pt/model/model/make_model.py
@@ -4,6 +4,7 @@
 )
 
 import torch
+import numpy as np
 
 from deepmd.dpmodel import (
     ModelOutputDef,
@@ -526,6 +527,10 @@ def get_dim_fparam(self) -> int:
         def has_default_fparam(self) -> bool:
             return self.atomic_model.has_default_fparam()
 
+        @torch.jit.export
+        def get_default_fparam(self) -> Optional[np.array]:
+            return self.atomic_model.get_default_fparam()
+
         @torch.jit.export
         def get_dim_aparam(self) -> int:
             """Get the number (dimension) of atomic parameters of this atomic model."""
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 054dce01eb..4c5c3a02da 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -7,6 +7,7 @@
     Callable,
     Optional,
     Union,
+    List,
 )
 
 import numpy as np
@@ -40,6 +41,12 @@
     get_index_between_two_maps,
     map_atom_exclude_types,
 )
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
 
 dtype = env.GLOBAL_PT_FLOAT_PRECISION
 device = env.DEVICE
@@ -55,7 +62,7 @@ def __new__(cls, *args, **kwargs):
             return BaseFitting.__new__(BaseFitting, *args, **kwargs)
         return super().__new__(cls)
 
-    def share_params(self, base_class, shared_level, resume=False) -> None:
+    def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False) -> None:
         """
         Share the parameters of self to the base_class with shared_level during multitask training.
         If not start from checkpoint (resume is False),
@@ -66,16 +73,145 @@ def share_params(self, base_class, shared_level, resume=False) -> None:
         )
         if shared_level == 0:
             # only not share the bias_atom_e and the case_embd
+            # link fparam buffers
+            if self.numb_fparam > 0:
+                if not resume:
+                    base_fparam = base_class.stats["fparam"]
+                    assert len(base_fparam) == self.numb_fparam
+                    for ii in range(self.numb_fparam):
+                        base_fparam[ii] += self.get_stats()["fparam"][ii] * model_prob
+                    fparam_avg = np.array([ii.compute_avg() for ii in base_fparam])
+                    fparam_std = np.array([ii.compute_std(protection=protection) for ii in base_fparam])
+                    fparam_inv_std = 1.0 / fparam_std
+                    base_class.fparam_avg.copy_(
+                        torch.tensor(
+                            fparam_avg, device=env.DEVICE, dtype=base_class.fparam_avg.dtype
+                        )
+                    )
+                    base_class.fparam_inv_std.copy_(
+                        torch.tensor(
+                            fparam_inv_std, device=env.DEVICE, dtype=base_class.fparam_inv_std.dtype
+                        )
+                    )
+                self.fparam_avg = base_class.fparam_avg
+                self.fparam_inv_std = base_class.fparam_inv_std
+
+            # link aparam buffers
+            if self.numb_aparam > 0:
+                if not resume:
+                    base_aparam = base_class.stats["aparam"]
+                    assert len(base_aparam) == self.numb_aparam
+                    for ii in range(self.numb_aparam):
+                        base_aparam[ii] += self.get_stats()["aparam"][ii] * model_prob
+                    aparam_avg = np.array([ii.compute_avg() for ii in base_aparam])
+                    aparam_std = np.array([ii.compute_std(protection=protection) for ii in base_aparam])
+                    aparam_inv_std = 1.0 / aparam_std
+                    base_class.aparam_avg.copy_(
+                        torch.tensor(
+                            aparam_avg, device=env.DEVICE, dtype=base_class.aparam_avg.dtype
+                        )
+                    )
+                    base_class.aparam_inv_std.copy_(
+                        torch.tensor(
+                            aparam_inv_std, device=env.DEVICE, dtype=base_class.aparam_inv_std.dtype
+                        )
+                    )     
+                self.aparam_avg = base_class.aparam_avg
+                self.aparam_inv_std = base_class.aparam_inv_std
+
             # the following will successfully link all the params except buffers, which need manually link.
             for item in self._modules:
                 self._modules[item] = base_class._modules[item]
         else:
             raise NotImplementedError
 
+    def save_to_file_fparam(
+        self,
+        stat_file_path: DPPath,
+    ) -> None:
+        """Save the statistics of fparam.
+
+        Parameters
+        ----------
+        path : DPPath
+            The path to save the statistics of fparam.
+        """
+        assert stat_file_path is not None
+        stat_file_path.mkdir(exist_ok=True, parents=True)
+        if len(self.stats) == 0:
+            raise ValueError("The statistics hasn't been computed.")
+        fp = stat_file_path / "fparam"
+        _fparam_stat = []
+        for ii in range(self.numb_fparam):
+            _tmp_stat = self.stats["fparam"][ii]
+            _fparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum])
+        _fparam_stat = np.array(_fparam_stat)
+        fp.save_numpy(_fparam_stat)
+        log.info(f"Save fparam stats to {fp}.")
+
+    def save_to_file_aparam(
+        self,
+        stat_file_path: DPPath,
+    ) -> None:
+        """Save the statistics of aparam.
+
+        Parameters
+        ----------
+        path : DPPath
+            The path to save the statistics of aparam.
+        """
+        assert stat_file_path is not None
+        stat_file_path.mkdir(exist_ok=True, parents=True)
+        if len(self.stats) == 0:
+            raise ValueError("The statistics hasn't been computed.")
+        fp = stat_file_path / "aparam"
+        _aparam_stat = []
+        for ii in range(self.numb_aparam):
+            _tmp_stat = self.stats["aparam"][ii]
+            _aparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum])
+        _aparam_stat = np.array(_aparam_stat)
+        fp.save_numpy(_aparam_stat)
+        log.info(f"Save aparam stats to {fp}.")
+
+    def restore_fparam_from_file(self, stat_file_path: DPPath) -> None:
+        """Load the statistics of fparam.
+
+        Parameters
+        ----------
+        path : DPPath
+            The path to load the statistics of fparam.
+        """
+        fp = stat_file_path / "fparam"
+        arr = fp.load_numpy()
+        assert arr.shape == (self.numb_fparam, 3)
+        _fparam_stat = []
+        for ii in range(self.numb_fparam):
+            _fparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2]))
+        self.stats["fparam"] = _fparam_stat
+        log.info(f"Load fparam stats from {fp}.")
+
+    def restore_aparam_from_file(self, stat_file_path: DPPath) -> None:
+        """Load the statistics of aparam.
+
+        Parameters
+        ----------
+        path : DPPath
+            The path to load the statistics of aparam.
+        """
+        fp = stat_file_path / "aparam"
+        arr = fp.load_numpy()
+        assert arr.shape == (self.numb_aparam, 3)
+        _aparam_stat = []
+        for ii in range(self.numb_aparam):
+            _aparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2]))
+        self.stats["aparam"] = _aparam_stat
+        log.info(f"Load aparam stats from {fp}.")
+
     def compute_input_stats(
         self,
         merged: Union[Callable[[], list[dict]], list[dict]],
         protection: float = 1e-2,
+        stat_file_path: Optional[DPPath] = None,
     ) -> None:
         """
         Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
@@ -91,67 +227,89 @@ def compute_input_stats(
                 the lazy function helps by only sampling once.
         protection : float
             Divided-by-zero protection
+        stat_file_path : Optional[DPPath]
+            The path to the stat file.
         """
         if self.numb_fparam == 0 and self.numb_aparam == 0:
             # skip data statistics
+            self.stats = None
             return
-        if callable(merged):
-            sampled = merged()
-        else:
-            sampled = merged
+
+        self.stats = {}
+
         # stat fparam
         if self.numb_fparam > 0:
-            cat_data = torch.cat([frame["fparam"] for frame in sampled], dim=0)
-            cat_data = torch.reshape(cat_data, [-1, self.numb_fparam])
-            fparam_avg = torch.mean(cat_data, dim=0)
-            fparam_std = torch.std(cat_data, dim=0, unbiased=False)
-            fparam_std = torch.where(
-                fparam_std < protection,
-                torch.tensor(
-                    protection, dtype=fparam_std.dtype, device=fparam_std.device
-                ),
-                fparam_std,
-            )
+            if stat_file_path is not None and stat_file_path.is_dir():
+                self.restore_fparam_from_file(stat_file_path)
+            else:
+                sampled = merged() if callable(merged) else merged
+                self.stats["fparam"] = []
+                cat_data = to_numpy_array(torch.cat([frame["fparam"] for frame in sampled], dim=0))
+                cat_data = np.reshape(cat_data, [-1, self.numb_fparam])
+                sumv = np.sum(cat_data, axis=0)
+                sumv2 = np.sum(cat_data * cat_data, axis=0)
+                sumn = cat_data.shape[0]
+                for ii in range(self.numb_fparam):
+                    self.stats["fparam"].append(
+                        StatItem(
+                            number=sumn,
+                            sum=sumv[ii],
+                            squared_sum=sumv2[ii],
+                        )
+                    )
+                if stat_file_path is not None:
+                    self.save_to_file_fparam(stat_file_path)
+
+            fparam_avg = np.array([ii.compute_avg() for ii in self.stats["fparam"]])
+            fparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["fparam"]])
             fparam_inv_std = 1.0 / fparam_std
-            self.fparam_avg.copy_(
-                torch.tensor(fparam_avg, device=env.DEVICE, dtype=self.fparam_avg.dtype)
-            )
-            self.fparam_inv_std.copy_(
-                torch.tensor(
-                    fparam_inv_std, device=env.DEVICE, dtype=self.fparam_inv_std.dtype
-                )
-            )
+            log.info(f"fparam_avg is {fparam_avg}, fparam_inv_std is {fparam_inv_std}")
+            self.fparam_avg.copy_(to_torch_tensor(fparam_avg))
+            self.fparam_inv_std.copy_(to_torch_tensor(fparam_inv_std))
+
         # stat aparam
         if self.numb_aparam > 0:
-            sys_sumv = []
-            sys_sumv2 = []
-            sys_sumn = []
-            for ss_ in [frame["aparam"] for frame in sampled]:
-                ss = torch.reshape(ss_, [-1, self.numb_aparam])
-                sys_sumv.append(torch.sum(ss, dim=0))
-                sys_sumv2.append(torch.sum(ss * ss, dim=0))
-                sys_sumn.append(ss.shape[0])
-            sumv = torch.sum(torch.stack(sys_sumv), dim=0)
-            sumv2 = torch.sum(torch.stack(sys_sumv2), dim=0)
-            sumn = sum(sys_sumn)
-            aparam_avg = sumv / sumn
-            aparam_std = torch.sqrt(sumv2 / sumn - (sumv / sumn) ** 2)
-            aparam_std = torch.where(
-                aparam_std < protection,
-                torch.tensor(
-                    protection, dtype=aparam_std.dtype, device=aparam_std.device
-                ),
-                aparam_std,
-            )
+            if stat_file_path is not None and stat_file_path.is_dir():
+                self.restore_aparam_from_file(stat_file_path)
+            else:
+                sampled = merged() if callable(merged) else merged
+                self.stats["aparam"] = []
+                sys_sumv = []
+                sys_sumv2 = []
+                sys_sumn = []
+                for ss_ in [frame["aparam"] for frame in sampled]:
+                    ss = np.reshape(to_numpy_array(ss_), [-1, self.numb_aparam])
+                    sys_sumv.append(np.sum(ss, axis=0))
+                    sys_sumv2.append(np.sum(ss * ss, axis=0))
+                    sys_sumn.append(ss.shape[0])
+                sumv = np.sum(np.stack(sys_sumv), axis=0)
+                sumv2 = np.sum(np.stack(sys_sumv2), axis=0)
+                sumn = sum(sys_sumn)
+                for ii in range(self.numb_aparam):
+                    self.stats["aparam"].append(
+                        StatItem(
+                            number=sumn,
+                            sum=sumv[ii],
+                            squared_sum=sumv2[ii],
+                        )
+                    )
+                if stat_file_path is not None:
+                    self.save_to_file_aparam(stat_file_path)
+
+            aparam_avg = np.array([ii.compute_avg() for ii in self.stats["aparam"]])
+            aparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["aparam"]])
             aparam_inv_std = 1.0 / aparam_std
-            self.aparam_avg.copy_(
-                torch.tensor(aparam_avg, device=env.DEVICE, dtype=self.aparam_avg.dtype)
-            )
-            self.aparam_inv_std.copy_(
-                torch.tensor(
-                    aparam_inv_std, device=env.DEVICE, dtype=self.aparam_inv_std.dtype
-                )
+            log.info(f"aparam_avg is {aparam_avg}, aparam_inv_std is {aparam_inv_std}")
+            self.aparam_avg.copy_(to_torch_tensor(aparam_avg))
+            self.aparam_inv_std.copy_(to_torch_tensor(aparam_inv_std))
+
+    def get_stats(self) -> dict[str, List[StatItem]]:
+        """Get the statistics of the fitting_net."""
+        if self.stats is None:
+            raise RuntimeError(
+                "The statistics of fitting net has not been computed."
             )
+        return self.stats
 
 
 class GeneralFitting(Fitting):
@@ -434,6 +592,9 @@ def get_dim_fparam(self) -> int:
     def has_default_fparam(self) -> bool:
         return self.default_fparam is not None
 
+    def get_default_fparam(self) -> Optional[np.array]:
+        return self.default_fparam_tensor.cpu().numpy()
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.numb_aparam
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 09103e283e..a0f4ca3f43 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -583,11 +583,30 @@ def single_model_finetune(
             frz_model = torch.jit.load(init_frz_model, map_location=DEVICE)
             self.model.load_state_dict(frz_model.state_dict())
 
+        # Get model prob for multi-task
+        if self.multi_task:
+            self.model_prob = np.array([0.0 for key in self.model_keys])
+            if training_params.get("model_prob", None) is not None:
+                model_prob = training_params["model_prob"]
+                for ii, model_key in enumerate(self.model_keys):
+                    if model_key in model_prob:
+                        self.model_prob[ii] += float(model_prob[model_key])
+            else:
+                for ii, model_key in enumerate(self.model_keys):
+                    self.model_prob[ii] += float(len(self.training_data[model_key]))
+            sum_prob = np.sum(self.model_prob)
+            assert sum_prob > 0.0, "Sum of model prob must be larger than 0!"
+            self.model_prob = self.model_prob / sum_prob
+
         # Multi-task share params
         if shared_links is not None:
+            _data_stat_protect = np.array([model_params["model_dict"][ii].get("data_stat_protect", 1e-2) for ii in model_params["model_dict"]])
+            assert np.allclose(_data_stat_protect, _data_stat_protect[0]), f"Model key 'data_stat_protect' must be the same in each branch when multitask!"
             self.wrapper.share_params(
                 shared_links,
                 resume=(resuming and not self.finetune_update_stat) or self.rank != 0,
+                model_key_prob_map = dict(zip(self.model_keys, self.model_prob)),
+                data_stat_protect = _data_stat_protect[0]
             )
 
         if dist.is_available() and dist.is_initialized():
@@ -637,21 +656,6 @@ def warm_up_linear(step, warmup_steps):
         else:
             raise ValueError(f"Not supported optimizer type '{self.opt_type}'")
 
-        # Get model prob for multi-task
-        if self.multi_task:
-            self.model_prob = np.array([0.0 for key in self.model_keys])
-            if training_params.get("model_prob", None) is not None:
-                model_prob = training_params["model_prob"]
-                for ii, model_key in enumerate(self.model_keys):
-                    if model_key in model_prob:
-                        self.model_prob[ii] += float(model_prob[model_key])
-            else:
-                for ii, model_key in enumerate(self.model_keys):
-                    self.model_prob[ii] += float(len(self.training_data[model_key]))
-            sum_prob = np.sum(self.model_prob)
-            assert sum_prob > 0.0, "Sum of model prob must be larger than 0!"
-            self.model_prob = self.model_prob / sum_prob
-
         # Tensorboard
         self.enable_tensorboard = training_params.get("tensorboard", False)
         self.tensorboard_log_dir = training_params.get("tensorboard_log_dir", "log")
@@ -1204,12 +1208,14 @@ def print_on_training(
 def get_additional_data_requirement(_model):
     additional_data_requirement = []
     if _model.get_dim_fparam() > 0:
+        _fparam_default = _model.get_default_fparam() if _model.has_default_fparam() else 0.0
         fparam_requirement_items = [
             DataRequirementItem(
                 "fparam",
                 _model.get_dim_fparam(),
                 atomic=False,
                 must=not _model.has_default_fparam(),
+                default=_fparam_default,
             )
         ]
         additional_data_requirement += fparam_requirement_items
diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py
index 9a2cbff295..4ac86d43d6 100644
--- a/deepmd/pt/train/wrapper.py
+++ b/deepmd/pt/train/wrapper.py
@@ -59,7 +59,7 @@ def __init__(
                     self.loss[task_key] = loss[task_key]
         self.inference_only = self.loss is None
 
-    def share_params(self, shared_links, resume=False) -> None:
+    def share_params(self, shared_links, model_key_prob_map, data_stat_protect=1e-2, resume=False) -> None:
         """
         Share the parameters of classes following rules defined in shared_links during multitask training.
         If not start from checkpoint (resume is False),
@@ -129,8 +129,9 @@ def share_params(self, shared_links, resume=False) -> None:
                         link_class = self.model[
                             model_key_link
                         ].atomic_model.__getattr__(class_type_link)
+                        frac_prob = model_key_prob_map[model_key_link]/model_key_prob_map[model_key_base]
                         link_class.share_params(
-                            base_class, shared_level_link, resume=resume
+                            base_class, shared_level_link, model_prob=frac_prob, protection=data_stat_protect, resume=resume
                         )
                         log.warning(
                             f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!"
diff --git a/deepmd/utils/env_mat_stat.py b/deepmd/utils/env_mat_stat.py
index ecc0b7b62f..3fa4d7d410 100644
--- a/deepmd/utils/env_mat_stat.py
+++ b/deepmd/utils/env_mat_stat.py
@@ -48,6 +48,13 @@ def __add__(self, other: "StatItem") -> "StatItem":
             squared_sum=self.squared_sum + other.squared_sum,
         )
 
+    def __mul__(self, scalar: float) -> "StatItem":
+        return StatItem(
+            number=self.number * scalar,
+            sum=self.sum * scalar,
+            squared_sum=self.squared_sum * scalar,
+        )
+
     def compute_avg(self, default: float = 0) -> float:
         """Compute the average of the environment matrix.
 

From 82646e9ff458bcc039c7a40c070d854e9c83b37c Mon Sep 17 00:00:00 2001
From: Chenqqian Zhang <100290172+Chengqian-Zhang@users.noreply.github.com>
Date: Wed, 30 Jul 2025 17:52:52 +0800
Subject: [PATCH 04/27] delete torch.jit.export of get_default_fparam (#51)

---
 deepmd/pt/model/atomic_model/dp_atomic_model.py | 2 +-
 deepmd/pt/model/model/make_model.py             | 3 +--
 deepmd/pt/model/task/fitting.py                 | 4 ++--
 deepmd/pt/train/training.py                     | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index e19670cf90..5bf61a01ab 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -312,7 +312,7 @@ def get_dim_fparam(self) -> int:
     def has_default_fparam(self) -> bool:
         return self.fitting_net.has_default_fparam()
 
-    def get_default_fparam(self) -> Optional[np.array]:
+    def get_default_fparam(self) -> Optional[torch.Tensor]:
         return self.fitting_net.get_default_fparam()
 
     def get_dim_aparam(self) -> int:
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
index 63cf7db6fe..d58261a481 100644
--- a/deepmd/pt/model/model/make_model.py
+++ b/deepmd/pt/model/model/make_model.py
@@ -527,8 +527,7 @@ def get_dim_fparam(self) -> int:
         def has_default_fparam(self) -> bool:
             return self.atomic_model.has_default_fparam()
 
-        @torch.jit.export
-        def get_default_fparam(self) -> Optional[np.array]:
+        def get_default_fparam(self) -> Optional[torch.Tensor]:
             return self.atomic_model.get_default_fparam()
 
         @torch.jit.export
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 4c5c3a02da..35c26c376c 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -592,8 +592,8 @@ def get_dim_fparam(self) -> int:
     def has_default_fparam(self) -> bool:
         return self.default_fparam is not None
 
-    def get_default_fparam(self) -> Optional[np.array]:
-        return self.default_fparam_tensor.cpu().numpy()
+    def get_default_fparam(self) -> Optional[torch.Tensor]:
+        return self.default_fparam_tensor
 
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index a0f4ca3f43..589fe620ee 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -1208,7 +1208,7 @@ def print_on_training(
 def get_additional_data_requirement(_model):
     additional_data_requirement = []
     if _model.get_dim_fparam() > 0:
-        _fparam_default = _model.get_default_fparam() if _model.has_default_fparam() else 0.0
+        _fparam_default = _model.get_default_fparam().cpu().numpy() if _model.has_default_fparam() else 0.0
         fparam_requirement_items = [
             DataRequirementItem(
                 "fparam",

From 13ee082661e03d13b4d460bec936cc838edd06fd Mon Sep 17 00:00:00 2001
From: anyangml <anyangpeng.ca@gmail.com>
Date: Tue, 21 Oct 2025 02:31:49 +0000
Subject: [PATCH 05/27] add edge readout

---
 .../descriptor/make_base_descriptor.py        |   7 +
 deepmd/dpmodel/fitting/make_base_fitting.py   |   3 +
 deepmd/dpmodel/utils/learning_rate.py         |  33 ++++
 .../pt/model/atomic_model/dp_atomic_model.py  |  32 +++-
 deepmd/pt/model/descriptor/dpa1.py            |   7 +
 deepmd/pt/model/descriptor/dpa3.py            |   7 +
 deepmd/pt/model/descriptor/repflows.py        |  18 ++
 deepmd/pt/model/model/__init__.py             |   6 +-
 deepmd/pt/model/task/ener.py                  | 162 ++++++++++++++++++
 deepmd/pt/model/task/invar_fitting.py         |   2 +
 deepmd/pt/train/training.py                   |  14 +-
 deepmd/pt/utils/learning_rate.py              |   2 +
 deepmd/utils/argcheck.py                      | 109 +++++++++++-
 deepmd/utils/path.py                          |   2 +-
 14 files changed, 386 insertions(+), 18 deletions(-)

diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py
index f45e85e516..97cc7abf65 100644
--- a/deepmd/dpmodel/descriptor/make_base_descriptor.py
+++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py
@@ -148,6 +148,13 @@ def compute_input_stats(
             """Update mean and stddev for descriptor elements."""
             raise NotImplementedError
 
+        def get_norm_fact(self) -> list[float]:
+            """Returns the norm factor."""
+            raise NotImplementedError
+
+        def get_additional_output_for_fitting(self):
+            raise NotImplementedError
+            
         def enable_compression(
             self,
             min_nbor_dist: float,
diff --git a/deepmd/dpmodel/fitting/make_base_fitting.py b/deepmd/dpmodel/fitting/make_base_fitting.py
index 201b5e27d1..ccd10a8e11 100644
--- a/deepmd/dpmodel/fitting/make_base_fitting.py
+++ b/deepmd/dpmodel/fitting/make_base_fitting.py
@@ -67,6 +67,9 @@ def compute_output_stats(self, merged) -> NoReturn:
             """Update the output bias for fitting net."""
             raise NotImplementedError
 
+        def need_additional_input(self) -> bool:
+            return False
+            
         @abstractmethod
         def get_type_map(self) -> list[str]:
             """Get the name to each type of atoms."""
diff --git a/deepmd/dpmodel/utils/learning_rate.py b/deepmd/dpmodel/utils/learning_rate.py
index 90c18fca22..a19069f938 100644
--- a/deepmd/dpmodel/utils/learning_rate.py
+++ b/deepmd/dpmodel/utils/learning_rate.py
@@ -51,3 +51,36 @@ def value(self, step) -> np.float64:
         if step_lr < self.min_lr:
             step_lr = self.min_lr
         return step_lr
+
+class LearningRateWSD:
+    """Piecewise schedule: hold start_lr, decay linearly, then hold stop_lr.
+
+    decay_mode is "a:b:c"; the three phases get fractions a, b and c of
+    stop_steps (normalized by a+b+c). Extra keyword arguments are ignored.
+    """
+
+    def __init__(
+        self, start_lr, stop_lr, stop_steps, decay_mode="85:10:5", **kwargs
+    ) -> None:
+        self.start_lr = start_lr
+        self.stop_lr = stop_lr
+        self.stop_steps = stop_steps
+        self.decay_mode = [float(ii) for ii in decay_mode.split(":")]
+        # raise instead of assert so the check survives `python -O`
+        if len(self.decay_mode) != 3 or sum(self.decay_mode) <= 0.0:
+            raise ValueError(f"Invalid decay_mode '{decay_mode}', expect 'a:b:c'")
+        total = sum(self.decay_mode)
+        self.decay_start_rate = self.decay_mode[0] / total
+        self.decay_end_rate = (self.decay_mode[0] + self.decay_mode[1]) / total
+
+    def value(self, step) -> np.float64:
+        """Return the learning rate at the given step."""
+        decay_start = self.decay_start_rate * self.stop_steps
+        decay_end = self.decay_end_rate * self.stop_steps
+        if step < decay_start:
+            return self.start_lr
+        if step >= decay_end:
+            return self.stop_lr
+        # linear interpolation between start_lr and stop_lr
+        slope = (self.start_lr - self.stop_lr) / (decay_end - decay_start)
+        return self.start_lr - slope * (step - decay_start)
\ No newline at end of file
diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index 5bf61a01ab..e2be1eb097 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -247,15 +247,29 @@ def forward_atomic(
         if self.enable_eval_descriptor_hook:
             self.eval_descriptor_list.append(descriptor.detach())
         # energy, force
-        fit_ret = self.fitting_net(
-            descriptor,
-            atype,
-            gr=rot_mat,
-            g2=g2,
-            h2=h2,
-            fparam=fparam,
-            aparam=aparam,
-        )
+        if not self.fitting_net.need_additional_input():
+            fit_ret = self.fitting_net(
+                descriptor,
+                atype,
+                gr=rot_mat,
+                g2=g2,
+                h2=h2,
+                fparam=fparam,
+                aparam=aparam,
+            )
+        else:
+            add_input = self.descriptor.get_additional_output_for_fitting()
+            fit_ret = self.fitting_net(
+                descriptor,
+                atype,
+                gr=rot_mat,
+                g2=g2,
+                h2=h2,
+                fparam=fparam,
+                aparam=aparam,
+                sw=sw,
+                edge_index=add_input.get("edge_index", None),
+            )
         return fit_ret
 
     def get_out_bias(self) -> torch.Tensor:
diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py
index 9c1e144f48..97b1e29da3 100644
--- a/deepmd/pt/model/descriptor/dpa1.py
+++ b/deepmd/pt/model/descriptor/dpa1.py
@@ -353,6 +353,13 @@ def get_dim_out(self) -> int:
 
     def get_dim_emb(self) -> int:
         return self.se_atten.dim_emb
+    
+    def get_norm_fact(self) -> list[float]:
+        """Returns the norm factor."""
+        return [float(self.get_nnei())]
+
+    def get_additional_output_for_fitting(self) -> dict[str, Optional[torch.Tensor]]:
+        return {}
 
     def mixed_types(self) -> bool:
         """If true, the descriptor
diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py
index dd2da9a3c8..df8016b0a1 100644
--- a/deepmd/pt/model/descriptor/dpa3.py
+++ b/deepmd/pt/model/descriptor/dpa3.py
@@ -245,6 +245,13 @@ def get_dim_out(self) -> int:
     def get_dim_emb(self) -> int:
         """Returns the embedding dimension of this descriptor."""
         return self.repflows.dim_emb
+    
+    def get_norm_fact(self) -> list[float]:
+        """Returns the norm factor."""
+        return self.repflows.get_norm_fact()
+
+    def get_additional_output_for_fitting(self):
+        return self.repflows.get_additional_output_for_fitting()
 
     def mixed_types(self) -> bool:
         """If true, the descriptor
diff --git a/deepmd/pt/model/descriptor/repflows.py b/deepmd/pt/model/descriptor/repflows.py
index 0bcca7a132..0d453c8aa0 100644
--- a/deepmd/pt/model/descriptor/repflows.py
+++ b/deepmd/pt/model/descriptor/repflows.py
@@ -254,6 +254,8 @@ def __init__(
         self.use_exp_switch = use_exp_switch
         self.use_dynamic_sel = use_dynamic_sel
         self.sel_reduce_factor = sel_reduce_factor
+        self.dynamic_e_sel = self.nnei / self.sel_reduce_factor
+        self.dynamic_a_sel = self.a_sel / self.sel_reduce_factor
         if self.use_dynamic_sel and not self.smooth_edge_update:
             raise NotImplementedError(
                 "smooth_edge_update must be True when use_dynamic_sel is True!"
@@ -321,6 +323,7 @@ def __init__(
                 )
             )
         self.layers = torch.nn.ModuleList(layers)
+        self.additional_output_for_fitting: dict[str, Optional[torch.Tensor]] = {}
 
         wanted_shape = (self.ntypes, self.nnei, 4)
         mean = torch.zeros(wanted_shape, dtype=self.prec, device=env.DEVICE)
@@ -330,6 +333,8 @@ def __init__(
         self.register_buffer("mean", mean)
         self.register_buffer("stddev", stddev)
         self.stats = None
+    
+    additional_output_for_fitting: dict[str, Optional[torch.Tensor]]
 
     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
@@ -362,6 +367,17 @@ def get_dim_in(self) -> int:
     def get_dim_emb(self) -> int:
         """Returns the embedding dimension e_dim."""
         return self.e_dim
+    
+    def get_additional_output_for_fitting(self):
+        return self.additional_output_for_fitting
+
+    def get_norm_fact(self) -> list[float]:
+        """Returns the norm factor."""
+        return [
+            float(self.dynamic_e_sel if self.use_dynamic_sel else self.nnei),
+            # float(self.dynamic_a_sel if self.use_dynamic_sel else self.a_sel),
+        ]
+
 
     def __setitem__(self, key, value) -> None:
         if key in ("avg", "data_avg", "davg"):
@@ -535,10 +551,12 @@ def forward(
             angle_input = angle_input[a_nlist_mask]
             # n_angle x 1
             a_sw = (a_sw[:, :, :, None] * a_sw[:, :, None, :])[a_nlist_mask]
+            self.additional_output_for_fitting["edge_index"] = edge_index
         else:
             # avoid jit assertion
             edge_index = torch.zeros([2, 1], device=nlist.device, dtype=nlist.dtype)
             angle_index = torch.zeros([3, 1], device=nlist.device, dtype=nlist.dtype)
+            self.additional_output_for_fitting["edge_index"] = None
         # get edge and angle embedding
         # nb x nloc x nnei x e_dim [OR] n_edge x e_dim
         if not self.edge_init_use_dist:
diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py
index 8d451f087f..d78a1ea0b5 100644
--- a/deepmd/pt/model/model/__init__.py
+++ b/deepmd/pt/model/model/__init__.py
@@ -90,8 +90,10 @@ def _get_standard_model_components(model_params, ntypes):
     fitting_net["ntypes"] = descriptor.get_ntypes()
     fitting_net["type_map"] = copy.deepcopy(model_params["type_map"])
     fitting_net["mixed_types"] = descriptor.mixed_types()
-    if fitting_net["type"] in ["dipole", "polar"]:
+    if fitting_net["type"] in ["dipole", "polar", "ener_readout"]:
         fitting_net["embedding_width"] = descriptor.get_dim_emb()
+    if fitting_net["type"] in ["ener_readout"]:
+        fitting_net["norm_fact"] = descriptor.get_norm_fact()
     fitting_net["dim_descrpt"] = descriptor.get_dim_out()
     grad_force = "direct" not in fitting_net["type"]
     if not grad_force:
@@ -262,7 +264,7 @@ def get_standard_model(model_params):
         modelcls = PolarModel
     elif fitting_net_type == "dos":
         modelcls = DOSModel
-    elif fitting_net_type in ["ener", "direct_force_ener"]:
+    elif fitting_net_type in ["ener", "direct_force_ener", "ener_readout"]:
         modelcls = EnergyModel
     elif fitting_net_type == "property":
         modelcls = PropertyModel
diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py
index 5e993fec1b..3968e21cdc 100644
--- a/deepmd/pt/model/task/ener.py
+++ b/deepmd/pt/model/task/ener.py
@@ -13,9 +13,19 @@
     OutputVariableDef,
     fitting_check_output,
 )
+from deepmd.dpmodel.utils.seed import (
+    child_seed,
+)
+from deepmd.pt.model.network.mlp import (
+    FittingNet,
+    NetworkCollection,
+)
 from deepmd.pt.model.network.network import (
     ResidualDeep,
 )
+from deepmd.pt.model.network.utils import (
+    aggregate,
+)
 from deepmd.pt.model.task.fitting import (
     Fitting,
     GeneralFitting,
@@ -259,3 +269,155 @@ def forward(
             "energy": outs.to(env.GLOBAL_PT_FLOAT_PRECISION),
             "dforce": vec_out,
         }
+
+        
+@Fitting.register("ener_readout")
+@fitting_check_output
+class EnergyFittingNetReadout(InvarFitting):
+    def __init__(
+        self,
+        ntypes: int,
+        dim_descrpt: int,
+        neuron: list[int] = [128, 128, 128],
+        bias_atom_e: Optional[torch.Tensor] = None,
+        resnet_dt: bool = True,
+        numb_fparam: int = 0,
+        numb_aparam: int = 0,
+        dim_case_embd: int = 0,
+        embedding_width: int = 128,
+        activation_function: str = "tanh",
+        precision: str = DEFAULT_PRECISION,
+        mixed_types: bool = True,
+        seed: Optional[Union[int, list[int]]] = None,
+        type_map: Optional[list[str]] = None,
+        norm_fact: list[float] = [120.0],
+        add_edge_readout: bool = True,
+        slim_edge_readout: bool = False,
+        **kwargs,
+    ) -> None:
+        """Construct a fitting net for energy with an optional edge readout.
+
+        Args:
+        - ntypes: Element count.
+        - embedding_width: Width passed to the edge readout network (presumably the edge feature width).
+        - neuron: Number of neurons in each hidden layers of the fitting net.
+        - bias_atom_e: Average energy per atom for each element.
+        - resnet_dt: Using time-step in the ResNet construction.
+        """
+        self.add_edge_readout = add_edge_readout
+        super().__init__(
+            "energy",
+            ntypes,
+            dim_descrpt,
+            1,
+            neuron=neuron,
+            bias_atom_e=bias_atom_e,
+            resnet_dt=resnet_dt,
+            numb_fparam=numb_fparam,
+            numb_aparam=numb_aparam,
+            dim_case_embd=dim_case_embd,
+            activation_function=activation_function,
+            precision=precision,
+            mixed_types=mixed_types,
+            seed=seed,
+            type_map=type_map,
+            **kwargs,
+        )
+
+        # edge readout settings; slim_edge_readout keeps only neuron[:1] for the edge net
+        self.embedding_width = embedding_width
+        self.slim_edge_readout = slim_edge_readout
+        self.norm_e_fact = norm_fact[0]  # divides the summed edge energy in forward
+
+        if self.add_edge_readout:
+            self.edge_embed = NetworkCollection(
+                1 if not self.mixed_types else 0,
+                self.ntypes,
+                network_type="fitting_network",
+                networks=[
+                    FittingNet(
+                        self.embedding_width,
+                        1,
+                        self.neuron if not self.slim_edge_readout else self.neuron[:1],
+                        self.activation_function,
+                        self.resnet_dt,
+                        self.precision,
+                        bias_out=True,
+                        seed=child_seed(child_seed(self.seed, 100), ii),  # None/list-safe
+                    )
+                    for ii in range(self.ntypes if not self.mixed_types else 1)
+                ],
+            )
+        else:
+            self.edge_embed = None
+
+        # set trainable (also covers the edge readout parameters created above)
+        for param in self.parameters():
+            param.requires_grad = self.trainable
+
+    # make jit happy with torch 2.0.0
+    exclude_types: list[int]
+
+    def need_additional_input(self) -> bool:
+        return True
+
+    def serialize(self) -> dict:
+        raise NotImplementedError
+
+    @classmethod
+    def deserialize(cls, data: dict) -> "EnergyFittingNetReadout":
+        raise NotImplementedError
+
+    def forward(
+        self,
+        descriptor: torch.Tensor,
+        atype: torch.Tensor,
+        gr: Optional[torch.Tensor] = None,
+        g2: Optional[torch.Tensor] = None,
+        h2: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        sw: Optional[torch.Tensor] = None,
+        edge_index: Optional[torch.Tensor] = None,
+    ):
+        """Based on embedding net output, calculate the energy plus an optional edge readout.
+
+        Args:
+        - descriptor: Embedding matrix; its shape is unpacked here as (nf, nloc, dim).
+        - g2, sw, edge_index: Edge features, weights multiplied onto them, and optional edge owner indices.
+
+        Returns
+        -------
+        - dict: `{self.var_name: tensor}`, the tensor cast to `env.GLOBAL_PT_FLOAT_PRECISION`.
+        """
+        out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[
+            self.var_name
+        ]
+        nf, nloc, _ = descriptor.shape
+
+        if self.add_edge_readout:
+            assert g2 is not None
+            assert sw is not None
+            assert self.edge_embed is not None
+            # nf x nloc x nnei x d [OR] nedge x d
+            edge_feature = g2
+            # nf x nloc x nnei x 1 [OR] nedge x 1 (only networks[0] is evaluated)
+            edge_atomic_contrib = self.edge_embed.networks[0](edge_feature)
+            # nf x nloc x nnei x 1 [OR] nedge x 1
+            edge_atomic_contrib = edge_atomic_contrib * sw.unsqueeze(-1)
+            if edge_index is not None:
+                # use dynamic sel: row 0 of edge_index is the owner of each edge
+                n2e_index = edge_index[0]
+                # nf x nloc x 1
+                edge_energy = aggregate(
+                    edge_atomic_contrib,
+                    n2e_index,
+                    average=False,
+                    num_owner=nf * nloc,
+                ).reshape(nf, nloc, 1)
+            else:
+                # nf x nloc x 1
+                edge_energy = torch.sum(edge_atomic_contrib, dim=-2)
+            # energy
+            out = out + edge_energy / self.norm_e_fact
+        return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)}
\ No newline at end of file
diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py
index f9ab2265f8..ede0315e87 100644
--- a/deepmd/pt/model/task/invar_fitting.py
+++ b/deepmd/pt/model/task/invar_fitting.py
@@ -172,6 +172,8 @@ def forward(
         h2: Optional[torch.Tensor] = None,
         fparam: Optional[torch.Tensor] = None,
         aparam: Optional[torch.Tensor] = None,
+        sw: Optional[torch.Tensor] = None,
+        edge_index: Optional[torch.Tensor] = None,
     ):
         """Based on embedding net output, alculate total energy.
 
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 589fe620ee..e389252dc9 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -61,6 +61,7 @@
 )
 from deepmd.pt.utils.learning_rate import (
     LearningRateExp,
+    LearningRateWSD
 )
 from deepmd.pt.utils.stat import (
     make_stat_input,
@@ -258,12 +259,15 @@ def get_sample():
             return get_sample
 
         def get_lr(lr_params):
-            assert lr_params.get("type", "exp") == "exp", (
-                "Only learning rate `exp` is supported!"
-            )
+            lr_type = lr_params.get("type", "exp")
             lr_params["stop_steps"] = self.num_steps - self.warmup_steps
-            lr_exp = LearningRateExp(**lr_params)
-            return lr_exp
+            if lr_type == "exp":
+                lr_schedule = LearningRateExp(**lr_params)
+            elif lr_type == "wsd":
+                lr_schedule = LearningRateWSD(**lr_params)
+            else:
+                raise ValueError(f"Not supported learning rate type '{lr_type}'")
+            return lr_schedule
 
         # Optimizer
         if self.multi_task and training_params.get("optim_dict", None) is not None:
diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py
index 3502434bc0..ed3fa8e519 100644
--- a/deepmd/pt/utils/learning_rate.py
+++ b/deepmd/pt/utils/learning_rate.py
@@ -1,8 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from deepmd.dpmodel.utils.learning_rate import (
     LearningRateExp,
+    LearningRateWSD,
 )
 
 __all__ = [
     "LearningRateExp",
+    "LearningRateWSD",
 ]
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 6d7285593e..7ef7d02741 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2443,13 +2443,24 @@ def learning_rate_exp():
     ]
     return args
 
+def learning_rate_wsd():
+    """Return the argument definitions of the `wsd` learning-rate schedule."""
+    doc_start_lr = "The learning rate at the start of the training."
+    doc_stop_lr = "The desired learning rate at the end of the training. "
+    doc_decay_mode = "Ratio `a:b:c` of training steps spent at `start_lr`, in linear decay, and at `stop_lr`."
+    return [
+        Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr),
+        Argument("stop_lr", float, optional=True, default=1e-5, doc=doc_stop_lr),
+        Argument("decay_mode", str, optional=True, default="85:10:5", doc=doc_decay_mode),
+    ]
 
 def learning_rate_variant_type_args():
     doc_lr = "The type of the learning rate."
 
     return Variant(
         "type",
-        [Argument("exp", dict, learning_rate_exp())],
+        [Argument("exp", dict, learning_rate_exp()),
+        Argument("wsd", dict, learning_rate_wsd())],
         optional=True,
         default_tag="exp",
         doc=doc_lr,
@@ -2766,6 +2777,102 @@ def loss_ener_spin():
     ]
 
 
+@fitting_args_plugin.register("ener_readout")
+def fitting_ener_readout():
+    doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
+    doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches."
+    doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
+    doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
+    doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
+    doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
+    doc_trainable = f"Whether the parameters in the fitting net are trainable. This option can be\n\n\
+- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
+- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
+    doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
+    doc_seed = "Random seed for parameter initialization of the fitting net"
+    doc_atom_ener = "Specify the atomic energy in vacuum for each type"
+    doc_layer_name = (
+        "The name of the each layer. The length of this list should be equal to n_neuron + 1. "
+        "If two layers, either in the same fitting or different fittings, "
+        "have the same name, they will share the same neural network parameters. "
+        "The shape of these layers should be the same. "
+        "If null is given for a layer, parameters will not be shared."
+    )
+    doc_use_aparam_as_mask = (
+        "Whether to use the aparam as a mask in input."
+        "If True, the aparam will not be used in fitting net for embedding."
+        "When descrpt is se_a_mask, the aparam will be used as a mask to indicate the input atom is real/virtual. And use_aparam_as_mask should be set to True."
+    )
+
+    return [
+        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
+        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument(
+            "dim_case_embd",
+            int,
+            optional=True,
+            default=0,
+            doc=doc_only_pt_supported + doc_dim_case_embd,
+        ),
+        Argument(
+            "neuron",
+            list[int],
+            optional=True,
+            default=[120, 120, 120],
+            alias=["n_neuron"],
+            doc=doc_neuron,
+        ),
+        Argument(
+            "activation_function",
+            str,
+            optional=True,
+            default="tanh",
+            doc=doc_activation_function,
+        ),
+        Argument("precision", str, optional=True, default="default", doc=doc_precision),
+        Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
+        Argument(
+            "trainable",
+            [list[bool], bool],
+            optional=True,
+            default=True,
+            doc=doc_trainable,
+        ),
+        Argument(
+            "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
+        ),
+        Argument("seed", [int, None], optional=True, doc=doc_seed),
+        Argument(
+            "atom_ener",
+            list[Optional[float]],
+            optional=True,
+            default=[],
+            doc=doc_atom_ener,
+        ),
+        Argument("layer_name", list[str], optional=True, doc=doc_layer_name),
+        Argument(
+            "use_aparam_as_mask",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_use_aparam_as_mask,
+        ),
+        Argument(
+            "add_edge_readout",
+            bool,
+            optional=True,
+            default=True,
+        ),
+        Argument(
+            "slim_edge_readout",
+            bool,
+            optional=True,
+            default=False,
+        ),
+    ]
+
+
 @loss_args_plugin.register("dos")
 def loss_dos():
     doc_start_pref_dos = start_pref("Density of State (DOS)")
diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py
index 87a44aa70d..57a6708d76 100644
--- a/deepmd/utils/path.py
+++ b/deepmd/utils/path.py
@@ -329,7 +329,7 @@ def _load_h5py(cls, path: str, mode: str = "r") -> h5py.File:
         # this method has cache to avoid duplicated
         # loading from different DPH5Path
         # However the file will be never closed?
-        return h5py.File(path, mode)
+        return h5py.File(path, mode, locking=False)
 
     def load_numpy(self) -> np.ndarray:
         """Load NumPy array.

From e14804d5019da294e49cbbda567b35ebde726289 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 15 Aug 2025 18:49:09 +0800
Subject: [PATCH 06/27] add add_chg_spin_ebd

---
 .../pt/model/atomic_model/dp_atomic_model.py  |  6 ++-
 deepmd/pt/model/descriptor/dpa3.py            | 48 ++++++++++++++++++-
 deepmd/utils/argcheck.py                      | 14 +++++-
 3 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index e2be1eb097..b90aa113ce 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -6,7 +6,6 @@
 )
 
 import torch
-import numpy as np
 
 from deepmd.dpmodel import (
     FittingOutputDef,
@@ -242,6 +241,7 @@ def forward_atomic(
             nlist,
             mapping=mapping,
             comm_dict=comm_dict,
+            fparam=fparam,
         )
         assert descriptor is not None
         if self.enable_eval_descriptor_hook:
@@ -315,7 +315,9 @@ def wrapped_sampler():
 
         self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
         self.fitting_net.compute_input_stats(
-            wrapped_sampler, protection=self.data_stat_protect, stat_file_path=stat_file_path
+            wrapped_sampler,
+            protection=self.data_stat_protect,
+            stat_file_path=stat_file_path,
         )
         self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py
index df8016b0a1..3e532a61ae 100644
--- a/deepmd/pt/model/descriptor/dpa3.py
+++ b/deepmd/pt/model/descriptor/dpa3.py
@@ -31,6 +31,7 @@
     UpdateSel,
 )
 from deepmd.pt.utils.utils import (
+    ActivationFn,
     to_numpy_array,
 )
 from deepmd.utils.data_system import (
@@ -119,6 +120,7 @@ def __init__(
         use_tebd_bias: bool = False,
         use_loc_mapping: bool = True,
         type_map: Optional[list[str]] = None,
+        add_chg_spin_ebd: bool = False,
     ) -> None:
         super().__init__()
 
@@ -170,8 +172,10 @@ def init_subclass_params(sub_data, sub_class):
             precision=precision,
             seed=child_seed(seed, 1),
         )
+        self.act = ActivationFn(activation_function)
 
         self.use_econf_tebd = use_econf_tebd
+        self.add_chg_spin_ebd = add_chg_spin_ebd
         self.use_loc_mapping = use_loc_mapping
         self.use_tebd_bias = use_tebd_bias
         self.type_map = type_map
@@ -188,6 +192,33 @@ def init_subclass_params(sub_data, sub_class):
         self.concat_output_tebd = concat_output_tebd
         self.precision = precision
         self.prec = PRECISION_DICT[self.precision]
+
+        if self.add_chg_spin_ebd:
+            # -100 ~ 100 is a conservative bound
+            self.chg_embedding = TypeEmbedNet(
+                200,
+                self.tebd_dim,
+                precision=precision,
+                seed=child_seed(seed, 3),
+            )
+            # 100 is a conservative upper bound
+            self.spin_embedding = TypeEmbedNet(
+                100,
+                self.tebd_dim,
+                precision=precision,
+                seed=child_seed(seed, 4),
+            )
+            self.mix_cs_mlp = MLPLayer(
+                2 * self.tebd_dim,
+                self.tebd_dim,
+                precision=precision,
+                seed=child_seed(seed, 3),
+            )
+        else:
+            self.chg_embedding = None
+            self.spin_embedding = None
+            self.mix_cs_mlp = None
+
         self.exclude_types = exclude_types
         self.env_protection = env_protection
         self.trainable = trainable
@@ -245,7 +276,7 @@ def get_dim_out(self) -> int:
     def get_dim_emb(self) -> int:
         """Returns the embedding dimension of this descriptor."""
         return self.repflows.dim_emb
-    
+
     def get_norm_fact(self) -> list[float]:
         """Returns the norm factor."""
         return self.repflows.get_norm_fact()
@@ -457,6 +488,7 @@ def forward(
         nlist: torch.Tensor,
         mapping: Optional[torch.Tensor] = None,
         comm_dict: Optional[dict[str, torch.Tensor]] = None,
+        fparam: Optional[torch.Tensor] = None,
     ):
         """Compute the descriptor.
 
@@ -500,6 +532,20 @@ def forward(
             node_ebd_ext = self.type_embedding(extended_atype[:, :nloc])
         else:
             node_ebd_ext = self.type_embedding(extended_atype)
+
+        if self.add_chg_spin_ebd:
+            assert fparam is not None
+            assert self.chg_embedding is not None
+            assert self.spin_embedding is not None
+            charge = fparam[:, 0].to(dtype=torch.int64) + 100
+            spin = fparam[:, 1].to(dtype=torch.int64)
+            chg_ebd = self.chg_embedding(charge)
+            spin_ebd = self.spin_embedding(spin)
+            sys_cs_embd = self.act(
+                self.mix_cs_mlp(torch.cat((chg_ebd, spin_ebd), dim=-1))
+            )
+            node_ebd_ext = node_ebd_ext + sys_cs_embd.unsqueeze(1)
+
         node_ebd_inp = node_ebd_ext[:, :nloc, :]
         # repflows
         node_ebd, edge_ebd, h2, rot_mat, sw = self.repflows(
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 7ef7d02741..d9e6798167 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1387,6 +1387,12 @@ def descrpt_dpa3_args():
             default=False,
             doc=doc_concat_output_tebd,
         ),
+        Argument(
+            "add_chg_spin_ebd",
+            bool,
+            optional=True,
+            default=False,
+        ),
         Argument(
             "activation_function",
             str,
@@ -2443,6 +2449,7 @@ def learning_rate_exp():
     ]
     return args
 
+
 def learning_rate_wsd():
     doc_start_lr = "The learning rate at the start of the training."
     doc_stop_lr = "The desired learning rate at the end of the training. "
@@ -2454,13 +2461,16 @@ def learning_rate_wsd():
     ]
     return args
 
+
 def learning_rate_variant_type_args():
     doc_lr = "The type of the learning rate."
 
     return Variant(
         "type",
-        [Argument("exp", dict, learning_rate_exp()),
-        Argument("wsd", dict, learning_rate_wsd())],
+        [
+            Argument("exp", dict, learning_rate_exp()),
+            Argument("wsd", dict, learning_rate_wsd()),
+        ],
         optional=True,
         default_tag="exp",
         doc=doc_lr,

From 58d8c10d8b0ae1c8dda40eee65edf7cf880a4029 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 22 Oct 2025 20:00:41 +0800
Subject: [PATCH 07/27] update add_chg_spin_ebd for default fparam

---
 deepmd/pt/model/atomic_model/dp_atomic_model.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index b90aa113ce..20ecac3195 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -235,13 +235,24 @@ def forward_atomic(
         atype = extended_atype[:, :nloc]
         if self.do_grad_r() or self.do_grad_c():
             extended_coord.requires_grad_(True)
+
+        if self.fitting_net.get_dim_fparam() > 0 and fparam is None:
+            # use default fparam
+            default_fparam_tensor = self.fitting_net.get_default_fparam()
+            assert default_fparam_tensor is not None
+            fparam_input_for_des = torch.tile(
+                default_fparam_tensor.unsqueeze(0), [nframes, 1]
+            )
+        else:
+            fparam_input_for_des = fparam
+
         descriptor, rot_mat, g2, h2, sw = self.descriptor(
             extended_coord,
             extended_atype,
             nlist,
             mapping=mapping,
             comm_dict=comm_dict,
-            fparam=fparam,
+            fparam=fparam_input_for_des,
         )
         assert descriptor is not None
         if self.enable_eval_descriptor_hook:

From c98b1fc1b50e0e552c4ceb22ed9a585ebd16bbec Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Sun, 26 Oct 2025 15:55:18 +0800
Subject: [PATCH 08/27] fix multitask

---
 deepmd/pt/model/descriptor/dpa3.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py
index 3e532a61ae..9010638608 100644
--- a/deepmd/pt/model/descriptor/dpa3.py
+++ b/deepmd/pt/model/descriptor/dpa3.py
@@ -322,6 +322,9 @@ def share_params(self, base_class, shared_level, resume=False) -> None:
         # share all parameters in type_embedding, repflow
         if shared_level == 0:
             self._modules["type_embedding"] = base_class._modules["type_embedding"]
+            for kk in ["chg_embedding", "spin_embedding", "mix_cs_mlp"]:
+                if kk in self._modules:
+                    self._modules[kk] = base_class._modules[kk]
             self.repflows.share_params(base_class.repflows, 0, resume=resume)
         # shared_level: 1
         # share all parameters in type_embedding

From 82286fd5359448315d54e1e775ebc52c0f69c789 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 10 Sep 2025 16:50:10 +0800
Subject: [PATCH 09/27] add update_use_layernorm

---
 deepmd/dpmodel/descriptor/dpa3.py           |  2 ++
 deepmd/pt/model/descriptor/dpa3.py          |  1 +
 deepmd/pt/model/descriptor/repflow_layer.py | 21 +++++++++++++++++++++
 deepmd/pt/model/descriptor/repflows.py      |  3 +++
 deepmd/utils/argcheck.py                    |  6 ++++++
 5 files changed, 33 insertions(+)

diff --git a/deepmd/dpmodel/descriptor/dpa3.py b/deepmd/dpmodel/descriptor/dpa3.py
index 79fa6c8b68..1464b40391 100644
--- a/deepmd/dpmodel/descriptor/dpa3.py
+++ b/deepmd/dpmodel/descriptor/dpa3.py
@@ -177,6 +177,7 @@ def __init__(
         use_exp_switch: bool = False,
         use_dynamic_sel: bool = False,
         sel_reduce_factor: float = 10.0,
+        update_use_layernorm: bool = False,
     ) -> None:
         self.n_dim = n_dim
         self.e_dim = e_dim
@@ -207,6 +208,7 @@ def __init__(
         self.use_exp_switch = use_exp_switch
         self.use_dynamic_sel = use_dynamic_sel
         self.sel_reduce_factor = sel_reduce_factor
+        self.update_use_layernorm = update_use_layernorm
 
     def __getitem__(self, key):
         if hasattr(self, key):
diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py
index 9010638608..f824e0842b 100644
--- a/deepmd/pt/model/descriptor/dpa3.py
+++ b/deepmd/pt/model/descriptor/dpa3.py
@@ -167,6 +167,7 @@ def init_subclass_params(sub_data, sub_class):
             use_dynamic_sel=self.repflow_args.use_dynamic_sel,
             sel_reduce_factor=self.repflow_args.sel_reduce_factor,
             use_loc_mapping=use_loc_mapping,
+            update_use_layernorm=self.repflow_args.update_use_layernorm,
             exclude_types=exclude_types,
             env_protection=env_protection,
             precision=precision,
diff --git a/deepmd/pt/model/descriptor/repflow_layer.py b/deepmd/pt/model/descriptor/repflow_layer.py
index 36e738b8b2..ba3158e72b 100644
--- a/deepmd/pt/model/descriptor/repflow_layer.py
+++ b/deepmd/pt/model/descriptor/repflow_layer.py
@@ -58,6 +58,7 @@ def __init__(
         use_dynamic_sel: bool = False,
         sel_reduce_factor: float = 10.0,
         smooth_edge_update: bool = False,
+        update_use_layernorm: bool = False,
         activation_function: str = "silu",
         update_style: str = "res_residual",
         update_residual: float = 0.1,
@@ -96,6 +97,7 @@ def __init__(
         self.update_style = update_style
         self.update_residual = update_residual
         self.update_residual_init = update_residual_init
+        self.update_use_layernorm = update_use_layernorm
         self.a_compress_e_rate = a_compress_e_rate
         self.a_compress_use_split = a_compress_use_split
         self.precision = precision
@@ -194,6 +196,17 @@ def __init__(
                 )
             )
 
+        if self.update_use_layernorm:
+            self.node_layernorm = torch.nn.LayerNorm(self.n_dim)
+            self.edge_layernorm = torch.nn.LayerNorm(self.e_dim)
+            self.angle_layernorm = (
+                torch.nn.LayerNorm(self.a_dim) if self.update_angle else None
+            )
+        else:
+            self.node_layernorm = None
+            self.edge_layernorm = None
+            self.angle_layernorm = None
+
         if self.update_angle:
             self.angle_dim = self.a_dim
             if self.a_compress_rate == 0:
@@ -1117,6 +1130,14 @@ def forward(
 
         # update angle_ebd
         a_updated = self.list_update(a_update_list, "angle")
+        if self.update_use_layernorm:
+            assert self.node_layernorm is not None
+            n_updated = self.node_layernorm(n_updated)
+            assert self.edge_layernorm is not None
+            e_updated = self.edge_layernorm(e_updated)
+            if self.update_angle:
+                assert self.angle_layernorm is not None
+                a_updated = self.angle_layernorm(a_updated)
         return n_updated, e_updated, a_updated
 
     @torch.jit.export
diff --git a/deepmd/pt/model/descriptor/repflows.py b/deepmd/pt/model/descriptor/repflows.py
index 0d453c8aa0..d122f4834d 100644
--- a/deepmd/pt/model/descriptor/repflows.py
+++ b/deepmd/pt/model/descriptor/repflows.py
@@ -217,6 +217,7 @@ def __init__(
         use_dynamic_sel: bool = False,
         sel_reduce_factor: float = 10.0,
         use_loc_mapping: bool = True,
+        update_use_layernorm: bool = False,
         optim_update: bool = True,
         seed: Optional[Union[int, list[int]]] = None,
     ) -> None:
@@ -283,6 +284,7 @@ def __init__(
         self.precision = precision
         self.epsilon = 1e-4
         self.seed = seed
+        self.update_use_layernorm = update_use_layernorm
 
         self.edge_embd = MLPLayer(
             1, self.e_dim, precision=precision, seed=child_seed(seed, 0)
@@ -319,6 +321,7 @@ def __init__(
                     use_dynamic_sel=self.use_dynamic_sel,
                     sel_reduce_factor=self.sel_reduce_factor,
                     smooth_edge_update=self.smooth_edge_update,
+                    update_use_layernorm=self.update_use_layernorm,
                     seed=child_seed(child_seed(seed, 1), ii),
                 )
             )
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index d9e6798167..4cd43909d6 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1671,6 +1671,12 @@ def dpa3_repflow_args():
             default=10.0,
             doc=doc_sel_reduce_factor,
         ),
+        Argument(
+            "update_use_layernorm",
+            bool,
+            optional=True,
+            default=False,
+        ),
     ]
 
 

From 7c2287e6c8b44af8448a85373eed294a0c79a5b5 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 10 Sep 2025 17:09:47 +0800
Subject: [PATCH 10/27] add GatedMLP

---
 deepmd/dpmodel/descriptor/dpa3.py           |  4 +
 deepmd/pt/model/descriptor/dpa3.py          |  3 +
 deepmd/pt/model/descriptor/repflow_layer.py | 51 ++++++++++---
 deepmd/pt/model/descriptor/repflows.py      | 11 ++-
 deepmd/pt/model/network/mlp.py              | 83 +++++++++++++++++++++
 deepmd/utils/argcheck.py                    | 12 +++
 6 files changed, 149 insertions(+), 15 deletions(-)

diff --git a/deepmd/dpmodel/descriptor/dpa3.py b/deepmd/dpmodel/descriptor/dpa3.py
index 1464b40391..b66f78c04c 100644
--- a/deepmd/dpmodel/descriptor/dpa3.py
+++ b/deepmd/dpmodel/descriptor/dpa3.py
@@ -178,6 +178,8 @@ def __init__(
         use_dynamic_sel: bool = False,
         sel_reduce_factor: float = 10.0,
         update_use_layernorm: bool = False,
+        use_gated_mlp: bool = False,
+        gated_mlp_norm: str = "none",
     ) -> None:
         self.n_dim = n_dim
         self.e_dim = e_dim
@@ -209,6 +211,8 @@ def __init__(
         self.use_dynamic_sel = use_dynamic_sel
         self.sel_reduce_factor = sel_reduce_factor
         self.update_use_layernorm = update_use_layernorm
+        self.use_gated_mlp = use_gated_mlp
+        self.gated_mlp_norm = gated_mlp_norm
 
     def __getitem__(self, key):
         if hasattr(self, key):
diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py
index f824e0842b..2924f7fc0f 100644
--- a/deepmd/pt/model/descriptor/dpa3.py
+++ b/deepmd/pt/model/descriptor/dpa3.py
@@ -167,7 +167,10 @@ def init_subclass_params(sub_data, sub_class):
             use_dynamic_sel=self.repflow_args.use_dynamic_sel,
             sel_reduce_factor=self.repflow_args.sel_reduce_factor,
             use_loc_mapping=use_loc_mapping,
+            # followings are new added param
             update_use_layernorm=self.repflow_args.update_use_layernorm,
+            use_gated_mlp=self.repflow_args.use_gated_mlp,
+            gated_mlp_norm=self.repflow_args.gated_mlp_norm,
             exclude_types=exclude_types,
             env_protection=env_protection,
             precision=precision,
diff --git a/deepmd/pt/model/descriptor/repflow_layer.py b/deepmd/pt/model/descriptor/repflow_layer.py
index ba3158e72b..4d52f26a59 100644
--- a/deepmd/pt/model/descriptor/repflow_layer.py
+++ b/deepmd/pt/model/descriptor/repflow_layer.py
@@ -17,6 +17,7 @@
     get_residual,
 )
 from deepmd.pt.model.network.mlp import (
+    GatedMLP,
     MLPLayer,
 )
 from deepmd.pt.model.network.utils import (
@@ -59,6 +60,8 @@ def __init__(
         sel_reduce_factor: float = 10.0,
         smooth_edge_update: bool = False,
         update_use_layernorm: bool = False,
+        use_gated_mlp: bool = False,
+        gated_mlp_norm: str = "none",
         activation_function: str = "silu",
         update_style: str = "res_residual",
         update_residual: float = 0.1,
@@ -98,6 +101,10 @@ def __init__(
         self.update_residual = update_residual
         self.update_residual_init = update_residual_init
         self.update_use_layernorm = update_use_layernorm
+        self.use_gated_mlp = use_gated_mlp
+        if self.use_gated_mlp:
+            assert not optim_update, "Gated MLP does not support optim update!"
+        self.gated_mlp_norm = gated_mlp_norm
         self.a_compress_e_rate = a_compress_e_rate
         self.a_compress_use_split = a_compress_use_split
         self.precision = precision
@@ -160,12 +167,22 @@ def __init__(
             )
 
         # node edge message
-        self.node_edge_linear = MLPLayer(
-            self.edge_info_dim,
-            self.n_multi_edge_message * n_dim,
-            precision=precision,
-            seed=child_seed(seed, 4),
-        )
+        if not self.use_gated_mlp:
+            self.node_edge_linear = MLPLayer(
+                self.edge_info_dim,
+                self.n_multi_edge_message * n_dim,
+                precision=precision,
+                seed=child_seed(seed, 4),
+            )
+        else:
+            self.node_edge_linear = GatedMLP(
+                self.edge_info_dim,
+                self.n_multi_edge_message * n_dim,
+                activation_function=self.activation_function,
+                norm=self.gated_mlp_norm,
+                precision=precision,
+                seed=child_seed(seed, 4),
+            )
         if self.update_style == "res_residual":
             for head_index in range(self.n_multi_edge_message):
                 self.n_residual.append(
@@ -245,12 +262,22 @@ def __init__(
                     self.a_compress_e_linear = None
 
             # edge angle message
-            self.edge_angle_linear1 = MLPLayer(
-                self.angle_dim,
-                self.e_dim,
-                precision=precision,
-                seed=child_seed(seed, 10),
-            )
+            if not self.use_gated_mlp:
+                self.edge_angle_linear1 = MLPLayer(
+                    self.angle_dim,
+                    self.e_dim,
+                    precision=precision,
+                    seed=child_seed(seed, 10),
+                )
+            else:
+                self.edge_angle_linear1 = GatedMLP(
+                    self.angle_dim,
+                    self.e_dim,
+                    activation_function=self.activation_function,
+                    norm=self.gated_mlp_norm,
+                    precision=precision,
+                    seed=child_seed(seed, 10),
+                )
             self.edge_angle_linear2 = MLPLayer(
                 self.e_dim,
                 self.e_dim,
diff --git a/deepmd/pt/model/descriptor/repflows.py b/deepmd/pt/model/descriptor/repflows.py
index d122f4834d..ec375a3acd 100644
--- a/deepmd/pt/model/descriptor/repflows.py
+++ b/deepmd/pt/model/descriptor/repflows.py
@@ -218,6 +218,8 @@ def __init__(
         sel_reduce_factor: float = 10.0,
         use_loc_mapping: bool = True,
         update_use_layernorm: bool = False,
+        use_gated_mlp: bool = False,
+        gated_mlp_norm: str = "none",
         optim_update: bool = True,
         seed: Optional[Union[int, list[int]]] = None,
     ) -> None:
@@ -285,6 +287,8 @@ def __init__(
         self.epsilon = 1e-4
         self.seed = seed
         self.update_use_layernorm = update_use_layernorm
+        self.use_gated_mlp = use_gated_mlp
+        self.gated_mlp_norm = gated_mlp_norm
 
         self.edge_embd = MLPLayer(
             1, self.e_dim, precision=precision, seed=child_seed(seed, 0)
@@ -322,6 +326,8 @@ def __init__(
                     sel_reduce_factor=self.sel_reduce_factor,
                     smooth_edge_update=self.smooth_edge_update,
                     update_use_layernorm=self.update_use_layernorm,
+                    use_gated_mlp=self.use_gated_mlp,
+                    gated_mlp_norm=self.gated_mlp_norm,
                     seed=child_seed(child_seed(seed, 1), ii),
                 )
             )
@@ -336,7 +342,7 @@ def __init__(
         self.register_buffer("mean", mean)
         self.register_buffer("stddev", stddev)
         self.stats = None
-    
+
     additional_output_for_fitting: dict[str, Optional[torch.Tensor]]
 
     def get_rcut(self) -> float:
@@ -370,7 +376,7 @@ def get_dim_in(self) -> int:
     def get_dim_emb(self) -> int:
         """Returns the embedding dimension e_dim."""
         return self.e_dim
-    
+
     def get_additional_output_for_fitting(self):
         return self.additional_output_for_fitting
 
@@ -381,7 +387,6 @@ def get_norm_fact(self) -> list[float]:
             # float(self.dynamic_a_sel if self.use_dynamic_sel else self.a_sel),
         ]
 
-
     def __setitem__(self, key, value) -> None:
         if key in ("avg", "data_avg", "davg"):
             self.mean = value
diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py
index 22675d6163..463158d7e5 100644
--- a/deepmd/pt/model/network/mlp.py
+++ b/deepmd/pt/model/network/mlp.py
@@ -275,6 +275,89 @@ def check_load_param(ss):
         return obj
 
 
+class GatedMLP(nn.Module):
+    """Gated MLP layer computing ``act(core(x)) * sigmoid(gate(x))``.
+    A similar model structure is used in CGCNN and M3GNet.
+    """
+
+    def __init__(
+        self,
+        input_dim: int,
+        output_dim: int,
+        *,
+        activation_function: Optional[str] = None,
+        norm: str = "batch",
+        bias: bool = True,
+        precision: str = DEFAULT_PRECISION,
+        seed: Optional[Union[int, list[int]]] = None,
+    ) -> None:
+        """Initialize a gated MLP.
+
+        Args:
+            input_dim (int): the input dimension
+            output_dim (int): the output dimension
+            activation_function (str, optional): name handed to ``ActivationFn`` for
+                the core branch; the gate branch always uses a sigmoid.
+                Default = None
+            norm (str): normalization applied to each branch before its activation,
+                resolved by ``find_normalization``: "batch", "layer" or "none".
+                Default = "batch"
+            bias (bool): whether to use bias in both linear layers. Default = True
+            precision, seed: forwarded unchanged to both linear layers.
+        """
+        super().__init__()
+        self.mlp_core = MLPLayer(
+            input_dim,
+            output_dim,
+            bias=bias,
+            precision=precision,
+            seed=seed,
+        )
+        self.mlp_gate = MLPLayer(
+            input_dim,
+            output_dim,
+            bias=bias,
+            precision=precision,
+            seed=seed,
+        )
+        # for jit: alias the core branch's matrix/bias here (presumably for MLPLayer-style access)
+        self.matrix = self.mlp_core.matrix
+        self.bias = self.mlp_core.bias
+        self.act = ActivationFn(activation_function)
+        self.sigmoid = nn.Sigmoid()
+        self.norm1 = find_normalization(name=norm, dim=output_dim)
+        self.norm2 = find_normalization(name=norm, dim=output_dim)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Return ``act(core(x)) * sigmoid(gate(x))``, normalizing each branch first if set.
+
+        Args:
+            x (Tensor): input tensor; assumes the last dimension is input_dim (TODO confirm)
+
+        Returns
+        -------
+        Tensor: the element-wise product of the core and gate branches
+        """
+        if self.norm1 is None:
+            core = self.act(self.mlp_core(x))
+            gate = self.sigmoid(self.mlp_gate(x))
+        else:
+            core = self.act(self.norm1(self.mlp_core(x)))
+            gate = self.sigmoid(self.norm2(self.mlp_gate(x)))
+        return core * gate
+
+
+def find_normalization(name: str, dim: Optional[int] = None) -> Optional[nn.Module]:
+    """Return the normalization layer for name ("batch"/"layer"), else None."""
+    if name is None:
+        return None
+    # Build only the requested layer; "none" and unknown names mean no normalization.
+    key = name.lower()
+    if key == "batch":
+        return nn.BatchNorm1d(dim)
+    return nn.LayerNorm(dim) if key == "layer" else None
+
+
 MLP_ = make_multilayer_network(MLPLayer, nn.Module)
 
 
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 4cd43909d6..2da6dd349a 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1677,6 +1677,18 @@ def dpa3_repflow_args():
             optional=True,
             default=False,
         ),
+        Argument(
+            "use_gated_mlp",
+            bool,
+            optional=True,
+            default=False,
+        ),
+        Argument(
+            "gated_mlp_norm",
+            str,
+            optional=True,
+            default="none",
+        ),
     ]
 
 

From 1404623a4b3395e940483eedd78a6fb0bf147688 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 5 Nov 2025 18:01:58 +0800
Subject: [PATCH 11/27] add add_case_embd

---
 .../pt/model/atomic_model/dp_atomic_model.py  |  6 ++++
 deepmd/pt/model/descriptor/dpa3.py            | 36 +++++++++++++++++--
 deepmd/pt/model/model/__init__.py             |  4 +++
 deepmd/pt/model/task/fitting.py               |  8 +++++
 deepmd/utils/argcheck.py                      | 12 +++++++
 5 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index 20ecac3195..dda10b82a8 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -246,6 +246,11 @@ def forward_atomic(
         else:
             fparam_input_for_des = fparam
 
+        if self.fitting_net.get_dim_case_embd() > 0:
+            case_embd_input_for_des = self.fitting_net.get_case_embd()
+        else:
+            case_embd_input_for_des = None
+
         descriptor, rot_mat, g2, h2, sw = self.descriptor(
             extended_coord,
             extended_atype,
@@ -253,6 +258,7 @@ def forward_atomic(
             mapping=mapping,
             comm_dict=comm_dict,
             fparam=fparam_input_for_des,
+            case_embd=case_embd_input_for_des,
         )
         assert descriptor is not None
         if self.enable_eval_descriptor_hook:
diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py
index 2924f7fc0f..5134177392 100644
--- a/deepmd/pt/model/descriptor/dpa3.py
+++ b/deepmd/pt/model/descriptor/dpa3.py
@@ -121,6 +121,8 @@ def __init__(
         use_loc_mapping: bool = True,
         type_map: Optional[list[str]] = None,
         add_chg_spin_ebd: bool = False,
+        add_case_embd: bool = False,
+        dim_case_embd: int = 0,
     ) -> None:
         super().__init__()
 
@@ -180,6 +182,8 @@ def init_subclass_params(sub_data, sub_class):
 
         self.use_econf_tebd = use_econf_tebd
         self.add_chg_spin_ebd = add_chg_spin_ebd
+        self.add_case_embd = add_case_embd
+        self.dim_case_embd = dim_case_embd
         self.use_loc_mapping = use_loc_mapping
         self.use_tebd_bias = use_tebd_bias
         self.type_map = type_map
@@ -216,13 +220,25 @@ def init_subclass_params(sub_data, sub_class):
                 2 * self.tebd_dim,
                 self.tebd_dim,
                 precision=precision,
-                seed=child_seed(seed, 3),
+                seed=child_seed(seed, 5),
             )
         else:
             self.chg_embedding = None
             self.spin_embedding = None
             self.mix_cs_mlp = None
 
+        if self.add_case_embd:
+            assert self.dim_case_embd > 0
+            self.case_embd_mlp = MLPLayer(
+                self.dim_case_embd,
+                self.tebd_dim,
+                precision=precision,
+                bias=False,
+                seed=child_seed(seed, 6),
+            )
+        else:
+            self.case_embd_mlp = None
+
         self.exclude_types = exclude_types
         self.env_protection = env_protection
         self.trainable = trainable
@@ -326,7 +342,12 @@ def share_params(self, base_class, shared_level, resume=False) -> None:
         # share all parameters in type_embedding, repflow
         if shared_level == 0:
             self._modules["type_embedding"] = base_class._modules["type_embedding"]
-            for kk in ["chg_embedding", "spin_embedding", "mix_cs_mlp"]:
+            for kk in [
+                "chg_embedding",
+                "spin_embedding",
+                "mix_cs_mlp",
+                "case_embd_mlp",
+            ]:
                 if kk in self._modules:
                     self._modules[kk] = base_class._modules[kk]
             self.repflows.share_params(base_class.repflows, 0, resume=resume)
@@ -496,6 +517,7 @@ def forward(
         mapping: Optional[torch.Tensor] = None,
         comm_dict: Optional[dict[str, torch.Tensor]] = None,
         fparam: Optional[torch.Tensor] = None,
+        case_embd: Optional[torch.Tensor] = None,
     ):
         """Compute the descriptor.
 
@@ -511,6 +533,10 @@ def forward(
             The index mapping, mapps extended region index to local region.
         comm_dict
             The data needed for communication for parallel inference.
+        fparam
+            The frame-level parameters. shape: nf x nfparam
+        case_embd
+            The case (dataset) embedding for multitask training with shared fitting. shape: dim_case_embd (one vector broadcast over all frames and atoms)
 
         Returns
         -------
@@ -553,6 +579,12 @@ def forward(
             )
             node_ebd_ext = node_ebd_ext + sys_cs_embd.unsqueeze(1)
 
+        if self.add_case_embd:
+            assert case_embd is not None
+            assert self.case_embd_mlp is not None
+            case_embd_out = self.case_embd_mlp(case_embd)
+            node_ebd_ext = node_ebd_ext + case_embd_out.unsqueeze(0).unsqueeze(0)
+
         node_ebd_inp = node_ebd_ext[:, :nloc, :]
         # repflows
         node_ebd, edge_ebd, h2, rot_mat, sw = self.repflows(
diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py
index d78a1ea0b5..d8980ba4c7 100644
--- a/deepmd/pt/model/model/__init__.py
+++ b/deepmd/pt/model/model/__init__.py
@@ -83,6 +83,10 @@ def _get_standard_model_components(model_params, ntypes):
     # descriptor
     model_params["descriptor"]["ntypes"] = ntypes
     model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"])
+    # explicitly add dim_case_embd if using case embedding
+    if model_params["descriptor"].get("type", "se_e2_a") in ["dpa3"]:
+        dim_case_embd = model_params.get("fitting_net", {}).get("dim_case_embd", 0)
+        model_params["descriptor"]["dim_case_embd"] = dim_case_embd
     descriptor = BaseDescriptor(**model_params["descriptor"])
     # fitting
     fitting_net = model_params.get("fitting_net", {})
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 35c26c376c..4da177c5a1 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -599,6 +599,14 @@ def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.numb_aparam
 
+    def get_dim_case_embd(self) -> int:
+        """Get the dimension of the case (dataset) embedding."""
+        return self.dim_case_embd
+
+    def get_case_embd(self) -> Optional[torch.Tensor]:
+        """Get the dataset embedding."""
+        return self.case_embd
+
     # make jit happy
     exclude_types: list[int]
 
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 2da6dd349a..a94200fbec 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1376,6 +1376,10 @@ def descrpt_dpa3_args():
         "Whether to use local atom index mapping in training or non-parallel inference. "
         "When True, local indexing and mapping are applied to neighbor lists and embeddings during descriptor computation."
     )
+    doc_add_chg_spin_ebd = (
+        "Whether to use charge and spin embedding in the type embedding."
+    )
+    doc_add_case_embd = "Whether to use case (dataset) embedding in the type embedding."
     return [
         # doc_repflow args
         Argument("repflow", dict, dpa3_repflow_args(), doc=doc_repflow),
@@ -1392,6 +1396,14 @@ def descrpt_dpa3_args():
             bool,
             optional=True,
             default=False,
+            doc=doc_add_chg_spin_ebd,
+        ),
+        Argument(
+            "add_case_embd",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_add_case_embd,
         ),
         Argument(
             "activation_function",

From 219266cf6d2ba81dfd1b60551cb4b53b68f9d6f5 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 19 Nov 2025 17:15:16 +0800
Subject: [PATCH 12/27] Update training.py

---
 deepmd/pt/train/training.py | 67 +++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 28 deletions(-)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index e389252dc9..126465f42f 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -61,7 +61,7 @@
 )
 from deepmd.pt.utils.learning_rate import (
     LearningRateExp,
-    LearningRateWSD
+    LearningRateWSD,
 )
 from deepmd.pt.utils.stat import (
     make_stat_input,
@@ -345,14 +345,14 @@ def get_lr(lr_params):
                 self.validation_data,
                 self.valid_numb_batch,
             ) = get_data_loader(training_data, validation_data, training_params)
-            training_data.print_summary(
-                "training", to_numpy_array(self.training_dataloader.sampler.weights)
-            )
-            if validation_data is not None:
-                validation_data.print_summary(
-                    "validation",
-                    to_numpy_array(self.validation_dataloader.sampler.weights),
-                )
+            # training_data.print_summary(
+            #     "training", to_numpy_array(self.training_dataloader.sampler.weights)
+            # )
+            # if validation_data is not None:
+            #     validation_data.print_summary(
+            #         "validation",
+            #         to_numpy_array(self.validation_dataloader.sampler.weights),
+            #     )
         else:
             (
                 self.training_dataloader,
@@ -388,20 +388,20 @@ def get_lr(lr_params):
                     training_params["data_dict"][model_key],
                 )
 
-                training_data[model_key].print_summary(
-                    f"training in {model_key}",
-                    to_numpy_array(self.training_dataloader[model_key].sampler.weights),
-                )
-                if (
-                    validation_data is not None
-                    and validation_data[model_key] is not None
-                ):
-                    validation_data[model_key].print_summary(
-                        f"validation in {model_key}",
-                        to_numpy_array(
-                            self.validation_dataloader[model_key].sampler.weights
-                        ),
-                    )
+                # training_data[model_key].print_summary(
+                #     f"training in {model_key}",
+                #     to_numpy_array(self.training_dataloader[model_key].sampler.weights),
+                # )
+                # if (
+                #     validation_data is not None
+                #     and validation_data[model_key] is not None
+                # ):
+                #     validation_data[model_key].print_summary(
+                #         f"validation in {model_key}",
+                #         to_numpy_array(
+                #             self.validation_dataloader[model_key].sampler.weights
+                #         ),
+                #     )
 
         # Learning rate
         self.warmup_steps = training_params.get("warmup_steps", 0)
@@ -604,13 +604,20 @@ def single_model_finetune(
 
         # Multi-task share params
         if shared_links is not None:
-            _data_stat_protect = np.array([model_params["model_dict"][ii].get("data_stat_protect", 1e-2) for ii in model_params["model_dict"]])
-            assert np.allclose(_data_stat_protect, _data_stat_protect[0]), f"Model key 'data_stat_protect' must be the same in each branch when multitask!"
+            _data_stat_protect = np.array(
+                [
+                    model_params["model_dict"][ii].get("data_stat_protect", 1e-2)
+                    for ii in model_params["model_dict"]
+                ]
+            )
+            assert np.allclose(_data_stat_protect, _data_stat_protect[0]), (
+                "Model key 'data_stat_protect' must be the same in each branch when multitask!"
+            )
             self.wrapper.share_params(
                 shared_links,
                 resume=(resuming and not self.finetune_update_stat) or self.rank != 0,
-                model_key_prob_map = dict(zip(self.model_keys, self.model_prob)),
-                data_stat_protect = _data_stat_protect[0]
+                model_key_prob_map=dict(zip(self.model_keys, self.model_prob)),
+                data_stat_protect=_data_stat_protect[0],
             )
 
         if dist.is_available() and dist.is_initialized():
@@ -1212,7 +1219,11 @@ def print_on_training(
 def get_additional_data_requirement(_model):
     additional_data_requirement = []
     if _model.get_dim_fparam() > 0:
-        _fparam_default = _model.get_default_fparam().cpu().numpy() if _model.has_default_fparam() else 0.0
+        _fparam_default = (
+            _model.get_default_fparam().cpu().numpy()
+            if _model.has_default_fparam()
+            else 0.0
+        )
         fparam_requirement_items = [
             DataRequirementItem(
                 "fparam",

From e2777c03c40e9b487f0f20fab29318af52c50be3 Mon Sep 17 00:00:00 2001
From: Chun Cai <amoycaic@gmail.com>
Date: Fri, 29 Aug 2025 15:17:34 +0800
Subject: [PATCH 13/27] feat: handle masked forces in test (#4893)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- New Features
  - Added per-atom weighting for force evaluation: computes and reports
    weighted MAE/RMSE alongside unweighted metrics, includes weighted
    metrics in system-average summaries, logs weighted force metrics, and
    safely handles zero-weight cases. Also propagates the per-atom weight
    field into reporting.

- Tests
  - Added end-to-end tests validating weighted vs unweighted force
    MAE/RMSE and verifying evaluator outputs when using per-atom weight
    masks.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 deepmd/entrypoints/test.py      | 24 +++++++-
 source/tests/pt/test_dp_test.py | 99 +++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py
index 5b22d16be4..69b1704471 100644
--- a/deepmd/entrypoints/test.py
+++ b/deepmd/entrypoints/test.py
@@ -291,6 +291,7 @@ def test_ener(
 
     data.add("energy", 1, atomic=False, must=False, high_prec=True)
     data.add("force", 3, atomic=True, must=False, high_prec=False)
+    data.add("atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3)
     data.add("virial", 9, atomic=False, must=False, high_prec=False)
     if dp.has_efield:
         data.add("efield", 3, atomic=True, must=True, high_prec=False)
@@ -317,6 +318,7 @@ def test_ener(
     find_force = test_data.get("find_force")
     find_virial = test_data.get("find_virial")
     find_force_mag = test_data.get("find_force_mag")
+    find_atom_pref = test_data.get("find_atom_pref")
     mixed_type = data.mixed_type
     natoms = len(test_data["type"][0])
     nframes = test_data["box"].shape[0]
@@ -423,6 +425,16 @@ def test_ener(
     diff_f = force - test_data["force"][:numb_test]
     mae_f = mae(diff_f)
     rmse_f = rmse(diff_f)
+    size_f = diff_f.size
+    if find_atom_pref == 1:
+        atom_weight = test_data["atom_pref"][:numb_test]
+        weight_sum = np.sum(atom_weight)
+        if weight_sum > 0:
+            mae_fw = np.sum(np.abs(diff_f) * atom_weight) / weight_sum
+            rmse_fw = np.sqrt(np.sum(diff_f * diff_f * atom_weight) / weight_sum)
+        else:
+            mae_fw = 0.0
+            rmse_fw = 0.0
     diff_v = virial - test_data["virial"][:numb_test]
     mae_v = mae(diff_v)
     rmse_v = rmse(diff_v)
@@ -457,8 +469,13 @@ def test_ener(
     if not out_put_spin and find_force == 1:
         log.info(f"Force  MAE         : {mae_f:e} eV/A")
         log.info(f"Force  RMSE        : {rmse_f:e} eV/A")
-        dict_to_return["mae_f"] = (mae_f, force.size)
-        dict_to_return["rmse_f"] = (rmse_f, force.size)
+        dict_to_return["mae_f"] = (mae_f, size_f)
+        dict_to_return["rmse_f"] = (rmse_f, size_f)
+        if find_atom_pref == 1:
+            log.info(f"Force weighted MAE : {mae_fw:e} eV/A")
+            log.info(f"Force weighted RMSE: {rmse_fw:e} eV/A")
+            dict_to_return["mae_fw"] = (mae_fw, weight_sum)
+            dict_to_return["rmse_fw"] = (rmse_fw, weight_sum)
     if out_put_spin and find_force == 1:
         log.info(f"Force atom MAE      : {mae_fr:e} eV/A")
         log.info(f"Force atom RMSE     : {rmse_fr:e} eV/A")
@@ -604,6 +621,9 @@ def print_ener_sys_avg(avg: dict[str, float]) -> None:
     if "rmse_f" in avg:
         log.info(f"Force  MAE         : {avg['mae_f']:e} eV/A")
         log.info(f"Force  RMSE        : {avg['rmse_f']:e} eV/A")
+        if "rmse_fw" in avg:
+            log.info(f"Force weighted MAE : {avg['mae_fw']:e} eV/A")
+            log.info(f"Force weighted RMSE: {avg['rmse_fw']:e} eV/A")
     else:
         log.info(f"Force atom MAE      : {avg['mae_fr']:e} eV/A")
         log.info(f"Force spin MAE      : {avg['mae_fm']:e} eV/uB")
diff --git a/source/tests/pt/test_dp_test.py b/source/tests/pt/test_dp_test.py
index c2915c7ee7..085bff88de 100644
--- a/source/tests/pt/test_dp_test.py
+++ b/source/tests/pt/test_dp_test.py
@@ -15,12 +15,19 @@
 import torch
 
 from deepmd.entrypoints.test import test as dp_test
+from deepmd.entrypoints.test import test_ener as dp_test_ener
+from deepmd.infer.deep_eval import (
+    DeepEval,
+)
 from deepmd.pt.entrypoints.main import (
     get_trainer,
 )
 from deepmd.pt.utils.utils import (
     to_numpy_array,
 )
+from deepmd.utils.data import (
+    DeepmdData,
+)
 
 from .model.test_permutation import (
     model_property,
@@ -140,6 +147,98 @@ def setUp(self) -> None:
             json.dump(self.config, fp, indent=4)
 
 
+class TestDPTestForceWeight(DPTest, unittest.TestCase):
+    def setUp(self) -> None:
+        self.detail_file = "test_dp_test_force_weight_detail"
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        system_dir = self._prepare_weighted_system()
+        data_file = [system_dir]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.system_dir = system_dir
+        self.input_json = "test_dp_test_force_weight.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+
+    def _prepare_weighted_system(self) -> str:
+        src = Path(__file__).parent / "water/data/single"
+        tmp_dir = tempfile.mkdtemp()
+        shutil.copytree(src, tmp_dir, dirs_exist_ok=True)
+        set_dir = Path(tmp_dir) / "set.000"
+        forces = np.load(set_dir / "force.npy")
+        forces[0, :3] += 1.0
+        forces[0, -3:] += 10.0
+        np.save(set_dir / "force.npy", forces)
+        natoms = forces.shape[1] // 3
+        atom_pref = np.ones((forces.shape[0], natoms), dtype=forces.dtype)
+        atom_pref[:, 0] = 2.0
+        atom_pref[:, -1] = 0.0
+        np.save(set_dir / "atom_pref.npy", atom_pref)
+        return tmp_dir
+
+    def test_force_weight(self) -> None:
+        trainer = get_trainer(deepcopy(self.config))
+        with torch.device("cpu"):
+            trainer.get_data(is_train=False)
+        model = torch.jit.script(trainer.model)
+        tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth")
+        torch.jit.save(model, tmp_model.name)
+        dp = DeepEval(tmp_model.name)
+        data = DeepmdData(
+            self.system_dir,
+            set_prefix="set",
+            shuffle_test=False,
+            type_map=dp.get_type_map(),
+            sort_atoms=False,
+        )
+        err = dp_test_ener(
+            dp,
+            data,
+            self.system_dir,
+            numb_test=1,
+            detail_file=None,
+            has_atom_ener=False,
+        )
+        test_data = data.get_test()
+        coord = test_data["coord"].reshape([1, -1])
+        box = test_data["box"][:1]
+        atype = test_data["type"][0]
+        ret = dp.eval(
+            coord,
+            box,
+            atype,
+            fparam=None,
+            aparam=None,
+            atomic=False,
+            efield=None,
+            mixed_type=False,
+            spin=None,
+        )
+        force_pred = ret[1].reshape([1, -1])
+        force_true = test_data["force"][:1]
+        weight = test_data["atom_pref"][:1]
+        diff = force_pred - force_true
+        mae_unweighted = np.sum(np.abs(diff)) / diff.size
+        rmse_unweighted = np.sqrt(np.sum(diff * diff) / diff.size)
+        denom = weight.sum()
+        mae_weighted = np.sum(np.abs(diff) * weight) / denom
+        rmse_weighted = np.sqrt(np.sum(diff * diff * weight) / denom)
+        np.testing.assert_allclose(err["mae_f"][0], mae_unweighted)
+        np.testing.assert_allclose(err["rmse_f"][0], rmse_unweighted)
+        np.testing.assert_allclose(err["mae_fw"][0], mae_weighted)
+        np.testing.assert_allclose(err["rmse_fw"][0], rmse_weighted)
+        os.unlink(tmp_model.name)
+
+    def tearDown(self) -> None:
+        super().tearDown()
+        shutil.rmtree(self.system_dir)
+
+
 class TestDPTestPropertySeA(unittest.TestCase):
     def setUp(self) -> None:
         self.detail_file = "test_dp_test_property_detail"

From 7f547b880c460aa081d2d1843355eab977e12987 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 19 Nov 2025 17:54:31 +0800
Subject: [PATCH 14/27] add use_default_pf

---
 deepmd/pt/loss/ener.py   | 9 +++++++--
 deepmd/utils/argcheck.py | 6 ++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py
index 10e2bf9971..75efd0277f 100644
--- a/deepmd/pt/loss/ener.py
+++ b/deepmd/pt/loss/ener.py
@@ -54,6 +54,7 @@ def __init__(
         use_l1_all: bool = False,
         inference=False,
         use_huber=False,
+        use_default_pf=False,
         huber_delta=0.01,
         **kwargs,
     ) -> None:
@@ -131,6 +132,7 @@ def __init__(
         self.limit_pref_pf = limit_pref_pf
         self.start_pref_gf = start_pref_gf
         self.limit_pref_gf = limit_pref_gf
+        self.use_default_pf = use_default_pf
         self.relative_f = relative_f
         self.enable_atom_ener_coeff = enable_atom_ener_coeff
         self.numb_generalized_coord = numb_generalized_coord
@@ -301,7 +303,9 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
 
             if self.has_pf and "atom_pref" in label:
                 atom_pref = label["atom_pref"]
-                find_atom_pref = label.get("find_atom_pref", 0.0)
+                find_atom_pref = (
+                    label.get("find_atom_pref", 0.0) if not self.use_default_pf else 1.0
+                )
                 pref_pf = pref_pf * find_atom_pref
                 atom_pref_reshape = atom_pref.reshape(-1)
                 l2_pref_force_loss = (torch.square(diff_f) * atom_pref_reshape).mean()
@@ -410,7 +414,7 @@ def label_requirement(self) -> list[DataRequirementItem]:
                     high_prec=True,
                 )
             )
-        if self.has_f:
+        if self.has_f or self.has_pf or self.relative_f is not None or self.has_gf:
             label_requirement.append(
                 DataRequirementItem(
                     "force",
@@ -449,6 +453,7 @@ def label_requirement(self) -> list[DataRequirementItem]:
                     must=False,
                     high_prec=False,
                     repeat=3,
+                    default=1.0,
                 )
             )
         if self.has_gf > 0:
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index a94200fbec..7f448fa1ee 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2659,6 +2659,12 @@ def loss_ener():
             default=0.00,
             doc=doc_limit_pref_pf,
         ),
+        Argument(
+            "use_default_pf",
+            bool,
+            optional=True,
+            default=False,
+        ),
         Argument("relative_f", [float, None], optional=True, doc=doc_relative_f),
         Argument(
             "enable_atom_ener_coeff",

From 773fb32259514bd0cc610b7b22cea761131b0528 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 30 May 2025 21:54:47 +0800
Subject: [PATCH 15/27] add init from direct model

---
 deepmd/pt/train/training.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 126465f42f..12a891cf32 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -541,8 +541,30 @@ def collect_single_finetune_params(
                     state_dict["_extra_state"] = self.wrapper.state_dict()[
                         "_extra_state"
                     ]
-
-                self.wrapper.load_state_dict(state_dict)
+                try:
+                    self.wrapper.load_state_dict(state_dict)
+                except RuntimeError as e:
+                    # load failed: assume a direct-fitting checkpoint, drop its direct heads and retry
+                    rm_list = []
+                    for kk in state_dict:
+                        # delete direct heads
+                        if (
+                            "fitting_net.force_embed." in kk
+                            or "fitting_net.noise_embed" in kk
+                        ):
+                            rm_list.append(kk)
+                    for kk in rm_list:
+                        state_dict.pop(kk)
+                    state_dict["_extra_state"] = self.wrapper.state_dict()[
+                        "_extra_state"
+                    ]
+                    out_shape_list = [
+                        "model.Default.atomic_model.out_bias",
+                        "model.Default.atomic_model.out_std",
+                    ]
+                    for kk in out_shape_list:
+                        state_dict[kk] = state_dict[kk][:1, :, :1]
+                    self.wrapper.load_state_dict(state_dict)
 
                 # change bias for fine-tuning
                 if finetune_model is not None:

From a18fd7299026425b8fd5b3018c302fbf792b6166 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Sat, 31 May 2025 13:30:03 +0800
Subject: [PATCH 16/27] Update training.py

---
 deepmd/pt/train/training.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 12a891cf32..7e0761915f 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -541,6 +541,9 @@ def collect_single_finetune_params(
                     state_dict["_extra_state"] = self.wrapper.state_dict()[
                         "_extra_state"
                     ]
+                old_model_params = self.wrapper.state_dict()["_extra_state"][
+                    "model_params"
+                ]
                 try:
                     self.wrapper.load_state_dict(state_dict)
                 except RuntimeError as e:
@@ -555,9 +558,7 @@ def collect_single_finetune_params(
                             rm_list.append(kk)
                     for kk in rm_list:
                         state_dict.pop(kk)
-                    state_dict["_extra_state"] = self.wrapper.state_dict()[
-                        "_extra_state"
-                    ]
+                    state_dict["_extra_state"]["model_params"] = old_model_params
                     out_shape_list = [
                         "model.Default.atomic_model.out_bias",
                         "model.Default.atomic_model.out_std",

From 32b94312631b2dceff189bd0d3f2ed13f1062a77 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Thu, 4 Sep 2025 19:53:30 +0800
Subject: [PATCH 17/27] add full default fparam

---
 .../dpmodel/atomic_model/base_atomic_model.py |   4 +
 .../dpmodel/atomic_model/dp_atomic_model.py   |   4 +
 deepmd/dpmodel/fitting/dipole_fitting.py      |   7 +-
 deepmd/dpmodel/fitting/dos_fitting.py         |   4 +-
 deepmd/dpmodel/fitting/ener_fitting.py        |   4 +-
 deepmd/dpmodel/fitting/general_fitting.py     |  33 +++++-
 deepmd/dpmodel/fitting/invar_fitting.py       |   7 +-
 .../dpmodel/fitting/polarizability_fitting.py |   9 +-
 deepmd/dpmodel/fitting/property_fitting.py    |   9 +-
 deepmd/dpmodel/infer/deep_eval.py             |   4 +
 deepmd/dpmodel/model/make_model.py            |   4 +
 deepmd/infer/deep_eval.py                     |   2 +
 deepmd/jax/fitting/fitting.py                 |   1 +
 deepmd/pd/model/task/ener.py                  |   2 +-
 deepmd/pd/model/task/fitting.py               |   9 +-
 deepmd/pd/model/task/invar_fitting.py         |   2 +-
 deepmd/pt/infer/deep_eval.py                  |   7 +-
 .../model/atomic_model/base_atomic_model.py   |   4 +
 .../pt/model/atomic_model/dp_atomic_model.py  |   1 +
 deepmd/pt/model/model/make_model.py           |   2 +-
 deepmd/pt/model/task/dipole.py                |  10 +-
 deepmd/pt/model/task/dos.py                   |   4 +-
 deepmd/pt/model/task/ener.py                  |   9 +-
 deepmd/pt/model/task/fitting.py               | 101 ++++++++++++------
 deepmd/pt/model/task/invar_fitting.py         |  10 +-
 deepmd/pt/model/task/polarizability.py        |  13 ++-
 deepmd/pt/model/task/property.py              |   9 +-
 27 files changed, 211 insertions(+), 64 deletions(-)

diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py
index eb95886598..1158d278e5 100644
--- a/deepmd/dpmodel/atomic_model/base_atomic_model.py
+++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py
@@ -88,6 +88,10 @@ def get_type_map(self) -> list[str]:
         """Get the type map."""
         return self.type_map
 
+    def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
+        return False
+
     def reinit_atom_exclude(
         self,
         exclude_types: list[int] = [],
diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py
index 2fa072cc78..8bae07dcad 100644
--- a/deepmd/dpmodel/atomic_model/dp_atomic_model.py
+++ b/deepmd/dpmodel/atomic_model/dp_atomic_model.py
@@ -233,6 +233,10 @@ def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.fitting.get_dim_aparam()
 
+    def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
+        return self.fitting.has_default_fparam()
+
     def get_sel_type(self) -> list[int]:
         """Get the selected atom types of this model.
 
diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py
index fcaea43338..f49c148377 100644
--- a/deepmd/dpmodel/fitting/dipole_fitting.py
+++ b/deepmd/dpmodel/fitting/dipole_fitting.py
@@ -84,6 +84,9 @@ class DipoleFitting(GeneralFitting):
             Only reducible variable are differentiable.
     type_map: list[str], Optional
             A list of strings. Give the name to each type of atoms.
+    default_fparam: list[float], optional
+            The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+            this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -110,6 +113,7 @@ def __init__(
         c_differentiable: bool = True,
         type_map: Optional[list[str]] = None,
         seed: Optional[Union[int, list[int]]] = None,
+        default_fparam: Optional[list[float]] = None,
     ) -> None:
         if tot_ener_zero:
             raise NotImplementedError("tot_ener_zero is not implemented")
@@ -144,6 +148,7 @@ def __init__(
             exclude_types=exclude_types,
             type_map=type_map,
             seed=seed,
+            default_fparam=default_fparam,
         )
 
     def _net_out_dim(self):
@@ -161,7 +166,7 @@ def serialize(self) -> dict:
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         var_name = data.pop("var_name", None)
         assert var_name == "dipole"
         return super().deserialize(data)
diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py
index 2f6df77eac..4bc34b8abf 100644
--- a/deepmd/dpmodel/fitting/dos_fitting.py
+++ b/deepmd/dpmodel/fitting/dos_fitting.py
@@ -46,6 +46,7 @@ def __init__(
         exclude_types: list[int] = [],
         type_map: Optional[list[str]] = None,
         seed: Optional[Union[int, list[int]]] = None,
+        default_fparam: Optional[list] = None,
     ) -> None:
         if bias_dos is not None:
             self.bias_dos = bias_dos
@@ -70,12 +71,13 @@ def __init__(
             exclude_types=exclude_types,
             type_map=type_map,
             seed=seed,
+            default_fparam=default_fparam,
         )
 
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         data["numb_dos"] = data.pop("dim_out")
         data.pop("tot_ener_zero", None)
         data.pop("var_name", None)
diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py
index 6435b6468f..794c074485 100644
--- a/deepmd/dpmodel/fitting/ener_fitting.py
+++ b/deepmd/dpmodel/fitting/ener_fitting.py
@@ -46,6 +46,7 @@ def __init__(
         exclude_types: list[int] = [],
         type_map: Optional[list[str]] = None,
         seed: Optional[Union[int, list[int]]] = None,
+        default_fparam: Optional[list] = None,
     ) -> None:
         super().__init__(
             var_name="energy",
@@ -70,12 +71,13 @@ def __init__(
             exclude_types=exclude_types,
             type_map=type_map,
             seed=seed,
+            default_fparam=default_fparam,
         )
 
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         data.pop("var_name")
         data.pop("dim_out")
         return super().deserialize(data)
diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py
index c6c51725bb..66bcd7007d 100644
--- a/deepmd/dpmodel/fitting/general_fitting.py
+++ b/deepmd/dpmodel/fitting/general_fitting.py
@@ -94,6 +94,9 @@ class GeneralFitting(NativeOP, BaseFitting):
             A list of strings. Give the name to each type of atoms.
     seed: Optional[Union[int, list[int]]]
         Random seed for initializing the network parameters.
+    default_fparam: list[float], optional
+        The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+        this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -120,6 +123,7 @@ def __init__(
         remove_vaccum_contribution: Optional[list[bool]] = None,
         type_map: Optional[list[str]] = None,
         seed: Optional[Union[int, list[int]]] = None,
+        default_fparam: Optional[list[float]] = None,
     ) -> None:
         self.var_name = var_name
         self.ntypes = ntypes
@@ -129,6 +133,7 @@ def __init__(
         self.numb_fparam = numb_fparam
         self.numb_aparam = numb_aparam
         self.dim_case_embd = dim_case_embd
+        self.default_fparam = default_fparam
         self.rcond = rcond
         self.tot_ener_zero = tot_ener_zero
         self.trainable = trainable
@@ -177,6 +182,15 @@ def __init__(
             self.case_embd = np.zeros(self.dim_case_embd, dtype=self.prec)
         else:
             self.case_embd = None
+
+        if self.default_fparam is not None:
+            if self.numb_fparam > 0:
+                assert len(self.default_fparam) == self.numb_fparam, (
+                    "default_fparam length mismatch!"
+                )
+            self.default_fparam_tensor = np.array(self.default_fparam, dtype=self.prec)
+        else:
+            self.default_fparam_tensor = None
         # init networks
         in_dim = (
             self.dim_descrpt
@@ -216,6 +230,10 @@ def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.numb_aparam
 
+    def has_default_fparam(self) -> bool:
+        """Check if the fitting has default frame parameters."""
+        return self.default_fparam is not None
+
     def get_sel_type(self) -> list[int]:
         """Get the selected atom types of this model.
 
@@ -273,6 +291,8 @@ def __setitem__(self, key, value) -> None:
             self.case_embd = value
         elif key in ["scale"]:
             self.scale = value
+        elif key in ["default_fparam_tensor"]:
+            self.default_fparam_tensor = value
         else:
             raise KeyError(key)
 
@@ -291,6 +311,8 @@ def __getitem__(self, key):
             return self.case_embd
         elif key in ["scale"]:
             return self.scale
+        elif key in ["default_fparam_tensor"]:
+            return self.default_fparam_tensor
         else:
             raise KeyError(key)
 
@@ -305,7 +327,7 @@ def serialize(self) -> dict:
         """Serialize the fitting to dict."""
         return {
             "@class": "Fitting",
-            "@version": 3,
+            "@version": 4,
             "var_name": self.var_name,
             "ntypes": self.ntypes,
             "dim_descrpt": self.dim_descrpt,
@@ -314,6 +336,7 @@ def serialize(self) -> dict:
             "numb_fparam": self.numb_fparam,
             "numb_aparam": self.numb_aparam,
             "dim_case_embd": self.dim_case_embd,
+            "default_fparam": self.default_fparam,
             "rcond": self.rcond,
             "activation_function": self.activation_function,
             "precision": self.precision,
@@ -402,6 +425,14 @@ def _call_common(
             xx_zeros = xp.zeros_like(xx)
         else:
             xx_zeros = None
+
+        if self.numb_fparam > 0 and fparam is None:
+            # fparam not supplied: tile the default fparam to shape (nf, numb_fparam)
+            assert self.default_fparam_tensor is not None
+            fparam = xp.tile(
+                xp.reshape(self.default_fparam_tensor, (1, self.numb_fparam)), (nf, 1)
+            )
+
         # check fparam dim, concate to input descriptor
         if self.numb_fparam > 0:
             assert fparam is not None, "fparam should not be None"
diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py
index b5d3a02d86..9e97eac22a 100644
--- a/deepmd/dpmodel/fitting/invar_fitting.py
+++ b/deepmd/dpmodel/fitting/invar_fitting.py
@@ -110,6 +110,9 @@ class InvarFitting(GeneralFitting):
             Atomic contributions of the excluded atom types are set zero.
     type_map: list[str], Optional
             A list of strings. Give the name to each type of atoms.
+    default_fparam: list[float], optional
+            The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+            this value will be used as the default value for the frame parameter in the fitting net.
 
     """
 
@@ -138,6 +141,7 @@ def __init__(
         exclude_types: list[int] = [],
         type_map: Optional[list[str]] = None,
         seed: Optional[Union[int, list[int]]] = None,
+        default_fparam: Optional[list[float]] = None,
     ) -> None:
         if tot_ener_zero:
             raise NotImplementedError("tot_ener_zero is not implemented")
@@ -173,6 +177,7 @@ def __init__(
             else [x is not None for x in atom_ener],
             type_map=type_map,
             seed=seed,
+            default_fparam=default_fparam,
         )
 
     def serialize(self) -> dict:
@@ -185,7 +190,7 @@ def serialize(self) -> dict:
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         return super().deserialize(data)
 
     def _net_out_dim(self):
diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py
index bfc337a177..cc20e4c932 100644
--- a/deepmd/dpmodel/fitting/polarizability_fitting.py
+++ b/deepmd/dpmodel/fitting/polarizability_fitting.py
@@ -90,6 +90,9 @@ class PolarFitting(GeneralFitting):
             Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale.
     type_map: list[str], Optional
             A list of strings. Give the name to each type of atoms.
+    default_fparam: list[float], optional
+            The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+            this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -117,6 +120,7 @@ def __init__(
         shift_diag: bool = True,
         type_map: Optional[list[str]] = None,
         seed: Optional[Union[int, list[int]]] = None,
+        default_fparam: Optional[list[float]] = None,
     ) -> None:
         if tot_ener_zero:
             raise NotImplementedError("tot_ener_zero is not implemented")
@@ -164,6 +168,7 @@ def __init__(
             exclude_types=exclude_types,
             type_map=type_map,
             seed=seed,
+            default_fparam=default_fparam,
         )
 
     def _net_out_dim(self):
@@ -189,7 +194,7 @@ def __getitem__(self, key):
     def serialize(self) -> dict:
         data = super().serialize()
         data["type"] = "polar"
-        data["@version"] = 4
+        data["@version"] = 5
         data["embedding_width"] = self.embedding_width
         data["fit_diag"] = self.fit_diag
         data["shift_diag"] = self.shift_diag
@@ -200,7 +205,7 @@ def serialize(self) -> dict:
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 4, 1)
+        check_version_compatibility(data.pop("@version", 1), 5, 1)
         var_name = data.pop("var_name", None)
         assert var_name == "polar"
         return super().deserialize(data)
diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py
index 6d0aa3546f..944b1f7958 100644
--- a/deepmd/dpmodel/fitting/property_fitting.py
+++ b/deepmd/dpmodel/fitting/property_fitting.py
@@ -61,6 +61,9 @@ class PropertyFittingNet(InvarFitting):
             Atomic contributions of the excluded atom types are set zero.
     type_map: list[str], Optional
             A list of strings. Give the name to each type of atoms.
+    default_fparam: list[float], optional
+            The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+            this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -83,6 +86,7 @@ def __init__(
         mixed_types: bool = True,
         exclude_types: list[int] = [],
         type_map: Optional[list[str]] = None,
+        default_fparam: Optional[list] = None,
         # not used
         seed: Optional[int] = None,
     ) -> None:
@@ -106,12 +110,13 @@ def __init__(
             mixed_types=mixed_types,
             exclude_types=exclude_types,
             type_map=type_map,
+            default_fparam=default_fparam,
         )
 
     @classmethod
     def deserialize(cls, data: dict) -> "PropertyFittingNet":
         data = data.copy()
-        check_version_compatibility(data.pop("@version"), 4, 1)
+        check_version_compatibility(data.pop("@version"), 5, 1)
         data.pop("dim_out")
         data["property_name"] = data.pop("var_name")
         data.pop("tot_ener_zero")
@@ -131,6 +136,6 @@ def serialize(self) -> dict:
             "task_dim": self.task_dim,
             "intensive": self.intensive,
         }
-        dd["@version"] = 4
+        dd["@version"] = 5
 
         return dd
diff --git a/deepmd/dpmodel/infer/deep_eval.py b/deepmd/dpmodel/infer/deep_eval.py
index 91fa0ac2ac..1aea58ef33 100644
--- a/deepmd/dpmodel/infer/deep_eval.py
+++ b/deepmd/dpmodel/infer/deep_eval.py
@@ -120,6 +120,10 @@ def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this DP."""
         return self.dp.get_dim_aparam()
 
+    def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
+        return self.dp.has_default_fparam()
+
     @property
     def model_type(self) -> type["DeepEvalWrapper"]:
         """The the evaluator of the model type."""
diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py
index ec0b986394..95ed7adbb7 100644
--- a/deepmd/dpmodel/model/make_model.py
+++ b/deepmd/dpmodel/model/make_model.py
@@ -563,6 +563,10 @@ def get_dim_aparam(self) -> int:
             """Get the number (dimension) of atomic parameters of this atomic model."""
             return self.atomic_model.get_dim_aparam()
 
+        def has_default_fparam(self) -> bool:
+            """Check if the model has default frame parameters."""
+            return self.atomic_model.has_default_fparam()
+
         def get_sel_type(self) -> list[int]:
             """Get the selected atom types of this model.
 
diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
index 881a2f899f..9079e0b8fe 100644
--- a/deepmd/infer/deep_eval.py
+++ b/deepmd/infer/deep_eval.py
@@ -161,6 +161,7 @@ def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this DP."""
 
     def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
         return False
 
     @abstractmethod
@@ -374,6 +375,7 @@ def get_dim_fparam(self) -> int:
         return self.deep_eval.get_dim_fparam()
 
     def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
         return self.deep_eval.has_default_fparam()
 
     def get_dim_aparam(self) -> int:
diff --git a/deepmd/jax/fitting/fitting.py b/deepmd/jax/fitting/fitting.py
index d62681490c..e69bded640 100644
--- a/deepmd/jax/fitting/fitting.py
+++ b/deepmd/jax/fitting/fitting.py
@@ -35,6 +35,7 @@ def setattr_for_general_fitting(name: str, value: Any) -> Any:
         "fparam_inv_std",
         "aparam_avg",
         "aparam_inv_std",
+        "default_fparam_tensor",
     }:
         value = to_jax_array(value)
         if value is not None:
diff --git a/deepmd/pd/model/task/ener.py b/deepmd/pd/model/task/ener.py
index 789ef75066..738990b2d8 100644
--- a/deepmd/pd/model/task/ener.py
+++ b/deepmd/pd/model/task/ener.py
@@ -72,7 +72,7 @@ def __init__(
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = copy.deepcopy(data)
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         data.pop("var_name")
         data.pop("dim_out")
         return super().deserialize(data)
diff --git a/deepmd/pd/model/task/fitting.py b/deepmd/pd/model/task/fitting.py
index a478c12f97..e7b11b35bc 100644
--- a/deepmd/pd/model/task/fitting.py
+++ b/deepmd/pd/model/task/fitting.py
@@ -95,6 +95,10 @@ class GeneralFitting(Fitting):
         Number of frame parameters.
     numb_aparam : int
         Number of atomic parameters.
+    default_fparam: list[float], optional
+        The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+        this value will be used as the default value for the frame parameter in the fitting net.
+        This parameter is not supported in PaddlePaddle.
     dim_case_embd : int
         Dimension of case specific embedding.
     activation_function : str
@@ -145,6 +149,7 @@ def __init__(
         remove_vaccum_contribution: Optional[list[bool]] = None,
         type_map: Optional[list[str]] = None,
         use_aparam_as_mask: bool = False,
+        default_fparam: Optional[list[float]] = None,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -157,6 +162,7 @@ def __init__(
         self.numb_fparam = numb_fparam
         self.numb_aparam = numb_aparam
         self.dim_case_embd = dim_case_embd
+        self.default_fparam = default_fparam
         self.activation_function = activation_function
         self.precision = precision
         self.prec = PRECISION_DICT[self.precision]
@@ -282,7 +288,7 @@ def serialize(self) -> dict:
         """Serialize the fitting to dict."""
         return {
             "@class": "Fitting",
-            "@version": 3,
+            "@version": 4,
             "var_name": self.var_name,
             "ntypes": self.ntypes,
             "dim_descrpt": self.dim_descrpt,
@@ -291,6 +297,7 @@ def serialize(self) -> dict:
             "numb_fparam": self.numb_fparam,
             "numb_aparam": self.numb_aparam,
             "dim_case_embd": self.dim_case_embd,
+            "default_fparam": self.default_fparam,
             "activation_function": self.activation_function,
             "precision": self.precision,
             "mixed_types": self.mixed_types,
diff --git a/deepmd/pd/model/task/invar_fitting.py b/deepmd/pd/model/task/invar_fitting.py
index b92c862dc8..176acdeb20 100644
--- a/deepmd/pd/model/task/invar_fitting.py
+++ b/deepmd/pd/model/task/invar_fitting.py
@@ -147,7 +147,7 @@ def serialize(self) -> dict:
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = copy.deepcopy(data)
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         return super().deserialize(data)
 
     def output_def(self) -> FittingOutputDef:
diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py
index 07b9176c99..eba8314e4f 100644
--- a/deepmd/pt/infer/deep_eval.py
+++ b/deepmd/pt/infer/deep_eval.py
@@ -184,7 +184,12 @@ def get_dim_aparam(self) -> int:
         return self.dp.model["Default"].get_dim_aparam()
 
     def has_default_fparam(self) -> bool:
-        return self.dp.model["Default"].has_default_fparam()
+        """Check if the model has default frame parameters."""
+        try:
+            return self.dp.model["Default"].has_default_fparam()
+        except AttributeError:
+            # for compatibility with old models that do not define has_default_fparam
+            return False
 
     def get_intensive(self) -> bool:
         return self.dp.model["Default"].get_intensive()
diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py
index 56af5f4f43..37bab083d0 100644
--- a/deepmd/pt/model/atomic_model/base_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/base_atomic_model.py
@@ -135,6 +135,10 @@ def get_intensive(self) -> bool:
         """Whether the fitting property is intensive."""
         return False
 
+    def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
+        return False
+
     def reinit_atom_exclude(
         self,
         exclude_types: list[int] = [],
diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index dda10b82a8..cee7aaf2f9 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -343,6 +343,7 @@ def get_dim_fparam(self) -> int:
         return self.fitting_net.get_dim_fparam()
 
     def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
         return self.fitting_net.has_default_fparam()
 
     def get_default_fparam(self) -> Optional[torch.Tensor]:
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
index d58261a481..d06f60f3f1 100644
--- a/deepmd/pt/model/model/make_model.py
+++ b/deepmd/pt/model/model/make_model.py
@@ -4,7 +4,6 @@
 )
 
 import torch
-import numpy as np
 
 from deepmd.dpmodel import (
     ModelOutputDef,
@@ -525,6 +524,7 @@ def get_dim_fparam(self) -> int:
 
         @torch.jit.export
         def has_default_fparam(self) -> bool:
+            """Check if the model has default frame parameters."""
             return self.atomic_model.has_default_fparam()
 
         def get_default_fparam(self) -> Optional[torch.Tensor]:
diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py
index 65b64220ae..0fe6a1352d 100644
--- a/deepmd/pt/model/task/dipole.py
+++ b/deepmd/pt/model/task/dipole.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
+    Any,
     Callable,
     Optional,
     Union,
@@ -72,6 +73,9 @@ class DipoleFittingNet(GeneralFitting):
         Only reducible variable are differentiable.
     type_map: list[str], Optional
         A list of strings. Give the name to each type of atoms.
+    default_fparam: list[float], optional
+        The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+        this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -93,7 +97,8 @@ def __init__(
         r_differentiable: bool = True,
         c_differentiable: bool = True,
         type_map: Optional[list[str]] = None,
-        **kwargs,
+        default_fparam: Optional[list] = None,
+        **kwargs: Any,
     ) -> None:
         self.embedding_width = embedding_width
         self.r_differentiable = r_differentiable
@@ -114,6 +119,7 @@ def __init__(
             seed=seed,
             exclude_types=exclude_types,
             type_map=type_map,
+            default_fparam=default_fparam,
             **kwargs,
         )
 
@@ -132,7 +138,7 @@ def serialize(self) -> dict:
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         data.pop("var_name", None)
         return super().deserialize(data)
 
diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py
index 568ef81c92..afbed5f748 100644
--- a/deepmd/pt/model/task/dos.py
+++ b/deepmd/pt/model/task/dos.py
@@ -57,6 +57,7 @@ def __init__(
         exclude_types: list[int] = [],
         mixed_types: bool = True,
         type_map: Optional[list[str]] = None,
+        default_fparam: Optional[list] = None,
     ) -> None:
         if bias_dos is not None:
             self.bias_dos = bias_dos
@@ -83,6 +84,7 @@ def __init__(
             exclude_types=exclude_types,
             trainable=trainable,
             type_map=type_map,
+            default_fparam=default_fparam,
         )
 
     def output_def(self) -> FittingOutputDef:
@@ -101,7 +103,7 @@ def output_def(self) -> FittingOutputDef:
     @classmethod
     def deserialize(cls, data: dict) -> "DOSFittingNet":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         data.pop("@class", None)
         data.pop("var_name", None)
         data.pop("tot_ener_zero", None)
diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py
index 3968e21cdc..fb2c9111f7 100644
--- a/deepmd/pt/model/task/ener.py
+++ b/deepmd/pt/model/task/ener.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
+    Any,
     Optional,
     Union,
 )
@@ -67,7 +68,7 @@ def __init__(
         seed: Optional[Union[int, list[int]]] = None,
         type_map: Optional[list[str]] = None,
         default_fparam: Optional[list] = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         super().__init__(
             "energy",
@@ -92,7 +93,7 @@ def __init__(
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         data.pop("var_name")
         data.pop("dim_out")
         return super().deserialize(data)
@@ -270,7 +271,7 @@ def forward(
             "dforce": vec_out,
         }
 
-        
+
 @Fitting.register("ener_readout")
 @fitting_check_output
 class EnergyFittingNetReadout(InvarFitting):
@@ -420,4 +421,4 @@ def forward(
                 edge_energy = torch.sum(edge_atomic_contrib, dim=-2)
             # energy
             out = out + edge_energy / self.norm_e_fact
-        return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)}
\ No newline at end of file
+        return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)}
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 4da177c5a1..12efb7c1f6 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -4,10 +4,10 @@
     abstractmethod,
 )
 from typing import (
+    Any,
     Callable,
     Optional,
     Union,
-    List,
 )
 
 import numpy as np
@@ -37,6 +37,9 @@
     to_numpy_array,
     to_torch_tensor,
 )
+from deepmd.utils.env_mat_stat import (
+    StatItem,
+)
 from deepmd.utils.finetune import (
     get_index_between_two_maps,
     map_atom_exclude_types,
@@ -44,9 +47,6 @@
 from deepmd.utils.path import (
     DPPath,
 )
-from deepmd.utils.env_mat_stat import (
-    StatItem,
-)
 
 dtype = env.GLOBAL_PT_FLOAT_PRECISION
 device = env.DEVICE
@@ -62,7 +62,9 @@ def __new__(cls, *args, **kwargs):
             return BaseFitting.__new__(BaseFitting, *args, **kwargs)
         return super().__new__(cls)
 
-    def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False) -> None:
+    def share_params(
+        self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False
+    ) -> None:
         """
         Share the parameters of self to the base_class with shared_level during multitask training.
         If not start from checkpoint (resume is False),
@@ -81,16 +83,22 @@ def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2
                     for ii in range(self.numb_fparam):
                         base_fparam[ii] += self.get_stats()["fparam"][ii] * model_prob
                     fparam_avg = np.array([ii.compute_avg() for ii in base_fparam])
-                    fparam_std = np.array([ii.compute_std(protection=protection) for ii in base_fparam])
+                    fparam_std = np.array(
+                        [ii.compute_std(protection=protection) for ii in base_fparam]
+                    )
                     fparam_inv_std = 1.0 / fparam_std
                     base_class.fparam_avg.copy_(
                         torch.tensor(
-                            fparam_avg, device=env.DEVICE, dtype=base_class.fparam_avg.dtype
+                            fparam_avg,
+                            device=env.DEVICE,
+                            dtype=base_class.fparam_avg.dtype,
                         )
                     )
                     base_class.fparam_inv_std.copy_(
                         torch.tensor(
-                            fparam_inv_std, device=env.DEVICE, dtype=base_class.fparam_inv_std.dtype
+                            fparam_inv_std,
+                            device=env.DEVICE,
+                            dtype=base_class.fparam_inv_std.dtype,
                         )
                     )
                 self.fparam_avg = base_class.fparam_avg
@@ -104,18 +112,24 @@ def share_params(self, base_class, shared_level, model_prob=1.0, protection=1e-2
                     for ii in range(self.numb_aparam):
                         base_aparam[ii] += self.get_stats()["aparam"][ii] * model_prob
                     aparam_avg = np.array([ii.compute_avg() for ii in base_aparam])
-                    aparam_std = np.array([ii.compute_std(protection=protection) for ii in base_aparam])
+                    aparam_std = np.array(
+                        [ii.compute_std(protection=protection) for ii in base_aparam]
+                    )
                     aparam_inv_std = 1.0 / aparam_std
                     base_class.aparam_avg.copy_(
                         torch.tensor(
-                            aparam_avg, device=env.DEVICE, dtype=base_class.aparam_avg.dtype
+                            aparam_avg,
+                            device=env.DEVICE,
+                            dtype=base_class.aparam_avg.dtype,
                         )
                     )
                     base_class.aparam_inv_std.copy_(
                         torch.tensor(
-                            aparam_inv_std, device=env.DEVICE, dtype=base_class.aparam_inv_std.dtype
+                            aparam_inv_std,
+                            device=env.DEVICE,
+                            dtype=base_class.aparam_inv_std.dtype,
                         )
-                    )     
+                    )
                 self.aparam_avg = base_class.aparam_avg
                 self.aparam_inv_std = base_class.aparam_inv_std
 
@@ -133,7 +147,7 @@ def save_to_file_fparam(
 
         Parameters
         ----------
-        path : DPPath
+        stat_file_path : DPPath
             The path to save the statistics of fparam.
         """
         assert stat_file_path is not None
@@ -144,7 +158,9 @@ def save_to_file_fparam(
         _fparam_stat = []
         for ii in range(self.numb_fparam):
             _tmp_stat = self.stats["fparam"][ii]
-            _fparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum])
+            _fparam_stat.append(
+                [_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum]
+            )
         _fparam_stat = np.array(_fparam_stat)
         fp.save_numpy(_fparam_stat)
         log.info(f"Save fparam stats to {fp}.")
@@ -157,7 +173,7 @@ def save_to_file_aparam(
 
         Parameters
         ----------
-        path : DPPath
+        stat_file_path : DPPath
             The path to save the statistics of aparam.
         """
         assert stat_file_path is not None
@@ -168,7 +184,9 @@ def save_to_file_aparam(
         _aparam_stat = []
         for ii in range(self.numb_aparam):
             _tmp_stat = self.stats["aparam"][ii]
-            _aparam_stat.append([_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum])
+            _aparam_stat.append(
+                [_tmp_stat.number, _tmp_stat.sum, _tmp_stat.squared_sum]
+            )
         _aparam_stat = np.array(_aparam_stat)
         fp.save_numpy(_aparam_stat)
         log.info(f"Save aparam stats to {fp}.")
@@ -178,7 +196,7 @@ def restore_fparam_from_file(self, stat_file_path: DPPath) -> None:
 
         Parameters
         ----------
-        path : DPPath
+        stat_file_path : DPPath
             The path to load the statistics of fparam.
         """
         fp = stat_file_path / "fparam"
@@ -186,7 +204,9 @@ def restore_fparam_from_file(self, stat_file_path: DPPath) -> None:
         assert arr.shape == (self.numb_fparam, 3)
         _fparam_stat = []
         for ii in range(self.numb_fparam):
-            _fparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2]))
+            _fparam_stat.append(
+                StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2])
+            )
         self.stats["fparam"] = _fparam_stat
         log.info(f"Load fparam stats from {fp}.")
 
@@ -195,7 +215,7 @@ def restore_aparam_from_file(self, stat_file_path: DPPath) -> None:
 
         Parameters
         ----------
-        path : DPPath
+        stat_file_path : DPPath
             The path to load the statistics of aparam.
         """
         fp = stat_file_path / "aparam"
@@ -203,7 +223,9 @@ def restore_aparam_from_file(self, stat_file_path: DPPath) -> None:
         assert arr.shape == (self.numb_aparam, 3)
         _aparam_stat = []
         for ii in range(self.numb_aparam):
-            _aparam_stat.append(StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2]))
+            _aparam_stat.append(
+                StatItem(number=arr[ii][0], sum=arr[ii][1], squared_sum=arr[ii][2])
+            )
         self.stats["aparam"] = _aparam_stat
         log.info(f"Load aparam stats from {fp}.")
 
@@ -244,7 +266,9 @@ def compute_input_stats(
             else:
                 sampled = merged() if callable(merged) else merged
                 self.stats["fparam"] = []
-                cat_data = to_numpy_array(torch.cat([frame["fparam"] for frame in sampled], dim=0))
+                cat_data = to_numpy_array(
+                    torch.cat([frame["fparam"] for frame in sampled], dim=0)
+                )
                 cat_data = np.reshape(cat_data, [-1, self.numb_fparam])
                 sumv = np.sum(cat_data, axis=0)
                 sumv2 = np.sum(cat_data * cat_data, axis=0)
@@ -261,7 +285,9 @@ def compute_input_stats(
                     self.save_to_file_fparam(stat_file_path)
 
             fparam_avg = np.array([ii.compute_avg() for ii in self.stats["fparam"]])
-            fparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["fparam"]])
+            fparam_std = np.array(
+                [ii.compute_std(protection=protection) for ii in self.stats["fparam"]]
+            )
             fparam_inv_std = 1.0 / fparam_std
             log.info(f"fparam_avg is {fparam_avg}, fparam_inv_std is {fparam_inv_std}")
             self.fparam_avg.copy_(to_torch_tensor(fparam_avg))
@@ -297,18 +323,18 @@ def compute_input_stats(
                     self.save_to_file_aparam(stat_file_path)
 
             aparam_avg = np.array([ii.compute_avg() for ii in self.stats["aparam"]])
-            aparam_std = np.array([ii.compute_std(protection=protection) for ii in self.stats["aparam"]])
+            aparam_std = np.array(
+                [ii.compute_std(protection=protection) for ii in self.stats["aparam"]]
+            )
             aparam_inv_std = 1.0 / aparam_std
             log.info(f"aparam_avg is {aparam_avg}, aparam_inv_std is {aparam_inv_std}")
             self.aparam_avg.copy_(to_torch_tensor(aparam_avg))
             self.aparam_inv_std.copy_(to_torch_tensor(aparam_inv_std))
 
-    def get_stats(self) -> dict[str, List[StatItem]]:
+    def get_stats(self) -> dict[str, list[StatItem]]:
         """Get the statistics of the fitting_net."""
         if self.stats is None:
-            raise RuntimeError(
-                "The statistics of fitting net has not been computed."
-            )
+            raise RuntimeError("The statistics of fitting net has not been computed.")
         return self.stats
 
 
@@ -362,6 +388,9 @@ class GeneralFitting(Fitting):
         A list of strings. Give the name to each type of atoms.
     use_aparam_as_mask: bool
         If True, the aparam will not be used in fitting net for embedding.
+    default_fparam: list[float], optional
+        The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+        this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -385,8 +414,8 @@ def __init__(
         remove_vaccum_contribution: Optional[list[bool]] = None,
         type_map: Optional[list[str]] = None,
         use_aparam_as_mask: bool = False,
-        default_fparam: Optional[list] = None,
-        **kwargs,
+        default_fparam: Optional[list[float]] = None,
+        **kwargs: Any,
     ) -> None:
         super().__init__()
         self.var_name = var_name
@@ -461,9 +490,9 @@ def __init__(
 
         if self.default_fparam is not None:
             if self.numb_fparam > 0:
-                assert (
-                    len(self.default_fparam) == self.numb_fparam
-                ), "default_fparam length mismatch!"
+                assert len(self.default_fparam) == self.numb_fparam, (
+                    "default_fparam length mismatch!"
+                )
             self.register_buffer(
                 "default_fparam_tensor",
                 torch.tensor(
@@ -537,7 +566,7 @@ def serialize(self) -> dict:
         """Serialize the fitting to dict."""
         return {
             "@class": "Fitting",
-            "@version": 3,
+            "@version": 4,
             "var_name": self.var_name,
             "ntypes": self.ntypes,
             "dim_descrpt": self.dim_descrpt,
@@ -546,6 +575,7 @@ def serialize(self) -> dict:
             "numb_fparam": self.numb_fparam,
             "numb_aparam": self.numb_aparam,
             "dim_case_embd": self.dim_case_embd,
+            "default_fparam": self.default_fparam,
             "activation_function": self.activation_function,
             "precision": self.precision,
             "mixed_types": self.mixed_types,
@@ -590,6 +620,7 @@ def get_dim_fparam(self) -> int:
         return self.numb_fparam
 
     def has_default_fparam(self) -> bool:
+        """Check if the fitting has default frame parameters."""
         return self.default_fparam is not None
 
     def get_default_fparam(self) -> Optional[torch.Tensor]:
@@ -653,6 +684,8 @@ def __setitem__(self, key, value) -> None:
             self.case_embd = value
         elif key in ["scale"]:
             self.scale = value
+        elif key in ["default_fparam_tensor"]:
+            self.default_fparam_tensor = value
         else:
             raise KeyError(key)
 
@@ -671,6 +704,8 @@ def __getitem__(self, key):
             return self.case_embd
         elif key in ["scale"]:
             return self.scale
+        elif key in ["default_fparam_tensor"]:
+            return self.default_fparam_tensor
         else:
             raise KeyError(key)
 
diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py
index ede0315e87..d8391f9fcf 100644
--- a/deepmd/pt/model/task/invar_fitting.py
+++ b/deepmd/pt/model/task/invar_fitting.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
+    Any,
     Optional,
     Union,
 )
@@ -80,6 +81,9 @@ class InvarFitting(GeneralFitting):
         A list of strings. Give the name to each type of atoms.
     use_aparam_as_mask: bool
         If True, the aparam will not be used in fitting net for embedding.
+    default_fparam: list[float], optional
+        The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+        this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -103,8 +107,8 @@ def __init__(
         atom_ener: Optional[list[Optional[torch.Tensor]]] = None,
         type_map: Optional[list[str]] = None,
         use_aparam_as_mask: bool = False,
-        default_fparam: Optional[list] = None,
-        **kwargs,
+        default_fparam: Optional[list[float]] = None,
+        **kwargs: Any,
     ) -> None:
         self.dim_out = dim_out
         self.atom_ener = atom_ener
@@ -147,7 +151,7 @@ def serialize(self) -> dict:
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 3, 1)
+        check_version_compatibility(data.pop("@version", 1), 4, 1)
         return super().deserialize(data)
 
     def output_def(self) -> FittingOutputDef:
diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py
index a326802918..282891e7a5 100644
--- a/deepmd/pt/model/task/polarizability.py
+++ b/deepmd/pt/model/task/polarizability.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
+    Any,
     Optional,
     Union,
 )
@@ -75,7 +76,9 @@ class PolarFittingNet(GeneralFitting):
         Whether to shift the diagonal part of the polarizability matrix. The shift operation is carried out after scale.
     type_map: list[str], Optional
         A list of strings. Give the name to each type of atoms.
-
+    default_fparam: list[float], optional
+        The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
+        this value will be used as the default value for the frame parameter in the fitting net.
     """
 
     def __init__(
@@ -98,7 +101,8 @@ def __init__(
         scale: Optional[Union[list[float], float]] = None,
         shift_diag: bool = True,
         type_map: Optional[list[str]] = None,
-        **kwargs,
+        default_fparam: Optional[list] = None,
+        **kwargs: Any,
     ) -> None:
         self.embedding_width = embedding_width
         self.fit_diag = fit_diag
@@ -139,6 +143,7 @@ def __init__(
             seed=seed,
             exclude_types=exclude_types,
             type_map=type_map,
+            default_fparam=default_fparam,
             **kwargs,
         )
 
@@ -195,7 +200,7 @@ def change_type_map(
     def serialize(self) -> dict:
         data = super().serialize()
         data["type"] = "polar"
-        data["@version"] = 4
+        data["@version"] = 5
         data["embedding_width"] = self.embedding_width
         data["fit_diag"] = self.fit_diag
         data["shift_diag"] = self.shift_diag
@@ -206,7 +211,7 @@ def serialize(self) -> dict:
     @classmethod
     def deserialize(cls, data: dict) -> "GeneralFitting":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 4, 1)
+        check_version_compatibility(data.pop("@version", 1), 5, 1)
         data.pop("var_name", None)
         return super().deserialize(data)
 
diff --git a/deepmd/pt/model/task/property.py b/deepmd/pt/model/task/property.py
index 5ef0cd0233..c2440b7de3 100644
--- a/deepmd/pt/model/task/property.py
+++ b/deepmd/pt/model/task/property.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
+    Any,
     Optional,
     Union,
 )
@@ -91,7 +92,8 @@ def __init__(
         mixed_types: bool = True,
         trainable: Union[bool, list[bool]] = True,
         seed: Optional[int] = None,
-        **kwargs,
+        default_fparam: Optional[list] = None,
+        **kwargs: Any,
     ) -> None:
         self.task_dim = task_dim
         self.intensive = intensive
@@ -111,6 +113,7 @@ def __init__(
             mixed_types=mixed_types,
             trainable=trainable,
             seed=seed,
+            default_fparam=default_fparam,
             **kwargs,
         )
 
@@ -135,7 +138,7 @@ def get_intensive(self) -> bool:
     @classmethod
     def deserialize(cls, data: dict) -> "PropertyFittingNet":
         data = data.copy()
-        check_version_compatibility(data.pop("@version", 1), 4, 1)
+        check_version_compatibility(data.pop("@version", 1), 5, 1)
         data.pop("dim_out")
         data["property_name"] = data.pop("var_name")
         obj = super().deserialize(data)
@@ -150,7 +153,7 @@ def serialize(self) -> dict:
             "task_dim": self.task_dim,
             "intensive": self.intensive,
         }
-        dd["@version"] = 4
+        dd["@version"] = 5
 
         return dd
 

From c471471929c0c3b71fb116621cfdfd309c8a2149 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Thu, 4 Sep 2025 19:53:30 +0800
Subject: [PATCH 18/27] add argcheck for default_fparam

---
 deepmd/dpmodel/fitting/property_fitting.py | 18 ++++++++++
 deepmd/utils/argcheck.py                   | 40 ++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py
index 944b1f7958..dbd415bde1 100644
--- a/deepmd/dpmodel/fitting/property_fitting.py
+++ b/deepmd/dpmodel/fitting/property_fitting.py
@@ -12,6 +12,10 @@
 from deepmd.dpmodel.fitting.invar_fitting import (
     InvarFitting,
 )
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+    OutputVariableDef,
+)
 from deepmd.utils.version import (
     check_version_compatibility,
 )
@@ -113,6 +117,20 @@ def __init__(
             default_fparam=default_fparam,
         )
 
+    def output_def(self) -> FittingOutputDef:
+        return FittingOutputDef(
+            [
+                OutputVariableDef(
+                    self.var_name,
+                    [self.dim_out],
+                    reducible=True,
+                    r_differentiable=False,
+                    c_differentiable=False,
+                    intensive=self.intensive,
+                ),
+            ]
+        )
+
     @classmethod
     def deserialize(cls, data: dict) -> "PropertyFittingNet":
         data = data.copy()
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 7f448fa1ee..8b0f101d3c 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1782,6 +1782,7 @@ def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
 def fitting_ener():
     doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
     doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net."
     doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches."
     doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
     doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
@@ -1810,6 +1811,13 @@ def fitting_ener():
         Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
         Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
         Argument("default_fparam", list, optional=True, default=None),
+        Argument(
+            "default_fparam",
+            list[float],
+            optional=True,
+            default=None,
+            doc=doc_only_pt_supported + doc_default_fparam,
+        ),
         Argument(
             "dim_case_embd",
             int,
@@ -1867,6 +1875,7 @@ def fitting_ener():
 def fitting_dos():
     doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
     doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net."
     doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches."
     doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
     doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
@@ -1884,6 +1893,13 @@ def fitting_dos():
     return [
         Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
         Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument(
+            "default_fparam",
+            list[float],
+            optional=True,
+            default=None,
+            doc=doc_only_pt_supported + doc_default_fparam,
+        ),
         Argument(
             "dim_case_embd",
             int,
@@ -1922,6 +1938,7 @@ def fitting_dos():
 def fitting_property():
     doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
     doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net."
     doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches."
     doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built"
     doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
@@ -1937,6 +1954,13 @@ def fitting_property():
     return [
         Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
         Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
+        Argument(
+            "default_fparam",
+            list[float],
+            optional=True,
+            default=None,
+            doc=doc_only_pt_supported + doc_default_fparam,
+        ),
         Argument(
             "dim_case_embd",
             int,
@@ -1984,6 +2008,7 @@ def fitting_property():
 def fitting_polar():
     doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
     doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net."
     doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches."
     doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
     doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
@@ -2013,6 +2038,13 @@ def fitting_polar():
             default=0,
             doc=doc_only_pt_supported + doc_numb_aparam,
         ),
+        Argument(
+            "default_fparam",
+            list[float],
+            optional=True,
+            default=None,
+            doc=doc_only_pt_supported + doc_default_fparam,
+        ),
         Argument(
             "dim_case_embd",
             int,
@@ -2062,6 +2094,7 @@ def fitting_polar():
 def fitting_dipole():
     doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams."
     doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams."
+    doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net."
     doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches."
     doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built."
     doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
@@ -2084,6 +2117,13 @@ def fitting_dipole():
             default=0,
             doc=doc_only_pt_supported + doc_numb_aparam,
         ),
+        Argument(
+            "default_fparam",
+            list[float],
+            optional=True,
+            default=None,
+            doc=doc_only_pt_supported + doc_default_fparam,
+        ),
         Argument(
             "dim_case_embd",
             int,

From b37360a7bb0028a62e9c47b98d6bb9f55d25b57c Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Mon, 22 Dec 2025 16:46:15 +0800
Subject: [PATCH 19/27] remove duplicated default_fparam Argument in fitting_ener

---
 deepmd/utils/argcheck.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 8b0f101d3c..709c0daaec 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -1810,7 +1810,6 @@ def fitting_ener():
     return [
         Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
         Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
-        Argument("default_fparam", list, optional=True, default=None),
         Argument(
             "default_fparam",
             list[float],

From 6c9026085e1a56cbfd9109bee6209c71e2f6918a Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Tue, 16 Dec 2025 23:13:33 +0800
Subject: [PATCH 20/27] add f_use_norm

---
 deepmd/pt/loss/ener.py   | 28 +++++++++++++++++++++-------
 deepmd/utils/argcheck.py |  6 ++++++
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py
index 75efd0277f..e41b45fa5a 100644
--- a/deepmd/pt/loss/ener.py
+++ b/deepmd/pt/loss/ener.py
@@ -52,11 +52,11 @@ def __init__(
         limit_pref_gf: float = 0.0,
         numb_generalized_coord: int = 0,
         use_l1_all: bool = False,
-        inference=False,
-        use_huber=False,
-        use_default_pf=False,
-        huber_delta=0.01,
-        **kwargs,
+        inference: bool = False,
+        use_huber: bool = False,
+        use_default_pf: bool = False,
+        f_use_norm: bool = False,
+        huber_delta: float = 0.01,
     ) -> None:
         r"""Construct a layer to compute loss on energy, force and virial.
 
@@ -144,6 +144,9 @@ def __init__(
         self.inference = inference
         self.use_huber = use_huber
         self.huber_delta = huber_delta
+        self.f_use_norm = f_use_norm
+        if self.f_use_norm:
+            assert self.use_huber, "f_use_norm can only be True when use_huber is True."
         if self.use_huber and (
             self.has_pf or self.has_gf or self.relative_f is not None
         ):
@@ -278,9 +281,20 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
                     if not self.use_huber:
                         loss += (pref_f * l2_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
                     else:
+                        if not self.f_use_norm:
+                            huber_f_input1 = force_pred.reshape(-1)
+                            huber_f_input2 = force_label.reshape(-1)
+                        else:
+                            huber_f_input1 = torch.linalg.vector_norm(
+                                (force_label - force_pred).reshape(-1, 3),
+                                ord=2,
+                                dim=1,
+                                keepdim=True,
+                            )  # l2 norm mae
+                            huber_f_input2 = torch.zeros_like(huber_f_input1)
                         l_huber_loss = custom_huber_loss(
-                            force_pred.reshape(-1),
-                            force_label.reshape(-1),
+                            huber_f_input1,
+                            huber_f_input2,
                             delta=self.huber_delta,
                         )
                         loss += pref_f * l_huber_loss
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 709c0daaec..9b8ec058a8 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2740,6 +2740,12 @@ def loss_ener():
             default=False,
             doc=doc_use_huber,
         ),
+        Argument(
+            "f_use_norm",
+            bool,
+            optional=True,
+            default=False,
+        ),
         Argument(
             "huber_delta",
             float,

From c39433bc89d2bbf6f46a972157b78280f16baf9c Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Thu, 18 Dec 2025 18:53:21 +0800
Subject: [PATCH 21/27] rework use_l1_all MAE loss for energy, force and virial

---
 deepmd/pt/loss/ener.py   | 70 ++++++++++++++++++++++++++--------------
 deepmd/utils/argcheck.py |  6 ++++
 2 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py
index e41b45fa5a..be9138b606 100644
--- a/deepmd/pt/loss/ener.py
+++ b/deepmd/pt/loss/ener.py
@@ -146,7 +146,9 @@ def __init__(
         self.huber_delta = huber_delta
         self.f_use_norm = f_use_norm
         if self.f_use_norm:
-            assert self.use_huber, "f_use_norm can only be True when use_huber is True."
+            assert self.use_huber or self.use_l1_all, (
+                "f_use_norm can only be True when use_huber or use_l1_all is True."
+            )
         if self.use_huber and (
             self.has_pf or self.has_gf or self.relative_f is not None
         ):
@@ -233,15 +235,11 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
                 l1_ener_loss = F.l1_loss(
                     energy_pred.reshape(-1),
                     energy_label.reshape(-1),
-                    reduction="sum",
+                    reduction="mean",
                 )
-                loss += pref_e * l1_ener_loss
+                loss += atom_norm * (pref_e * l1_ener_loss)
                 more_loss["mae_e"] = self.display_if_exist(
-                    F.l1_loss(
-                        energy_pred.reshape(-1),
-                        energy_label.reshape(-1),
-                        reduction="mean",
-                    ).detach(),
+                    l1_ener_loss.detach() * atom_norm,
                     find_energy,
                 )
                 # more_loss['log_keys'].append('rmse_e')
@@ -303,11 +301,21 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
                         rmse_f.detach(), find_force
                     )
                 else:
-                    l1_force_loss = F.l1_loss(force_label, force_pred, reduction="none")
+                    l1_force_loss = F.l1_loss(
+                        force_label.reshape(-1),
+                        force_pred.reshape(-1),
+                        reduction="mean",
+                    )
                     more_loss["mae_f"] = self.display_if_exist(
-                        l1_force_loss.mean().detach(), find_force
+                        l1_force_loss.detach(), find_force
                     )
-                    l1_force_loss = l1_force_loss.sum(-1).mean(-1).sum()
+                    if self.f_use_norm:
+                        l1_force_loss = torch.linalg.vector_norm(
+                            (force_label - force_pred).reshape(-1, 3),
+                            ord=2,
+                            dim=1,
+                            keepdim=True,
+                        ).mean()  # l2 norm mae
                     loss += (pref_f * l1_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
                 if mae:
                     mae_f = torch.mean(torch.abs(diff_f))
@@ -362,22 +370,36 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
             find_virial = label.get("find_virial", 0.0)
             pref_v = pref_v * find_virial
             diff_v = label["virial"] - model_pred["virial"].reshape(-1, 9)
-            l2_virial_loss = torch.mean(torch.square(diff_v))
-            if not self.inference:
-                more_loss["l2_virial_loss"] = self.display_if_exist(
-                    l2_virial_loss.detach(), find_virial
+            if not self.use_l1_all:
+                l2_virial_loss = torch.mean(torch.square(diff_v))
+                if not self.inference:
+                    more_loss["l2_virial_loss"] = self.display_if_exist(
+                        l2_virial_loss.detach(), find_virial
+                    )
+                if not self.use_huber:
+                    loss += atom_norm * (pref_v * l2_virial_loss)
+                else:
+                    l_huber_loss = custom_huber_loss(
+                        atom_norm * model_pred["virial"].reshape(-1),
+                        atom_norm * label["virial"].reshape(-1),
+                        delta=self.huber_delta,
+                    )
+                    loss += pref_v * l_huber_loss
+                rmse_v = l2_virial_loss.sqrt() * atom_norm
+                more_loss["rmse_v"] = self.display_if_exist(
+                    rmse_v.detach(), find_virial
                 )
-            if not self.use_huber:
-                loss += atom_norm * (pref_v * l2_virial_loss)
             else:
-                l_huber_loss = custom_huber_loss(
-                    atom_norm * model_pred["virial"].reshape(-1),
-                    atom_norm * label["virial"].reshape(-1),
-                    delta=self.huber_delta,
+                l1_virial_loss = F.l1_loss(
+                    label["virial"].reshape(-1),
+                    model_pred["virial"].reshape(-1),
+                    reduction="mean",
+                )
+                loss += atom_norm * (pref_v * l1_virial_loss)
+                more_loss["mae_v"] = self.display_if_exist(
+                    l1_virial_loss.detach() * atom_norm,
+                    find_virial,
                 )
-                loss += pref_v * l_huber_loss
-            rmse_v = l2_virial_loss.sqrt() * atom_norm
-            more_loss["rmse_v"] = self.display_if_exist(rmse_v.detach(), find_virial)
             if mae:
                 mae_v = torch.mean(torch.abs(diff_v)) * atom_norm
                 more_loss["mae_v"] = self.display_if_exist(mae_v.detach(), find_virial)
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 9b8ec058a8..bbb0c01a4c 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2746,6 +2746,12 @@ def loss_ener():
             optional=True,
             default=False,
         ),
+        Argument(
+            "use_l1_all",
+            bool,
+            optional=True,
+            default=False,
+        ),
         Argument(
             "huber_delta",
             float,

From 4e9e5ea3bdae5403391da1b8773c457a5363dc6e Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Mon, 22 Dec 2025 17:25:22 +0800
Subject: [PATCH 22/27] support huber and use_l1_all in atom-pref force loss

---
 deepmd/pt/loss/ener.py | 44 +++++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py
index be9138b606..367d5c9092 100644
--- a/deepmd/pt/loss/ener.py
+++ b/deepmd/pt/loss/ener.py
@@ -330,16 +330,42 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
                 )
                 pref_pf = pref_pf * find_atom_pref
                 atom_pref_reshape = atom_pref.reshape(-1)
-                l2_pref_force_loss = (torch.square(diff_f) * atom_pref_reshape).mean()
-                if not self.inference:
-                    more_loss["l2_pref_force_loss"] = self.display_if_exist(
-                        l2_pref_force_loss.detach(), find_atom_pref
+                if not self.use_l1_all:
+                    l2_pref_force_loss = (
+                        torch.square(diff_f) * atom_pref_reshape
+                    ).mean()
+                    if not self.inference:
+                        more_loss["l2_pref_force_loss"] = self.display_if_exist(
+                            l2_pref_force_loss.detach(), find_atom_pref
+                        )
+                    if not self.use_huber:
+                        loss += (pref_pf * l2_pref_force_loss).to(
+                            GLOBAL_PT_FLOAT_PRECISION
+                        )
+                    else:
+                        l_huber_loss = custom_huber_loss(
+                            (atom_pref * force_pred).reshape(-1),
+                            (atom_pref * force_label).reshape(-1),
+                            delta=self.huber_delta,
+                        )
+                        loss += pref_pf * l_huber_loss
+                    rmse_pf = l2_pref_force_loss.sqrt()
+                    more_loss["rmse_pf"] = self.display_if_exist(
+                        rmse_pf.detach(), find_atom_pref
                     )
-                loss += (pref_pf * l2_pref_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
-                rmse_pf = l2_pref_force_loss.sqrt()
-                more_loss["rmse_pf"] = self.display_if_exist(
-                    rmse_pf.detach(), find_atom_pref
-                )
+                else:
+                    l1_pref_force_loss = (torch.abs(diff_f) * atom_pref_reshape).mean()
+                    more_loss["mae_f"] = self.display_if_exist(
+                        l1_pref_force_loss.detach(), find_atom_pref
+                    )
+                    if self.f_use_norm:
+                        l1_pref_force_loss = torch.linalg.vector_norm(
+                            (diff_f * atom_pref_reshape).reshape(-1, 3),
+                            ord=2,
+                            dim=1,
+                            keepdim=True,
+                        ).mean()  # l2 norm mae
+                    loss += (pref_pf * l1_pref_force_loss).to(GLOBAL_PT_FLOAT_PRECISION)
 
             if self.has_gf and "drdq" in label:
                 drdq = label["drdq"]

From 5808b2e19a8e37f97d03219e1802c0c34284b693 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Mon, 22 Dec 2025 17:44:35 +0800
Subject: [PATCH 23/27] restore **kwargs in energy loss __init__

---
 deepmd/pt/loss/ener.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py
index 367d5c9092..00a352424e 100644
--- a/deepmd/pt/loss/ener.py
+++ b/deepmd/pt/loss/ener.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
+    Any,
     Optional,
 )
 
@@ -57,6 +58,7 @@ def __init__(
         use_default_pf: bool = False,
         f_use_norm: bool = False,
         huber_delta: float = 0.01,
+        **kwargs: Any,
     ) -> None:
         r"""Construct a layer to compute loss on energy, force and virial.
 

From ac5fbf92749b4e8bddd4e428a0760001ca14f6a8 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Mon, 17 Nov 2025 15:56:21 +0800
Subject: [PATCH 24/27] fix fitting fparam stat

---
 .../pt/model/atomic_model/dp_atomic_model.py  |  9 ++++++++
 deepmd/pt/model/task/fitting.py               | 21 ++++++++++++++-----
 deepmd/pt/train/wrapper.py                    | 20 +++++++++++++++---
 3 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
index cee7aaf2f9..832c2ee9f6 100644
--- a/deepmd/pt/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -328,6 +328,15 @@ def wrapped_sampler():
                 atom_exclude_types = self.atom_excl.get_exclude_types()
                 for sample in sampled:
                     sample["atom_exclude_types"] = list(atom_exclude_types)
+            if (
+                "find_fparam" not in sampled[0]
+                and "fparam" not in sampled[0]
+                and self.has_default_fparam()
+            ):
+                default_fparam = self.get_default_fparam()
+                for sample in sampled:
+                    nframe = sample["atype"].shape[0]
+                    sample["fparam"] = default_fparam.repeat(nframe, 1)
             return sampled
 
         self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 12efb7c1f6..7ed071c771 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -63,7 +63,12 @@ def __new__(cls, *args, **kwargs):
         return super().__new__(cls)
 
     def share_params(
-        self, base_class, shared_level, model_prob=1.0, protection=1e-2, resume=False
+        self,
+        base_class: "Fitting",
+        shared_level: int,
+        model_prob: float = 1.0,
+        protection: float = 1e-2,
+        resume: bool = False,
     ) -> None:
         """
         Share the parameters of self to the base_class with shared_level during multitask training.
@@ -132,7 +137,6 @@ def share_params(
                     )
                 self.aparam_avg = base_class.aparam_avg
                 self.aparam_inv_std = base_class.aparam_inv_std
-
             # the following will successfully link all the params except buffers, which need manually link.
             for item in self._modules:
                 self._modules[item] = base_class._modules[item]
@@ -261,7 +265,11 @@ def compute_input_stats(
 
         # stat fparam
         if self.numb_fparam > 0:
-            if stat_file_path is not None and stat_file_path.is_dir():
+            if (
+                stat_file_path is not None
+                and stat_file_path.is_dir()
+                and (stat_file_path / "fparam").is_file()
+            ):
                 self.restore_fparam_from_file(stat_file_path)
             else:
                 sampled = merged() if callable(merged) else merged
@@ -292,10 +300,13 @@ def compute_input_stats(
             log.info(f"fparam_avg is {fparam_avg}, fparam_inv_std is {fparam_inv_std}")
             self.fparam_avg.copy_(to_torch_tensor(fparam_avg))
             self.fparam_inv_std.copy_(to_torch_tensor(fparam_inv_std))
-
         # stat aparam
         if self.numb_aparam > 0:
-            if stat_file_path is not None and stat_file_path.is_dir():
+            if (
+                stat_file_path is not None
+                and stat_file_path.is_dir()
+                and (stat_file_path / "aparam").is_file()
+            ):
                 self.restore_aparam_from_file(stat_file_path)
             else:
                 sampled = merged() if callable(merged) else merged
diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py
index 4ac86d43d6..cdef1e8533 100644
--- a/deepmd/pt/train/wrapper.py
+++ b/deepmd/pt/train/wrapper.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
 from typing import (
+    Any,
     Optional,
     Union,
 )
@@ -59,7 +60,13 @@ def __init__(
                     self.loss[task_key] = loss[task_key]
         self.inference_only = self.loss is None
 
-    def share_params(self, shared_links, model_key_prob_map, data_stat_protect=1e-2, resume=False) -> None:
+    def share_params(
+        self,
+        shared_links: dict[str, Any],
+        model_key_prob_map: dict,
+        data_stat_protect: float = 1e-2,
+        resume: bool = False,
+    ) -> None:
         """
         Share the parameters of classes following rules defined in shared_links during multitask training.
         If not start from checkpoint (resume is False),
@@ -129,9 +136,16 @@ def share_params(self, shared_links, model_key_prob_map, data_stat_protect=1e-2,
                         link_class = self.model[
                             model_key_link
                         ].atomic_model.__getattr__(class_type_link)
-                        frac_prob = model_key_prob_map[model_key_link]/model_key_prob_map[model_key_base]
+                        frac_prob = (
+                            model_key_prob_map[model_key_link]
+                            / model_key_prob_map[model_key_base]
+                        )
                         link_class.share_params(
-                            base_class, shared_level_link, model_prob=frac_prob, protection=data_stat_protect, resume=resume
+                            base_class,
+                            shared_level_link,
+                            model_prob=frac_prob,
+                            protection=data_stat_protect,
+                            resume=resume,
                         )
                         log.warning(
                             f"Shared params of {model_key_base}.{class_type_base} and {model_key_link}.{class_type_link}!"

From 4cc677d6adf4fa1fd6202fbc6008bbd6bd0fe21f Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 16 Jan 2026 20:38:06 +0800
Subject: [PATCH 25/27] fix huber with atom_pref

---
 deepmd/pt/loss/ener.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepmd/pt/loss/ener.py b/deepmd/pt/loss/ener.py
index 00a352424e..6ea48318da 100644
--- a/deepmd/pt/loss/ener.py
+++ b/deepmd/pt/loss/ener.py
@@ -346,8 +346,8 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
                         )
                     else:
                         l_huber_loss = custom_huber_loss(
-                            (atom_pref * force_pred).reshape(-1),
-                            (atom_pref * force_label).reshape(-1),
+                            atom_pref_reshape * force_pred.reshape(-1),
+                            atom_pref_reshape * force_label.reshape(-1),
                             delta=self.huber_delta,
                         )
                         loss += pref_pf * l_huber_loss

From b4c8b6062eb6caee37a4e77ab45fdc4e673c8bd4 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Wed, 25 Mar 2026 16:50:27 +0800
Subject: [PATCH 26/27] fix has_default_fparam when dos or property

---
 deepmd/entrypoints/test.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py
index 69b1704471..a472387eab 100644
--- a/deepmd/entrypoints/test.py
+++ b/deepmd/entrypoints/test.py
@@ -678,7 +678,11 @@ def test_dos(
 
     if dp.get_dim_fparam() > 0:
         data.add(
-            "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False
+            "fparam",
+            dp.get_dim_fparam(),
+            atomic=False,
+            must=not dp.has_default_fparam(),
+            high_prec=False,
         )
     if dp.get_dim_aparam() > 0:
         data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False)
@@ -846,7 +850,11 @@ def test_property(
 
     if dp.get_dim_fparam() > 0:
         data.add(
-            "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False
+            "fparam",
+            dp.get_dim_fparam(),
+            atomic=False,
+            must=not dp.has_default_fparam(),
+            high_prec=False,
         )
     if dp.get_dim_aparam() > 0:
         data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False)

From ad98d0dff07954a4c720184a2dd91b532b7a88c2 Mon Sep 17 00:00:00 2001
From: Yuxiang Liu <liuyuxiang92@gmail.com>
Date: Wed, 25 Mar 2026 09:21:58 +0000
Subject: [PATCH 27/27] feat: Add softmax to property for cooh nframes=3

---
 deepmd/infer/deep_property.py | 11 +++++++++++
 deepmd/pt/loss/property.py    | 21 +++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/deepmd/infer/deep_property.py b/deepmd/infer/deep_property.py
index 5944491cc0..ddd76828bf 100644
--- a/deepmd/infer/deep_property.py
+++ b/deepmd/infer/deep_property.py
@@ -139,6 +139,17 @@ def eval(
         atomic_property = results[self.get_var_name()].reshape(
             nframes, natoms, self.get_task_dim()
         )
+        # Softmax-weighted average of the reduced property over frames. The result
+        # is stored back in `results` because `property` is only bound below.
+        if nframes != 3:
+            raise RuntimeError(f"Expected nframes == 3, got {nframes}")
+        redu_key = f"{self.get_var_name()}_redu"
+        redu = results[redu_key].reshape(nframes, self.get_task_dim())  # (3, D)
+        scores = redu.mean(axis=1)  # (3,); negate to favor smaller values
+        weights = np.exp(scores - scores.max())  # max-shift avoids overflow
+        weights /= weights.sum()
+        avg = (weights[:, None] * redu).sum(axis=0, keepdims=True)  # (1, D)
+        results[redu_key] = np.repeat(avg, nframes, axis=0)  # (3, D)
         property = results[f"{self.get_var_name()}_redu"].reshape(
             nframes, self.get_task_dim()
         )
diff --git a/deepmd/pt/loss/property.py b/deepmd/pt/loss/property.py
index 9d42c81b45..a801d9de23 100644
--- a/deepmd/pt/loss/property.py
+++ b/deepmd/pt/loss/property.py
@@ -91,6 +91,27 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False
         """
         model_pred = model(**input_dict)
         var_name = self.var_name
+
+        # Softmax-weighted averaging over the batch (added by YL); the batch must
+        # hold exactly three samples, so validate before doing any computation.
+        # model_pred[var_name]: (nbz, task_dim)
+        nbz = model_pred[var_name].shape[0]
+        if nbz != 3:
+            raise RuntimeError(
+                f"[PropertyLoss] Expected batch size nbz == 3 for softmax-avg, got nbz == {nbz}. "
+                "Ensure your DataLoader yields triples (batch_size=3, drop_last=True)."
+            )
+
+        # 1) scalar score per sample: mean over task_dim
+        #    (to favor smaller values, negate: `-model_pred[var_name].mean(dim=1)`)
+        score_per_sample = model_pred[var_name].mean(dim=1)  # (nbz,)
+        weights = F.softmax(score_per_sample, dim=0)  # (nbz,)
+        # 2) weighted average vector: (1, task_dim)
+        avg_vec = (weights.unsqueeze(1) * model_pred[var_name]).sum(dim=0, keepdim=True)
+        # 3) replace every prediction with the averaged vector (broadcast over batch)
+        model_pred[var_name] = avg_vec.expand_as(model_pred[var_name])
+        # End of softmax-weighted averaging.
+
         nbz = model_pred[var_name].shape[0]
         assert model_pred[var_name].shape == (nbz, self.task_dim)
         assert label[var_name].shape == (nbz, self.task_dim)