diff --git a/deepmd/__about__.py b/deepmd/__about__.py
new file mode 100644
index 0000000000..0d2f7d41b3
--- /dev/null
+++ b/deepmd/__about__.py
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Auto-generated stub for development use
+__version__ = "dev"
diff --git a/deepmd/dpmodel/model/model.py b/deepmd/dpmodel/model/model.py
index 8f96e965b0..220d1f4464 100644
--- a/deepmd/dpmodel/model/model.py
+++ b/deepmd/dpmodel/model/model.py
@@ -104,13 +104,14 @@ def get_standard_model(data: dict) -> EnergyModel:
     else:
         raise RuntimeError(f"Unknown fitting type: {fitting_net_type}")
 
-    model = modelcls(
-        descriptor=descriptor,
-        fitting=fitting,
-        type_map=data["type_map"],
-        atom_exclude_types=atom_exclude_types,
-        pair_exclude_types=pair_exclude_types,
-    )
+    model_kwargs: dict = {
+        "descriptor": descriptor,
+        "fitting": fitting,
+        "type_map": data["type_map"],
+        "atom_exclude_types": atom_exclude_types,
+        "pair_exclude_types": pair_exclude_types,
+    }
+    model = modelcls(**model_kwargs)
     return model
 
 
diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py
index 4a0cb27cb1..0773023d61 100644
--- a/deepmd/entrypoints/test.py
+++ b/deepmd/entrypoints/test.py
@@ -887,6 +887,8 @@ def test_property(
             high_prec=True,
         )
 
+    is_xas = var_name == "xas"
+
     if dp.get_dim_fparam() > 0:
         data.add(
             "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False
@@ -894,6 +896,10 @@ def test_property(
     if dp.get_dim_aparam() > 0:
         data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False)
 
+    # XAS requires sel_type.npy (per-frame absorbing element type index)
+    if is_xas:
+        data.add("sel_type", 1, atomic=False, must=True, high_prec=False)
+
     test_data = data.get_test()
     mixed_type = data.mixed_type
     natoms = len(test_data["type"][0])
@@ -918,21 +924,57 @@ def test_property(
     else:
         aparam = None
 
+    # XAS: per-atom outputs are needed to average over absorbing-element atoms
+    eval_atomic = has_atom_property or is_xas
     ret = dp.eval(
         coord,
         box,
         atype,
         fparam=fparam,
         aparam=aparam,
-        atomic=has_atom_property,
+        atomic=eval_atomic,
         mixed_type=mixed_type,
     )
 
-    property = ret[0]
+    if is_xas:
+        # ret[1]: per-atom property [numb_test, natoms, task_dim]
+        atom_prop = ret[1].reshape([numb_test, natoms, dp.task_dim])
+        if mixed_type:
+            atype_frames = atype  # [numb_test, natoms]
+        else:
+            atype_frames = np.tile(atype, (numb_test, 1))  # [numb_test, natoms]
+        sel_type_int = test_data["sel_type"][:numb_test, 0].astype(int)
+        property = np.zeros([numb_test, dp.task_dim], dtype=atom_prop.dtype)
+        for i in range(numb_test):
+            t = sel_type_int[i]
+            mask = atype_frames[i] == t  # [natoms]
+            count = max(mask.sum(), 1)
+            property[i] = atom_prop[i][mask].sum(axis=0) / count
+
+        # Add back the per-(type, edge) energy reference so output is in
+        # absolute eV (matching label format).  xas_e_ref is saved in the
+        # model checkpoint by XASLoss.compute_output_stats.
+        try:
+            xas_e_ref = dp.dp.model["Default"].atomic_model.xas_e_ref
+        except AttributeError:
+            xas_e_ref = None
+        if xas_e_ref is not None and fparam is not None:
+            import torch as _torch
+
+            edge_idx_all = (
+                _torch.tensor(fparam.reshape(numb_test, -1)).argmax(dim=-1).numpy()
+            )
+            e_ref_np = xas_e_ref.cpu().numpy()  # [ntypes, nfparam, 2]
+            for i in range(numb_test):
+                t = sel_type_int[i]
+                e = int(edge_idx_all[i])
+                property[i, :2] += e_ref_np[t, e]
+    else:
+        property = ret[0]
 
     property = property.reshape([numb_test, dp.task_dim])
 
-    if has_atom_property:
+    if has_atom_property and not is_xas:
         aproperty = ret[1]
         aproperty = aproperty.reshape([numb_test, natoms * dp.task_dim])
 
diff --git a/deepmd/pt/loss/__init__.py b/deepmd/pt/loss/__init__.py
index 1d25c1e52f..17b2cd37c3 100644
--- a/deepmd/pt/loss/__init__.py
+++ b/deepmd/pt/loss/__init__.py
@@ -21,6 +21,9 @@
 from .tensor import (
     TensorLoss,
 )
+from .xas import (
+    XASLoss,
+)
 
 __all__ = [
     "DOSLoss",
@@ -31,4 +34,5 @@
     "PropertyLoss",
     "TaskLoss",
     "TensorLoss",
+    "XASLoss",
 ]
diff --git a/deepmd/pt/loss/xas.py b/deepmd/pt/loss/xas.py
new file mode 100644
index 0000000000..2091f5e04c
--- /dev/null
+++ b/deepmd/pt/loss/xas.py
@@ -0,0 +1,321 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from collections import (
+    defaultdict,
+)
+from typing import (
+    Any,
+)
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from deepmd.pt.loss.loss import (
+    TaskLoss,
+)
+from deepmd.pt.utils import (
+    env,
+)
+from deepmd.utils.data import (
+    DataRequirementItem,
+)
+
+log = logging.getLogger(__name__)
+
+
+class XASLoss(TaskLoss):
+    """Loss for XAS spectrum fitting via property fitting + sel_type reduction.
+
+    The model outputs per-atom property vectors (atom_xas).  For each frame
+    this loss selects the atoms of type ``sel_type`` (read from ``sel_type.npy``
+    in each training system) and takes their mean, then computes a loss against
+    the per-frame XAS label.
+
+    Energy normalization
+    --------------------
+    XAS labels contain absolute edge energies (E_min, E_max in eV) that vary
+    enormously across element-edge pairs (H_K ~14 eV, Th_K ~110000 eV).
+    Training directly on absolute values causes gradient instability because
+    the energy dimensions dwarf the intensity dimensions.
+
+    ``compute_output_stats`` computes a reference energy ``e_ref[t, e]`` for
+    every ``(absorbing_type t, edge_index e)`` combination from the training
+    data and stores it as a registered buffer.  During training, ``forward``
+    normalises labels and predictions by subtracting the per-frame reference
+    so that the loss is computed on chemical shifts (±few eV) and normalised
+    intensities—quantities of comparable magnitude.
+
+    The buffer is saved in the model checkpoint, eliminating any need for
+    external normalisation files.
+
+    Parameters
+    ----------
+    task_dim : int
+        Output dimension of the fitting net (e.g. 102 = E_min + E_max + 100 pts).
+    ntypes : int
+        Number of atom types in the model.
+    nfparam : int
+        Length of the fparam one-hot vector (= number of edge types).
+    var_name : str
+        Property name, must match ``property_name`` in the fitting config.
+    loss_func : str
+        One of ``smooth_mae``, ``mae``, ``mse``, ``rmse``.
+    metric : list[str]
+        Metrics to display during training.
+    beta : float
+        Beta parameter for smooth_l1 loss.
+    """
+
+    def __init__(
+        self,
+        task_dim: int,
+        ntypes: int,
+        nfparam: int,
+        var_name: str = "xas",
+        loss_func: str = "smooth_mae",
+        metric: list[str] = ["mae"],
+        beta: float = 1.0,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__()
+        self.task_dim = task_dim
+        self.ntypes = ntypes
+        self.nfparam = nfparam
+        self.var_name = var_name
+        self.loss_func = loss_func
+        self.metric = metric
+        self.beta = beta
+
+        # e_ref[sel_type_idx, edge_idx, 0] = mean E_min  (eV)
+        # e_ref[sel_type_idx, edge_idx, 1] = mean E_max  (eV)
+        # Shape: [ntypes, nfparam, 2]. Filled by compute_output_stats; zero until then.
+        self.register_buffer(
+            "e_ref",
+            torch.zeros(ntypes, nfparam, 2, dtype=env.GLOBAL_PT_FLOAT_PRECISION),
+        )
+
+    # ------------------------------------------------------------------
+    # Stat phase: compute per-(absorbing_type, edge) reference energies
+    # ------------------------------------------------------------------
+    def compute_output_stats(
+        self,
+        sampled: list[dict],
+        model: "torch.nn.Module | None" = None,
+    ) -> None:
+        """Compute ``e_ref`` and fix model energy-dim bias/std.
+
+        Called once before training starts.  Requires ``xas``, ``sel_type``,
+        and ``fparam`` in at least some samples.
+
+        Parameters
+        ----------
+        sampled : list[dict]
+            List of data batches from ``make_stat_input``.
+        model : nn.Module, optional
+            The full DeePMD model.  When given, the per-atom property model's
+            ``out_bias`` and ``out_std`` for the two energy dimensions (E_min,
+            E_max) are reset to 0 / 1 so the NN predicts *chemical shifts*
+            (±few eV) instead of absolute energies (~thousands of eV).
+            Without this reset the stat-initialised ``out_std ≈ 26 000 eV``
+            amplifies weight-update steps by 26 000×, causing immediate
+            gradient explosion.
+        """
+        accum: dict[tuple[int, int], list] = defaultdict(list)
+
+        for frame in sampled:
+            if (
+                self.var_name not in frame
+                or "sel_type" not in frame
+                or "fparam" not in frame
+            ):
+                continue
+            xas = frame[self.var_name]  # tensor, various shapes
+            sel_type = frame["sel_type"]
+            fparam = frame["fparam"]
+
+            # flatten to [nf, task_dim], [nf], [nf, nfparam]
+            xas = xas.reshape(-1, self.task_dim)
+            sel_type = sel_type.reshape(-1).long()
+            fparam = fparam.reshape(-1, self.nfparam)
+            edge_idx = fparam.argmax(dim=-1)
+
+            nf = xas.shape[0]
+            for i in range(nf):
+                t = int(sel_type[i].item())
+                e = int(edge_idx[i].item())
+                if 0 <= t < self.ntypes and 0 <= e < self.nfparam:
+                    accum[(t, e)].append(xas[i, :2].detach().cpu().numpy())
+
+        if not accum:
+            log.warning(
+                "XASLoss.compute_output_stats: no frames with xas+sel_type+fparam found; "
+                "e_ref remains zero. Training may be unstable."
+            )
+            return
+
+        e_ref = torch.zeros(
+            self.ntypes, self.nfparam, 2, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        for (t, e), vals in accum.items():
+            e_ref[t, e] = torch.tensor(
+                np.mean(vals, axis=0), dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+            log.info(
+                f"XASLoss e_ref: type={t}, edge={e} -> "
+                f"E_min_ref={float(e_ref[t, e, 0]):.2f} eV, "
+                f"E_max_ref={float(e_ref[t, e, 1]):.2f} eV  "
+                f"(n={len(vals)})"
+            )
+
+        self.e_ref.copy_(e_ref)
+        log.info(
+            f"XASLoss: e_ref computed for {len(accum)} (sel_type, edge) combinations."
+        )
+
+        if model is not None:
+            try:
+                am = model.atomic_model
+
+                # 1. Copy e_ref into the model's own buffer so it is saved
+                #    in the checkpoint and available at inference time without
+                #    any external reference file (analogous to out_bias).
+                if getattr(am, "xas_e_ref", None) is not None:
+                    am.xas_e_ref.copy_(e_ref.to(am.xas_e_ref.dtype))
+                    log.info("XASLoss: copied e_ref → model.atomic_model.xas_e_ref.")
+
+                # 2. Reset energy-dim out_bias/out_std so the NN predicts
+                #    chemical shifts instead of absolute energies.
+                #
+                #    Why this is necessary
+                #    ----------------------
+                #    The model stat phase initialises
+                #      out_bias[:, :2] ≈ global_mean(E_min, E_max) ≈ 19 000 eV
+                #      out_std[:, :2]  ≈ global_std(E_min, E_max)  ≈ 26 000 eV
+                #    so  atom_xas[:, 0] = NN_raw[:, 0] * 26 000 + 19 000.
+                #    A single Adam step changes NN_raw by ~lr, which changes
+                #    the physical output by lr × 26 000 = 2.7 eV — the same
+                #    instability as out_bias for energy fitting if the reference
+                #    is wrong.  With out_std=1 / out_bias=0, the NN output for
+                #    energy dims is interpreted directly as a chemical shift
+                #    (target ≈ label − e_ref ≈ ±few eV), keeping gradient
+                #    magnitudes O(1) and training stable.
+                key_idx = am.bias_keys.index(self.var_name)
+                with torch.no_grad():
+                    am.out_bias[key_idx, :, :2] = 0.0
+                    am.out_std[key_idx, :, :2] = 1.0
+                log.info(
+                    "XASLoss: reset out_bias[:,:2]=0 and out_std[:,:2]=1 "
+                    "for energy dims (model predicts chemical shifts; "
+                    "xas_e_ref restores absolute energies at inference)."
+                )
+            except Exception as exc:
+                log.warning(f"XASLoss: could not update model energy-dim stats: {exc}")
+
+    # ------------------------------------------------------------------
+    # Forward
+    # ------------------------------------------------------------------
+    def forward(
+        self,
+        input_dict: dict[str, torch.Tensor],
+        model: torch.nn.Module,
+        label: dict[str, torch.Tensor],
+        natoms: int,
+        learning_rate: float = 0.0,
+        mae: bool = False,
+    ) -> tuple[dict[str, torch.Tensor], torch.Tensor, dict[str, torch.Tensor]]:
+        model_pred = model(**input_dict)
+
+        # per-atom outputs: [nf, nloc, task_dim]
+        atom_prop = model_pred[f"atom_{self.var_name}"]
+        atype = input_dict["atype"]  # [nf, nloc]
+
+        # sel_type from label: [nf, 1] float → [nf] int
+        sel_type = label["sel_type"][:, 0].long()
+
+        # element-wise mean: average atom_prop over atoms of sel_type per frame
+        nf, nloc, td = atom_prop.shape
+        pred = torch.zeros(nf, td, dtype=atom_prop.dtype, device=atom_prop.device)
+        for i in range(nf):
+            t = int(sel_type[i].item())
+            mask = (atype[i] == t).unsqueeze(-1)  # [nloc, 1]
+            count = mask.sum().clamp(min=1)
+            pred[i] = (atom_prop[i] * mask).sum(dim=0) / count
+
+        label_xas = label[self.var_name]  # [nf, task_dim]
+
+        # --- per-frame reference energy lookup ---
+        # edge_idx = argmax of one-hot fparam
+        fparam = input_dict.get("fparam")
+        if fparam is not None and fparam.numel() > 0:
+            edge_idx = fparam.reshape(nf, -1).argmax(dim=-1).clamp(0, self.nfparam - 1)
+        else:
+            edge_idx = torch.zeros(nf, dtype=torch.long, device=pred.device)
+
+        # e_ref_frame: [nf, 2]  (E_min_ref, E_max_ref for each frame)
+        # Indices must be on the same device as the buffer (handles CPU/GPU mismatch)
+        _dev = self.e_ref.device
+        e_ref_frame = self.e_ref[sel_type.to(_dev), edge_idx.to(_dev)].to(pred.device)
+
+        # Shift the energy-dim TARGETS only.
+        #
+        # After compute_output_stats has reset out_bias[:,:2]=0 / out_std[:,:2]=1,
+        # the model outputs raw NN values ≈ 0 for dims 0,1.  We train those
+        # dims against (label − e_ref), i.e. the chemical shift (±few eV),
+        # keeping gradient magnitudes O(1).  Intensity dims (2:) are trained
+        # against the original label values unchanged.
+        #
+        # At inference, we add e_ref back to get the absolute edge energy.
+        label_shifted = label_xas.clone()
+        label_shifted[:, :2] = label_xas[:, :2] - e_ref_frame
+
+        # --- loss ---
+        loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
+        if self.loss_func == "smooth_mae":
+            loss += F.smooth_l1_loss(
+                pred, label_shifted, reduction="sum", beta=self.beta
+            )
+        elif self.loss_func == "mae":
+            loss += F.l1_loss(pred, label_shifted, reduction="sum")
+        elif self.loss_func == "mse":
+            loss += F.mse_loss(pred, label_shifted, reduction="sum")
+        elif self.loss_func == "rmse":
+            loss += torch.sqrt(F.mse_loss(pred, label_shifted, reduction="mean"))
+        else:
+            raise RuntimeError(f"Unknown loss function: {self.loss_func}")
+
+        # --- metrics ---
+        more_loss: dict[str, torch.Tensor] = {}
+        if "mae" in self.metric:
+            more_loss["mae"] = F.l1_loss(pred, label_shifted, reduction="mean").detach()
+        if "rmse" in self.metric:
+            more_loss["rmse"] = torch.sqrt(
+                F.mse_loss(pred, label_shifted, reduction="mean")
+            ).detach()
+
+        # Absolute prediction: add e_ref back to energy dims for eval / output
+        pred_abs = pred.clone()
+        pred_abs[:, :2] = pred[:, :2] + e_ref_frame
+        model_pred[self.var_name] = pred_abs
+
+        return model_pred, loss, more_loss
+
+    @property
+    def label_requirement(self) -> list[DataRequirementItem]:
+        """Declare required data files: xas label + sel_type."""
+        return [
+            DataRequirementItem(
+                self.var_name,
+                ndof=self.task_dim,
+                atomic=False,
+                must=True,
+                high_prec=True,
+            ),
+            DataRequirementItem(
+                "sel_type",
+                ndof=1,
+                atomic=False,
+                must=True,
+                high_prec=False,
+            ),
+        ]
diff --git a/deepmd/pt/model/atomic_model/__init__.py b/deepmd/pt/model/atomic_model/__init__.py
index 4da9bf781b..fbf7478778 100644
--- a/deepmd/pt/model/atomic_model/__init__.py
+++ b/deepmd/pt/model/atomic_model/__init__.py
@@ -41,6 +41,7 @@
 )
 from .property_atomic_model import (
     DPPropertyAtomicModel,
+    DPXASAtomicModel,
 )
 
 __all__ = [
@@ -51,6 +52,7 @@
     "DPEnergyAtomicModel",
     "DPPolarAtomicModel",
     "DPPropertyAtomicModel",
+    "DPXASAtomicModel",
     "DPZBLLinearEnergyAtomicModel",
     "LinearEnergyAtomicModel",
     "PairTabAtomicModel",
diff --git a/deepmd/pt/model/atomic_model/property_atomic_model.py b/deepmd/pt/model/atomic_model/property_atomic_model.py
index baf9c5b7fc..ffe3cc5fe6 100644
--- a/deepmd/pt/model/atomic_model/property_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/property_atomic_model.py
@@ -52,3 +52,28 @@ def apply_out_stat(
         for kk in self.bias_keys:
             ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
         return ret
+
+
+class DPXASAtomicModel(DPPropertyAtomicModel):
+    """Atomic model for XAS spectrum fitting.
+
+    Extends :class:`DPPropertyAtomicModel` with a per-(absorbing_type, edge)
+    energy reference buffer ``xas_e_ref`` [ntypes, nfparam, 2].  The buffer is
+    populated by :meth:`deepmd.pt.loss.xas.XASLoss.compute_output_stats` before
+    training starts and is saved in the model checkpoint so that absolute edge
+    energies can be reconstructed at inference time without any external files.
+    """
+
+    def __init__(
+        self, descriptor: Any, fitting: Any, type_map: Any, **kwargs: Any
+    ) -> None:
+        super().__init__(descriptor, fitting, type_map, **kwargs)
+        nfparam: int = getattr(fitting, "numb_fparam", 0)
+        if nfparam > 0:
+            ntypes: int = len(type_map)
+            self.register_buffer(
+                "xas_e_ref",
+                torch.zeros(ntypes, nfparam, 2, dtype=torch.float64),
+            )
+        else:
+            self.xas_e_ref: torch.Tensor | None = None
diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py
index 24075412db..f577e4f0cf 100644
--- a/deepmd/pt/model/model/__init__.py
+++ b/deepmd/pt/model/model/__init__.py
@@ -73,6 +73,9 @@
     SpinEnergyModel,
     SpinModel,
 )
+from .xas_model import (
+    XASModel,
+)
 
 
 def _get_standard_model_components(model_params: dict, ntypes: int) -> tuple:
@@ -269,19 +272,23 @@ def get_standard_model(model_params: dict) -> BaseModel:
     elif fitting_net_type in ["ener", "direct_force_ener"]:
         modelcls = EnergyModel
     elif fitting_net_type == "property":
-        modelcls = PropertyModel
+        property_name = model_params.get("fitting_net", {}).get(
+            "property_name", model_params.get("fitting_net", {}).get("var_name", "")
+        )
+        modelcls = XASModel if property_name == "xas" else PropertyModel
     else:
         raise RuntimeError(f"Unknown fitting type: {fitting_net_type}")
 
-    model = modelcls(
-        descriptor=descriptor,
-        fitting=fitting,
-        type_map=model_params["type_map"],
-        atom_exclude_types=atom_exclude_types,
-        pair_exclude_types=pair_exclude_types,
-        preset_out_bias=preset_out_bias,
-        data_stat_protect=data_stat_protect,
-    )
+    model_kwargs: dict[str, Any] = {
+        "descriptor": descriptor,
+        "fitting": fitting,
+        "type_map": model_params["type_map"],
+        "atom_exclude_types": atom_exclude_types,
+        "pair_exclude_types": pair_exclude_types,
+        "preset_out_bias": preset_out_bias,
+        "data_stat_protect": data_stat_protect,
+    }
+    model = modelcls(**model_kwargs)
     if model_params.get("hessian_mode"):
         model.enable_hessian()
     model.model_def_script = json.dumps(model_params_old)
@@ -315,6 +322,7 @@ def get_model(model_params: dict) -> Any:
     "PolarModel",
     "SpinEnergyModel",
     "SpinModel",
+    "XASModel",
     "get_model",
     "make_hessian_model",
     "make_model",
diff --git a/deepmd/pt/model/model/xas_model.py b/deepmd/pt/model/model/xas_model.py
new file mode 100644
index 0000000000..a18ce44c10
--- /dev/null
+++ b/deepmd/pt/model/model/xas_model.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+)
+
+from deepmd.pt.model.atomic_model import (
+    DPXASAtomicModel,
+)
+from deepmd.pt.model.model.model import (
+    BaseModel,
+)
+
+from .property_model import (
+    PropertyModel,
+)
+
+
+@BaseModel.register("xas")
+class XASModel(PropertyModel):
+    """Model for XAS spectrum fitting.
+
+    Identical to :class:`PropertyModel` but uses :class:`DPXASAtomicModel`
+    as the underlying atomic model, which carries the per-(absorbing_type,
+    edge) energy reference buffer ``xas_e_ref`` in the checkpoint.  This
+    buffer is populated by :meth:`deepmd.pt.loss.xas.XASLoss.compute_output_stats`
+    before training starts and restored at inference time so that absolute
+    edge energies are available without any external reference files.
+    """
+
+    model_type = "xas"
+
+    def __init__(
+        self,
+        descriptor: Any,
+        fitting: Any,
+        type_map: Any,
+        **kwargs: Any,
+    ) -> None:
+        xas_atomic = DPXASAtomicModel(descriptor, fitting, type_map, **kwargs)
+        super().__init__(
+            descriptor, fitting, type_map, atomic_model_=xas_atomic, **kwargs
+        )
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 8d16e1c7ea..790cbff80e 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -43,6 +43,7 @@
     PropertyLoss,
     TaskLoss,
     TensorLoss,
+    XASLoss,
 )
 from deepmd.pt.model.model import (
     get_model,
@@ -94,9 +95,13 @@
     get_optimizer_state_dict,
     set_optimizer_state_dict,
 )
-from torch.distributed.fsdp import (
-    fully_shard,
-)
+
+try:
+    from torch.distributed.fsdp import (
+        fully_shard,
+    )
+except ImportError:
+    fully_shard = None
 from torch.distributed.optim import (
     ZeroRedundancyOptimizer,
 )
@@ -374,6 +379,14 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
                 else False,
                 preset_observed_type=model_params.get("info", {}).get("observed_type"),
             )
+            # For XAS loss: compute per-(absorbing_type, edge) reference energies
+            # from training data and store as a registered buffer in the loss module.
+            if (
+                not resuming
+                and self.rank == 0
+                and hasattr(self.loss, "compute_output_stats")
+            ):
+                self.loss.compute_output_stats(self.get_sample_func(), model=self.model)
             # Persist observed_type from stat into model_params and model_def_script
             if not resuming and self.rank == 0:
                 observed = self.model.atomic_model.observed_type
@@ -1752,6 +1765,12 @@ def get_loss(
         loss_params["var_name"] = var_name
         loss_params["intensive"] = intensive
         return PropertyLoss(**loss_params)
+    elif loss_type == "xas":
+        loss_params["task_dim"] = _model.get_task_dim()
+        loss_params["var_name"] = _model.get_var_name()
+        loss_params["ntypes"] = _ntypes
+        loss_params["nfparam"] = _model.get_fitting_net().numb_fparam
+        return XASLoss(**loss_params)
     else:
         loss_params["starter_learning_rate"] = start_lr
         return TaskLoss.get_class_by_type(loss_type).get_loss(loss_params)
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index b12bc7ef6f..674f565ee8 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -3465,6 +3465,22 @@ def loss_dos() -> list[Argument]:
     ]
 
 
+@loss_args_plugin.register("xas", doc=doc_only_pt_supported)
+def loss_xas() -> list[Argument]:
+    doc_loss_func = (
+        "The loss function to minimize: 'smooth_mae' (default), 'mae', 'mse', 'rmse'."
+    )
+    doc_metric = "Metrics to display during training. Supported: 'mae', 'rmse'."
+    doc_beta = "Beta parameter for smooth_l1 loss."
+    return [
+        Argument(
+            "loss_func", str, optional=True, default="smooth_mae", doc=doc_loss_func
+        ),
+        Argument("metric", list, optional=True, default=["mae"], doc=doc_metric),
+        Argument("beta", float, optional=True, default=1.0, doc=doc_beta),
+    ]
+
+
 @loss_args_plugin.register("property")
 def loss_property() -> list[Argument]:
     doc_loss_func = "The loss function to minimize, such as 'mae','smooth_mae'."
diff --git a/examples/xas/train/README.md b/examples/xas/train/README.md
new file mode 100644
index 0000000000..a4a96c757a
--- /dev/null
+++ b/examples/xas/train/README.md
@@ -0,0 +1,108 @@
+# XAS Spectrum Fitting with DeePMD-kit
+
+This example shows how to train a model to predict X-ray absorption spectra (XAS)
+from atomic structure using DeePMD-kit's `property` fitting net.
+
+## Concept
+
+- The model predicts a 102-dimensional output per atom: `[E_min, E_max, I_0, …, I_99]`
+- During training, per-atom outputs are averaged over atoms of the **absorbing element**
+  (identified by `sel_type.npy` in each training system)
+- The edge type (K, L1, L2, …) is provided as a frame-level parameter `fparam`
+- One training system per `(element, edge)` pair
+
+## Quick Start
+
+**1. Generate example training data**
+
+```bash
+python gen_data.py
+```
+
+This creates `data/Fe_K/` and `data/O_K/` with 50 frames each.
+
+**2. Train the model**
+
+```bash
+dp train input.json
+```
+
+**3. Freeze the model**
+
+```bash
+dp freeze -o model.pb
+```
+
+**4. Test the model**
+
+```bash
+dp test -m model.pb -s data/Fe_K -n 10
+dp test -m model.pb -s data/O_K  -n 10
+```
+
+`dp test` automatically detects `sel_type.npy` and applies element-wise averaging
+before computing the error metrics.
+
+## Data Format
+
+Each system directory must contain:
+
+```
+data/Fe_K/
+├── type.raw          # atom type indices, one per line (int)
+├── type_map.raw      # element symbols, one per line
+└── set.000/
+    ├── coord.npy     # [nframes, natoms*3]   Cartesian coordinates (Å)
+    ├── box.npy       # [nframes, 9]          cell vectors (Å), row-major
+    ├── fparam.npy    # [nframes, nfparam]    edge one-hot encoding
+    ├── sel_type.npy  # [nframes, 1]          absorbing element type index (float64)
+    └── xas.npy       # [nframes, 102]        XAS label: [E_min, E_max, I_0..I_99]
+```
+
+### `sel_type.npy`
+
+The type index of the absorbing element, stored as float64, constant per system.
+
+```
+Fe is type 0  →  sel_type.npy filled with 0.0
+O  is type 1  →  sel_type.npy filled with 1.0
+```
+
+### `xas.npy` label layout (`task_dim = 102`)
+
+| Column      | Meaning                                                            |
+| ----------- | ------------------------------------------------------------------ |
+| `xas[i,0]`  | `E_min` (eV) — lower bound of energy grid                          |
+| `xas[i,1]`  | `E_max` (eV) — upper bound of energy grid                          |
+| `xas[i,2:]` | `I[0..99]` — 100 intensity values on `linspace(E_min, E_max, 100)` |
+
+### `fparam.npy` edge encoding (`nfparam = 3`)
+
+| Edge | Encoding  |
+| ---- | --------- |
+| K    | `[1,0,0]` |
+| L1   | `[0,1,0]` |
+| L2   | `[0,0,1]` |
+
+Extend with more entries for additional edges and set `numb_fparam` accordingly.
+
+## Input Parameters
+
+Key fields in `input.json`:
+
+| Parameter                 | Description                                                   |
+| ------------------------- | ------------------------------------------------------------- |
+| `fitting_net.type`        | Must be `"property"`                                          |
+| `fitting_net.task_dim`    | `102` (2 energy bounds + 100 intensities)                     |
+| `fitting_net.intensive`   | `true` — per-atom outputs are **averaged**, not summed        |
+| `fitting_net.numb_fparam` | Number of edge-type features (3 for K/L1/L2)                  |
+| `loss.type`               | `"xas"` — uses `sel_type.npy` for element-selective averaging |
+| `loss.loss_func`          | `"smooth_mae"` (recommended) or `"mse"`                       |
+
+## Extending to More Elements / Edges
+
+- Add a new system directory per `(element, edge)` pair
+- Set `sel_type.npy` to the type index of the absorbing element in that system
+- Set `fparam.npy` to the one-hot vector for the corresponding edge
+- List all system paths under `training.training_data.systems`
+- Increase `numb_fparam` if adding new edge types
diff --git a/examples/xas/train/gen_data.py b/examples/xas/train/gen_data.py
new file mode 100644
index 0000000000..82c81319b8
--- /dev/null
+++ b/examples/xas/train/gen_data.py
@@ -0,0 +1,166 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Generate example XAS training data for a Fe-O system.
+
+This script shows the required data format for XAS spectrum fitting.
+
+Data layout
+-----------
+One training system per (element, edge) pair:
+
+  data/Fe_K/          — Fe K-edge XAS
+  data/O_K/           — O  K-edge XAS
+
+Each system directory contains:
+
+  type.raw            — atom type indices (int, one per line)
+  type_map.raw        — element symbols, one per line
+  set.000/
+    coord.npy         — [nframes, natoms*3]  Cartesian coordinates (Å)
+    box.npy           — [nframes, 9]         cell vectors (Å), row-major
+    fparam.npy        — [nframes, nfparam]   edge encoding (one-hot or continuous)
+    sel_type.npy      — [nframes, 1]         type index of absorbing element (float)
+    xas.npy           — [nframes, task_dim]  XAS label: [E_min, E_max, I_0..I_99]
+
+Label format (task_dim = 102)
+------------------------------
+  xas[i, 0]    = E_min  (eV) — lower bound of the energy grid for frame i
+  xas[i, 1]    = E_max  (eV) — upper bound of the energy grid for frame i
+  xas[i, 2:]   = I      (arb. units) — 100 equally-spaced intensity values
+                  on the grid linspace(E_min, E_max, 100)
+
+fparam encoding (nfparam = 3 for K/L1/L2 edges)
+-------------------------------------------------
+  K-edge  → [1, 0, 0]
+  L1-edge → [0, 1, 0]
+  L2-edge → [0, 0, 1]
+  (extend as needed; use numb_fparam in input.json accordingly)
+
+sel_type.npy
+------------
+  Integer type index of the absorbing element, stored as float64.
+  All frames in a system must share the same value (it is constant per system).
+  Example: Fe is type 0 → sel_type.npy filled with 0.0
+           O  is type 1 → sel_type.npy filled with 1.0
+"""
+
+import os
+
+import numpy as np
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+nframes = 50  # number of frames per system
+numb_pts = 100  # energy grid points
+task_dim = numb_pts + 2  # E_min + E_max + 100 intensities
+nfparam = 3  # K / L1 / L2 one-hot
+natoms = 8  # 4 Fe (type 0) + 4 O (type 1)
+box_size = 4.0  # Å
+
+rng = np.random.default_rng(42)
+
+# Equilibrium positions: simple rock-salt-like arrangement
+base_pos = np.array(
+    [
+        [0.0, 0.0, 0.0],
+        [2.0, 2.0, 0.0],
+        [2.0, 0.0, 2.0],
+        [0.0, 2.0, 2.0],  # Fe
+        [1.0, 1.0, 1.0],
+        [3.0, 3.0, 1.0],
+        [3.0, 1.0, 3.0],
+        [1.0, 3.0, 3.0],  # O
+    ]
+)
+
+coords = base_pos[None] + rng.normal(0, 0.1, (nframes, natoms, 3))
+box = np.tile(np.diag([box_size] * 3).reshape(9), (nframes, 1))
+
+type_arr = np.array([0, 0, 0, 0, 1, 1, 1, 1], dtype=int)  # Fe Fe Fe Fe O O O O
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def gaussian_spectrum(peak_eV, e_min, e_max, npts=100, width_frac=0.10):
+    grid = np.linspace(e_min, e_max, npts)
+    width = (e_max - e_min) * width_frac
+    return np.exp(-0.5 * ((grid - peak_eV) / width) ** 2)
+
+
+def write_system(
+    path: str,
+    sel_type_idx: int,
+    atom_slice,  # slice object selecting absorbing atoms
+    e_min: float,
+    e_max: float,
+    peak_center: float,
+    peak_shift_scale: float,
+    fparam_vec,  # 1-D array of length nfparam (one-hot edge encoding)
+):
+    os.makedirs(f"{path}/set.000", exist_ok=True)
+
+    # --- structure ---
+    np.savetxt(f"{path}/type.raw", type_arr, fmt="%d")
+    with open(f"{path}/type_map.raw", "w") as f:
+        f.write("Fe\nO\n")
+    np.save(f"{path}/set.000/box.npy", box.astype(np.float64))
+    np.save(
+        f"{path}/set.000/coord.npy",
+        coords.reshape(nframes, natoms * 3).astype(np.float64),
+    )
+
+    # --- fparam: same edge for all frames ---
+    fparam = np.tile(fparam_vec, (nframes, 1)).astype(np.float64)
+    np.save(f"{path}/set.000/fparam.npy", fparam)
+
+    # --- sel_type: constant per system ---
+    sel = np.full((nframes, 1), float(sel_type_idx), dtype=np.float64)
+    np.save(f"{path}/set.000/sel_type.npy", sel)
+
+    # --- xas labels ---
+    labels = np.zeros((nframes, task_dim), dtype=np.float64)
+    for i in range(nframes):
+        # peak position shifts slightly with mean x-coordinate of absorbing atoms
+        mean_x = coords[i, atom_slice, 0].mean()
+        peak = peak_center + mean_x * peak_shift_scale
+        spectrum = gaussian_spectrum(peak, e_min, e_max)
+        labels[i, 0] = e_min
+        labels[i, 1] = e_max
+        labels[i, 2:] = spectrum
+    np.save(f"{path}/set.000/xas.npy", labels)
+
+    print(f"  {path}:")
+    print(f"    sel_type = {sel_type_idx}  fparam = {fparam_vec.tolist()}")
+    print(f"    xas.npy  shape = {labels.shape}")
+
+
+# ---------------------------------------------------------------------------
+# Generate Fe K-edge and O K-edge systems
+# ---------------------------------------------------------------------------
+print("Generating example XAS training data...")
+
+write_system(
+    path="data/Fe_K",
+    sel_type_idx=0,  # Fe is type 0
+    atom_slice=slice(0, 4),  # first 4 atoms are Fe
+    e_min=7100.0,  # Fe K-edge region (eV)
+    e_max=7250.0,
+    peak_center=7112.0,  # Fe K-edge energy
+    peak_shift_scale=2.0,  # chemical shift ∝ local environment
+    fparam_vec=np.array([1.0, 0.0, 0.0]),  # K-edge one-hot
+)
+
+write_system(
+    path="data/O_K",
+    sel_type_idx=1,  # O is type 1
+    atom_slice=slice(4, 8),  # last 4 atoms are O
+    e_min=525.0,  # O K-edge region (eV)
+    e_max=560.0,
+    peak_center=535.0,  # O K-edge energy
+    peak_shift_scale=0.5,
+    fparam_vec=np.array([1.0, 0.0, 0.0]),  # also K-edge
+)
+
+print(f"\nDone. {nframes} frames per system, task_dim={task_dim}, nfparam={nfparam}")
+print("Data written to ./data/Fe_K/ and ./data/O_K/")
diff --git a/examples/xas/train/input.json b/examples/xas/train/input.json
new file mode 100644
index 0000000000..f58417b478
--- /dev/null
+++ b/examples/xas/train/input.json
@@ -0,0 +1,77 @@
+{
+  "_comment": "XAS spectrum fitting example — Fe-O system, Fe K-edge + O K-edge",
+
+  "model": {
+    "type_map": [
+      "Fe",
+      "O"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "rcut": 6.0,
+      "rcut_smth": 0.5,
+      "sel": [
+        40,
+        40
+      ],
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
+    },
+    "fitting_net": {
+      "_comment": "property fitting with task_dim=102: [E_min, E_max, I_0, ..., I_99]",
+      "type": "property",
+      "property_name": "xas",
+      "task_dim": 102,
+      "_comment_intensive": "intensive=true: per-atom outputs are averaged (not summed)",
+      "intensive": true,
+      "_comment_fparam": "fparam encodes edge type: 1-hot vector, e.g. [1,0,0]=K, [0,1,0]=L1, [0,0,1]=L2",
+      "numb_fparam": 3,
+      "neuron": [
+        128,
+        128,
+        128
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
+  "loss": {
+    "_comment": "xas loss: reads sel_type.npy to select which element to reduce over",
+    "type": "xas",
+    "loss_func": "smooth_mae",
+    "metric": [
+      "mae",
+      "rmse"
+    ]
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 1e-8,
+    "decay_rate": 0.95
+  },
+  "training": {
+    "training_data": {
+      "_comment": "one system per (element, edge) pair",
+      "systems": [
+        "./data/Fe_K/",
+        "./data/O_K/"
+      ],
+      "batch_size": "auto"
+    },
+    "numb_steps": 200000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1000,
+    "save_freq": 10000,
+    "save_ckpt": "model.ckpt",
+    "stat_file": "stat_files"
+  }
+}