From 824cb301a31059b4fa62b852cf1cc9915792dc88 Mon Sep 17 00:00:00 2001
From: jlarson4 <jonahalarson@comcast.net>
Date: Mon, 13 Apr 2026 16:08:49 -0500
Subject: [PATCH] Initial XGLM Adapter setup

---
 .../test_xglm_adapter.py                      | 287 ++++++++++++++++
 .../factories/architecture_adapter_factory.py |   2 +
 .../supported_architectures/__init__.py       |   4 +
 .../supported_architectures/xglm.py           | 135 ++++++++
 .../model_registry/data/supported_models.json | 312 +++++++++++++++++-
 .../data/verification_history.json            |  92 +++++-
 6 files changed, 828 insertions(+), 4 deletions(-)
 create mode 100644 tests/unit/model_bridge/supported_architectures/test_xglm_adapter.py
 create mode 100644 transformer_lens/model_bridge/supported_architectures/xglm.py

diff --git a/tests/unit/model_bridge/supported_architectures/test_xglm_adapter.py b/tests/unit/model_bridge/supported_architectures/test_xglm_adapter.py
new file mode 100644
index 000000000..73b68dbb4
--- /dev/null
+++ b/tests/unit/model_bridge/supported_architectures/test_xglm_adapter.py
@@ -0,0 +1,287 @@
+"""Unit tests for XGLMArchitectureAdapter.
+
+Tests cover:
+- Config attribute validation (all required attributes set correctly) [Phase A]
+- Weight conversion keys and structure [Phase A]
+- Component mapping structure (correct bridge types and HF module paths) [Phase B]
+- Embedding scale hook compatibility [Phase C]
+- Factory registration (XGLMForCausalLM maps to the right adapter) [Phase D]
+"""
+
+import math
+from types import SimpleNamespace
+
+import pytest
+import torch
+
+from transformer_lens.config import TransformerBridgeConfig
+from transformer_lens.model_bridge.generalized_components import (
+    AttentionBridge,
+    BlockBridge,
+    EmbeddingBridge,
+    NormalizationBridge,
+    SymbolicBridge,
+    UnembeddingBridge,
+)
+from transformer_lens.model_bridge.supported_architectures.xglm import (
+    XGLMArchitectureAdapter,
+)
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+def _make_cfg(
+    n_heads: int = 4,
+    d_model: int = 64,
+    n_layers: int = 2,
+    d_mlp: int = 256,
+    d_vocab: int = 1000,
+    n_ctx: int = 512,
+) -> TransformerBridgeConfig:
+    """Return a minimal TransformerBridgeConfig for XGLM adapter tests."""
+    return TransformerBridgeConfig(
+        d_model=d_model,
+        d_head=d_model // n_heads,
+        n_layers=n_layers,
+        n_ctx=n_ctx,
+        n_heads=n_heads,
+        d_vocab=d_vocab,
+        d_mlp=d_mlp,
+        default_prepend_bos=True,
+        architecture="XGLMForCausalLM",
+    )
+
+
+@pytest.fixture
+def cfg() -> TransformerBridgeConfig:
+    return _make_cfg()
+
+
+@pytest.fixture
+def adapter(cfg: TransformerBridgeConfig) -> XGLMArchitectureAdapter:
+    return XGLMArchitectureAdapter(cfg)
+
+
+# ---------------------------------------------------------------------------
+# Phase A: Config attribute tests
+# ---------------------------------------------------------------------------
+
+
+class TestXGLMAdapterConfig:
+    """Adapter must set all required config attributes to the correct values."""
+
+    def test_normalization_type_is_ln(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.cfg.normalization_type == "LN"
+
+    def test_positional_embedding_type_is_standard(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.cfg.positional_embedding_type == "standard"
+
+    def test_final_rms_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.cfg.final_rms is False
+
+    def test_gated_mlp_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.cfg.gated_mlp is False
+
+    def test_attn_only_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.cfg.attn_only is False
+
+    def test_uses_rms_norm_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.cfg.uses_rms_norm is False
+
+
+# ---------------------------------------------------------------------------
+# Phase A: Weight processing conversion tests
+# ---------------------------------------------------------------------------
+
+
+class TestXGLMAdapterWeightConversions:
+    """Adapter must define exactly the four standard QKVO weight conversions."""
+
+    def test_q_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert "blocks.{i}.attn.q.weight" in adapter.weight_processing_conversions
+
+    def test_k_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert "blocks.{i}.attn.k.weight" in adapter.weight_processing_conversions
+
+    def test_v_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert "blocks.{i}.attn.v.weight" in adapter.weight_processing_conversions
+
+    def test_o_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert "blocks.{i}.attn.o.weight" in adapter.weight_processing_conversions
+
+    def test_exactly_four_conversion_keys(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert len(adapter.weight_processing_conversions) == 4
+
+
+# ---------------------------------------------------------------------------
+# Phase B: Component mapping structure tests
+# ---------------------------------------------------------------------------
+
+
+class TestXGLMAdapterComponentMapping:
+    """Component mapping must have the correct bridge types and HF module paths."""
+
+    def test_embed_is_embedding_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert isinstance(adapter.component_mapping["embed"], EmbeddingBridge)
+
+    def test_embed_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.component_mapping["embed"].name == "model.embed_tokens"
+
+    def test_no_pos_embed_in_mapping(self, adapter: XGLMArchitectureAdapter) -> None:
+        # Sinusoidal embeddings have no weights — no bridge entry expected
+        assert "pos_embed" not in adapter.component_mapping
+
+    def test_blocks_is_block_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert isinstance(adapter.component_mapping["blocks"], BlockBridge)
+
+    def test_blocks_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.component_mapping["blocks"].name == "model.layers"
+
+    def test_ln_final_is_normalization_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert isinstance(adapter.component_mapping["ln_final"], NormalizationBridge)
+
+    def test_ln_final_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.component_mapping["ln_final"].name == "model.layer_norm"
+
+    def test_unembed_is_unembedding_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert isinstance(adapter.component_mapping["unembed"], UnembeddingBridge)
+
+    def test_unembed_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        assert adapter.component_mapping["unembed"].name == "lm_head"
+
+    def test_ln1_is_normalization_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert isinstance(blocks.submodules["ln1"], NormalizationBridge)
+
+    def test_ln1_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert blocks.submodules["ln1"].name == "self_attn_layer_norm"
+
+    def test_attn_is_attention_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert isinstance(blocks.submodules["attn"], AttentionBridge)
+
+    def test_attn_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert blocks.submodules["attn"].name == "self_attn"
+
+    def test_attn_requires_attention_mask(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert blocks.submodules["attn"].requires_attention_mask is True
+
+    def test_attn_attention_mask_4d(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert blocks.submodules["attn"].attention_mask_4d is True
+
+    def test_attn_q_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        attn = adapter.component_mapping["blocks"].submodules["attn"]
+        assert attn.submodules["q"].name == "q_proj"
+
+    def test_attn_k_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        attn = adapter.component_mapping["blocks"].submodules["attn"]
+        assert attn.submodules["k"].name == "k_proj"
+
+    def test_attn_v_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        attn = adapter.component_mapping["blocks"].submodules["attn"]
+        assert attn.submodules["v"].name == "v_proj"
+
+    def test_attn_o_name_is_out_proj(self, adapter: XGLMArchitectureAdapter) -> None:
+        # Critical: XGLM names the attention output projection out_proj, not o_proj
+        attn = adapter.component_mapping["blocks"].submodules["attn"]
+        assert attn.submodules["o"].name == "out_proj"
+
+    def test_ln2_is_normalization_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert isinstance(blocks.submodules["ln2"], NormalizationBridge)
+
+    def test_ln2_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert blocks.submodules["ln2"].name == "final_layer_norm"
+
+    def test_mlp_is_symbolic_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
+        blocks = adapter.component_mapping["blocks"]
+        assert isinstance(blocks.submodules["mlp"], SymbolicBridge)
+
+    def test_mlp_in_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        mlp = adapter.component_mapping["blocks"].submodules["mlp"]
+        assert mlp.submodules["in"].name == "fc1"
+
+    def test_mlp_out_name(self, adapter: XGLMArchitectureAdapter) -> None:
+        mlp = adapter.component_mapping["blocks"].submodules["mlp"]
+        assert mlp.submodules["out"].name == "fc2"
+
+
+# ---------------------------------------------------------------------------
+# Phase C: Embedding scale hook compatibility tests
+# ---------------------------------------------------------------------------
+
+
+def _make_mock_bridge() -> SimpleNamespace:
+    """Return a minimal mock bridge with embed.hook_out for hook-compat tests."""
+    hook_out = SimpleNamespace(hook_conversion=None)
+    embed = SimpleNamespace(hook_out=hook_out)
+    return SimpleNamespace(embed=embed)
+
+
+class TestXGLMAdapterHookCompatibility:
+    """setup_hook_compatibility must attach a scale conversion to embed.hook_out (hook_embed)."""
+
+    def test_sets_hook_conversion_on_embed_hook_out(self, adapter: XGLMArchitectureAdapter) -> None:
+        bridge = _make_mock_bridge()
+        adapter.setup_hook_compatibility(bridge)
+        assert bridge.embed.hook_out.hook_conversion is not None
+
+    def test_scales_by_sqrt_d_model(self, adapter: XGLMArchitectureAdapter) -> None:
+        # d_model=64, sqrt(64)=8 exactly
+        bridge = _make_mock_bridge()
+        adapter.setup_hook_compatibility(bridge)
+        conv = bridge.embed.hook_out.hook_conversion
+        x = torch.ones(2, 4, 64)
+        result = conv.handle_conversion(x)
+        expected_scale = math.sqrt(64)  # 8.0
+        assert torch.allclose(result, x * expected_scale, atol=1e-6)
+
+    def test_revert_inverts_scale(self, adapter: XGLMArchitectureAdapter) -> None:
+        # round-trip: revert(handle_conversion(x)) == x; exact for sqrt(64)=8
+        bridge = _make_mock_bridge()
+        adapter.setup_hook_compatibility(bridge)
+        conv = bridge.embed.hook_out.hook_conversion
+        x = torch.randn(2, 4, 64)
+        assert torch.allclose(conv.revert(conv.handle_conversion(x)), x, atol=1e-6)
+
+    def test_no_error_when_embed_missing(self, adapter: XGLMArchitectureAdapter) -> None:
+        # Guard: if bridge lacks embed, setup_hook_compatibility should not raise
+        bridge = SimpleNamespace()  # no embed attribute
+        adapter.setup_hook_compatibility(bridge)  # must not raise
+
+    def test_no_error_when_hook_out_missing(self, adapter: XGLMArchitectureAdapter) -> None:
+        # Guard: if embed lacks hook_out, no error expected
+        bridge = SimpleNamespace(embed=SimpleNamespace())  # embed but no hook_out
+        adapter.setup_hook_compatibility(bridge)  # must not raise
+
+
+# ---------------------------------------------------------------------------
+# Phase D: Factory registration tests
+# ---------------------------------------------------------------------------
+
+
+class TestXGLMFactoryRegistration:
+    """XGLMForCausalLM must be registered in SUPPORTED_ARCHITECTURES and resolve correctly."""
+
+    def test_factory_returns_xglm_adapter(self) -> None:
+        from transformer_lens.factories.architecture_adapter_factory import (
+            ArchitectureAdapterFactory,
+        )
+
+        cfg = _make_cfg()
+        adapter = ArchitectureAdapterFactory.select_architecture_adapter(cfg)
+        assert isinstance(adapter, XGLMArchitectureAdapter)
+
+    def test_factory_key_is_xglm_for_causal_lm(self) -> None:
+        from transformer_lens.factories.architecture_adapter_factory import (
+            SUPPORTED_ARCHITECTURES,
+        )
+
+        assert "XGLMForCausalLM" in SUPPORTED_ARCHITECTURES
diff --git a/transformer_lens/factories/architecture_adapter_factory.py b/transformer_lens/factories/architecture_adapter_factory.py
index 1c6462cad..16f94761d 100644
--- a/transformer_lens/factories/architecture_adapter_factory.py
+++ b/transformer_lens/factories/architecture_adapter_factory.py
@@ -47,6 +47,7 @@
     QwenArchitectureAdapter,
     StableLmArchitectureAdapter,
     T5ArchitectureAdapter,
+    XGLMArchitectureAdapter,
 )
 
 # Export supported architectures
@@ -92,6 +93,7 @@
     "Qwen3ForCausalLM": Qwen3ArchitectureAdapter,
     "StableLmForCausalLM": StableLmArchitectureAdapter,
     "T5ForConditionalGeneration": T5ArchitectureAdapter,
+    "XGLMForCausalLM": XGLMArchitectureAdapter,
     "NanoGPTForCausalLM": NanogptArchitectureAdapter,
     "MinGPTForCausalLM": MingptArchitectureAdapter,
     "GPTNeoForCausalLM": NeoArchitectureAdapter,
diff --git a/transformer_lens/model_bridge/supported_architectures/__init__.py b/transformer_lens/model_bridge/supported_architectures/__init__.py
index 1b24f3741..1d7033c73 100644
--- a/transformer_lens/model_bridge/supported_architectures/__init__.py
+++ b/transformer_lens/model_bridge/supported_architectures/__init__.py
@@ -129,6 +129,9 @@
 from transformer_lens.model_bridge.supported_architectures.t5 import (
     T5ArchitectureAdapter,
 )
+from transformer_lens.model_bridge.supported_architectures.xglm import (
+    XGLMArchitectureAdapter,
+)
 
 __all__ = [
     "ApertusArchitectureAdapter",
@@ -173,4 +176,5 @@
     "Qwen3ArchitectureAdapter",
     "StableLmArchitectureAdapter",
     "T5ArchitectureAdapter",
+    "XGLMArchitectureAdapter",
 ]
diff --git a/transformer_lens/model_bridge/supported_architectures/xglm.py b/transformer_lens/model_bridge/supported_architectures/xglm.py
new file mode 100644
index 000000000..c2f839b55
--- /dev/null
+++ b/transformer_lens/model_bridge/supported_architectures/xglm.py
@@ -0,0 +1,135 @@
+"""XGLM architecture adapter.
+
+Supports XGLMForCausalLM (facebook/xglm-*).
+Assumes add_cross_attention=False (all published XGLM checkpoints).
+"""
+
+from typing import Any
+
+from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter
+from transformer_lens.model_bridge.generalized_components import (
+    AttentionBridge,
+    BlockBridge,
+    EmbeddingBridge,
+    LinearBridge,
+    NormalizationBridge,
+    SymbolicBridge,
+    UnembeddingBridge,
+)
+
+
+class XGLMArchitectureAdapter(ArchitectureAdapter):
+    """Architecture adapter for XGLM models.
+
+    XGLM uses pre-norm LayerNorm, sinusoidal positional embeddings (no
+    learnable weights), standard MHA with separate q/k/v/out_proj, and a
+    2-layer MLP (fc1/fc2) that lives directly on the decoder block rather
+    than inside an mlp sub-module.
+
+    All attention projections and fc1/fc2 carry biases. lm_head has no bias.
+    Embeddings are scaled by sqrt(d_model) at runtime in XGLMScaledWordEmbedding.
+
+    Optional Parameters (may not exist in state_dict):
+    --------------------------------------------------
+    None — all published XGLM checkpoints include all parameters listed above.
+    """
+
+    def __init__(self, cfg: Any) -> None:
+        """Initialize the XGLM architecture adapter."""
+        super().__init__(cfg)
+
+        # LayerNorm throughout (not RMSNorm)
+        self.cfg.normalization_type = "LN"
+        # Sinusoidal positional embeddings — added to token embeddings before blocks,
+        # no learnable weights, no RoPE
+        self.cfg.positional_embedding_type = "standard"
+        self.cfg.final_rms = False
+        # Standard 2-layer MLP (fc1 -> gelu -> fc2), no gate projection
+        self.cfg.gated_mlp = False
+        self.cfg.attn_only = False
+        self.cfg.uses_rms_norm = False
+
+        # Sinusoidal positional embeddings have no weights in the state_dict, so
+        # center_writing_weights cannot center pos_embed.  Disable it for XGLM.
+        self.supports_center_writing_weights = False
+
+        # Standard MHA: n_heads == n_kv_heads for all XGLM sizes
+        self.weight_processing_conversions = {
+            **self._qkvo_weight_conversions(),
+        }
+
+        self.component_mapping = {
+            "embed": EmbeddingBridge(name="model.embed_tokens"),
+            # No "pos_embed": sinusoidal embeddings are a non-persistent buffer with
+            # no learnable weights — embed_positions does not appear in state_dict.
+            "blocks": BlockBridge(
+                name="model.layers",
+                submodules={
+                    "ln1": NormalizationBridge(
+                        name="self_attn_layer_norm",  # pre-attn norm on XGLMDecoderLayer
+                        config=self.cfg,
+                        use_native_layernorm_autograd=True,
+                    ),
+                    "attn": AttentionBridge(
+                        name="self_attn",
+                        config=self.cfg,
+                        requires_attention_mask=True,
+                        attention_mask_4d=True,  # (batch, 1, tgt_len, src_len)
+                        submodules={
+                            "q": LinearBridge(name="q_proj"),
+                            "k": LinearBridge(name="k_proj"),
+                            "v": LinearBridge(name="v_proj"),
+                            "o": LinearBridge(name="out_proj"),  # out_proj, not o_proj
+                        },
+                    ),
+                    "ln2": NormalizationBridge(
+                        name="final_layer_norm",  # pre-MLP norm on XGLMDecoderLayer
+                        config=self.cfg,
+                        use_native_layernorm_autograd=True,
+                    ),
+                    # fc1/fc2 live directly on XGLMDecoderLayer — no "mlp" container.
+                    # SymbolicBridge preserves TL structure without a real HF submodule.
+                    "mlp": SymbolicBridge(
+                        submodules={
+                            "in": LinearBridge(name="fc1"),
+                            "out": LinearBridge(name="fc2"),
+                        },
+                    ),
+                },
+            ),
+            "ln_final": NormalizationBridge(
+                name="model.layer_norm",  # note: layer_norm, not norm
+                config=self.cfg,
+                use_native_layernorm_autograd=True,
+            ),
+            "unembed": UnembeddingBridge(name="lm_head"),
+        }
+
+    def setup_hook_compatibility(self, bridge: Any) -> None:
+        """Scale hook_embed by sqrt(d_model) to match XGLMScaledWordEmbedding.forward().
+
+        XGLMScaledWordEmbedding multiplies the embedding lookup by embed_scale =
+        sqrt(d_model) inside forward().  This override assumes hook_embed would otherwise
+        see the unscaled table output; NOTE(review): confirm it does not double-scale.
+        """
+        from transformer_lens.conversion_utils.conversion_steps.base_tensor_conversion import (
+            BaseTensorConversion,
+        )
+
+        class EmbeddingScaleConversion(BaseTensorConversion):
+            """Scale embeddings by sqrt(d_model) for XGLM models."""
+
+            def __init__(self, scale: float) -> None:
+                super().__init__()
+                self.scale = scale
+
+            def handle_conversion(self, input_value: Any, *full_context: Any) -> Any:
+                return input_value * self.scale
+
+            def revert(self, input_value: Any, *full_context: Any) -> Any:
+                return input_value / self.scale
+
+        if hasattr(bridge, "embed") and hasattr(bridge.embed, "hook_out"):
+            bridge.embed.hook_out.hook_conversion = EmbeddingScaleConversion(
+                self.cfg.d_model**0.5
+            )
diff --git a/transformer_lens/tools/model_registry/data/supported_models.json b/transformer_lens/tools/model_registry/data/supported_models.json
index 6c2ce3aff..817ff0a0d 100644
--- a/transformer_lens/tools/model_registry/data/supported_models.json
+++ b/transformer_lens/tools/model_registry/data/supported_models.json
@@ -6,9 +6,9 @@
     "min_downloads": 500,
     "scan_duration_seconds": 12.1
   },
-  "total_architectures": 36,
-  "total_models": 6686,
-  "total_verified": 690,
+  "total_architectures": 38,
+  "total_models": 6709,
+  "total_verified": 700,
   "models": [
     {
       "architecture_id": "Qwen3ForCausalLM",
@@ -93600,6 +93600,312 @@
       "phase4_score": 67.5,
       "phase7_score": null,
       "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "facebook/xglm-564M",
+      "status": 1,
+      "verified_date": "2026-04-10",
+      "metadata": {
+        "downloads": 168586,
+        "total_params": null
+      },
+      "note": "Full verification completed",
+      "phase1_score": 100.0,
+      "phase2_score": 100.0,
+      "phase3_score": 100.0,
+      "phase4_score": 91.8,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "facebook/incoder-1B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 12731,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "facebook/xglm-7.5B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 3589,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "facebook/xglm-4.5B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 3079,
+        "total_params": 5076815872
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "facebook/xglm-1.7B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 3061,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-2.7B",
+      "status": 1,
+      "verified_date": "2026-04-10",
+      "metadata": {
+        "downloads": 2907,
+        "total_params": 2774988800
+      },
+      "note": "Full verification completed",
+      "phase1_score": 100.0,
+      "phase2_score": 100.0,
+      "phase3_score": 100.0,
+      "phase4_score": 100.0,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-125M",
+      "status": 1,
+      "verified_date": "2026-04-10",
+      "metadata": {
+        "downloads": 2896,
+        "total_params": 162256896
+      },
+      "note": "Full verification completed with issues: P2=91.7% (failed: generation)",
+      "phase1_score": 100.0,
+      "phase2_score": 91.7,
+      "phase3_score": 100.0,
+      "phase4_score": 100.0,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-355M",
+      "status": 1,
+      "verified_date": "2026-04-10",
+      "metadata": {
+        "downloads": 2874,
+        "total_params": 405245952
+      },
+      "note": "Full verification completed with issues: P2=91.7% (failed: generation)",
+      "phase1_score": 100.0,
+      "phase2_score": 91.7,
+      "phase3_score": 100.0,
+      "phase4_score": 93.1,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-13B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2866,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-1.3B",
+      "status": 1,
+      "verified_date": "2026-04-10",
+      "metadata": {
+        "downloads": 2864,
+        "total_params": 1414471680
+      },
+      "note": "Full verification completed",
+      "phase1_score": 100.0,
+      "phase2_score": 100.0,
+      "phase3_score": 100.0,
+      "phase4_score": 99.2,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-6.7B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2862,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "facebook/xglm-2.9B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2352,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "facebook/incoder-6B",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2225,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-6.7B-Shinen",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2210,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "pythainlp/wangchanglm-7.5B-sft-en-sharded",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2128,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "KoboldAI/fairseq-dense-13B-Shinen",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2123,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "pythainlp/wangchanglm-7.5B-sft-enth",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 2102,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
+    },
+    {
+      "architecture_id": "XGLMForCausalLM",
+      "model_id": "hf-tiny-model-private/tiny-random-XGLMForCausalLM",
+      "status": 0,
+      "verified_date": null,
+      "metadata": {
+        "downloads": 1969,
+        "total_params": null
+      },
+      "note": null,
+      "phase1_score": null,
+      "phase2_score": null,
+      "phase3_score": null,
+      "phase4_score": null,
+      "phase7_score": null,
+      "phase8_score": null
     }
   ]
 }
diff --git a/transformer_lens/tools/model_registry/data/verification_history.json b/transformer_lens/tools/model_registry/data/verification_history.json
index 9eb2e7648..62c44ab67 100644
--- a/transformer_lens/tools/model_registry/data/verification_history.json
+++ b/transformer_lens/tools/model_registry/data/verification_history.json
@@ -1,5 +1,5 @@
 {
-  "last_updated": "2026-04-09T16:34:36.818082",
+  "last_updated": "2026-04-10T19:18:41.322658",
   "records": [
     {
       "model_id": "Macropodus/macbert4mdcspell_v1",
@@ -11200,6 +11200,96 @@
       "notes": "Full verification completed",
       "invalidated": false,
       "invalidation_reason": null
+    },
+    {
+      "model_id": "KoboldAI/fairseq-dense-125M",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed with issues: P3=80.0% (failed: process_bridge_weights, layer_norm_folding, unembed_centering, value_bias_folding)",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "KoboldAI/fairseq-dense-125M",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed with issues: P2=91.7% (failed: generation)",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "KoboldAI/fairseq-dense-355M",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed with issues: P2=91.7% (failed: generation)",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "KoboldAI/fairseq-dense-1.3B",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "KoboldAI/fairseq-dense-2.7B",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "KoboldAI/fairseq-dense-2.7B",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "facebook/xglm-564M",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "facebook/xglm-564M",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed",
+      "invalidated": false,
+      "invalidation_reason": null
+    },
+    {
+      "model_id": "facebook/xglm-564M",
+      "architecture_id": "XGLMForCausalLM",
+      "verified_date": "2026-04-10",
+      "verified_by": "verify_models",
+      "transformerlens_version": null,
+      "notes": "Full verification completed",
+      "invalidated": false,
+      "invalidation_reason": null
     }
   ]
 }