"""Unit tests for the XGLM architecture adapter.

The suite is organised in four phases:

- Phase A: config attributes forced by the adapter, and weight-conversion keys
- Phase B: component mapping (bridge types and HF module names)
- Phase C: embedding-scale hook conversion attached by setup_hook_compatibility
- Phase D: factory registration of "XGLMForCausalLM"
"""

import math
from types import SimpleNamespace

import pytest
import torch

from transformer_lens.config import TransformerBridgeConfig
from transformer_lens.model_bridge.generalized_components import (
    AttentionBridge,
    BlockBridge,
    EmbeddingBridge,
    NormalizationBridge,
    SymbolicBridge,
    UnembeddingBridge,
)
from transformer_lens.model_bridge.supported_architectures.xglm import (
    XGLMArchitectureAdapter,
)

# ---------------------------------------------------------------------------
# Fixtures and lookup helpers
# ---------------------------------------------------------------------------


def _make_cfg(
    n_heads: int = 4,
    d_model: int = 64,
    n_layers: int = 2,
    d_mlp: int = 256,
    d_vocab: int = 1000,
    n_ctx: int = 512,
) -> TransformerBridgeConfig:
    """Build a small bridge config tagged with the XGLM architecture name."""
    settings = dict(
        d_model=d_model,
        d_head=d_model // n_heads,
        n_layers=n_layers,
        n_ctx=n_ctx,
        n_heads=n_heads,
        d_vocab=d_vocab,
        d_mlp=d_mlp,
        default_prepend_bos=True,
        architecture="XGLMForCausalLM",
    )
    return TransformerBridgeConfig(**settings)


@pytest.fixture
def cfg() -> TransformerBridgeConfig:
    """Default config shared by the adapter fixture."""
    return _make_cfg()


@pytest.fixture
def adapter(cfg: TransformerBridgeConfig) -> XGLMArchitectureAdapter:
    """Adapter instance constructed from the default config."""
    return XGLMArchitectureAdapter(cfg)


def _block_parts(adapter: XGLMArchitectureAdapter):
    """Return the submodule mapping of the "blocks" bridge."""
    return adapter.component_mapping["blocks"].submodules


def _attn_parts(adapter: XGLMArchitectureAdapter):
    """Return the submodule mapping of the per-block attention bridge."""
    return _block_parts(adapter)["attn"].submodules


def _mlp_parts(adapter: XGLMArchitectureAdapter):
    """Return the submodule mapping of the per-block MLP bridge."""
    return _block_parts(adapter)["mlp"].submodules


# ---------------------------------------------------------------------------
# Phase A: Config attribute tests
# ---------------------------------------------------------------------------


class TestXGLMAdapterConfig:
    """The adapter constructor must pin each config attribute checked below."""

    def test_normalization_type_is_ln(self, adapter: XGLMArchitectureAdapter) -> None:
        norm_kind = adapter.cfg.normalization_type
        assert norm_kind == "LN"

    def test_positional_embedding_type_is_standard(self, adapter: XGLMArchitectureAdapter) -> None:
        pos_kind = adapter.cfg.positional_embedding_type
        assert pos_kind == "standard"

    def test_final_rms_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
        flag = adapter.cfg.final_rms
        assert flag is False

    def test_gated_mlp_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
        flag = adapter.cfg.gated_mlp
        assert flag is False

    def test_attn_only_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
        flag = adapter.cfg.attn_only
        assert flag is False

    def test_uses_rms_norm_is_false(self, adapter: XGLMArchitectureAdapter) -> None:
        flag = adapter.cfg.uses_rms_norm
        assert flag is False


# ---------------------------------------------------------------------------
# Phase A: Weight processing conversion tests
# ---------------------------------------------------------------------------


class TestXGLMAdapterWeightConversions:
    """The adapter must expose the q/k/v/o weight conversions and nothing else."""

    def test_q_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
        conversions = adapter.weight_processing_conversions
        assert "blocks.{i}.attn.q.weight" in conversions

    def test_k_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
        conversions = adapter.weight_processing_conversions
        assert "blocks.{i}.attn.k.weight" in conversions

    def test_v_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
        conversions = adapter.weight_processing_conversions
        assert "blocks.{i}.attn.v.weight" in conversions

    def test_o_weight_key_present(self, adapter: XGLMArchitectureAdapter) -> None:
        conversions = adapter.weight_processing_conversions
        assert "blocks.{i}.attn.o.weight" in conversions

    def test_exactly_four_conversion_keys(self, adapter: XGLMArchitectureAdapter) -> None:
        conversions = adapter.weight_processing_conversions
        assert len(conversions) == 4


# ---------------------------------------------------------------------------
# Phase B: Component mapping structure tests
# ---------------------------------------------------------------------------


class TestXGLMAdapterComponentMapping:
    """Each mapping entry must use the expected bridge class and HF module name."""

    def test_embed_is_embedding_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        embed = adapter.component_mapping["embed"]
        assert isinstance(embed, EmbeddingBridge)

    def test_embed_name(self, adapter: XGLMArchitectureAdapter) -> None:
        embed = adapter.component_mapping["embed"]
        assert embed.name == "model.embed_tokens"

    def test_no_pos_embed_in_mapping(self, adapter: XGLMArchitectureAdapter) -> None:
        # The adapter deliberately maps no positional-embedding bridge.
        mapping = adapter.component_mapping
        assert "pos_embed" not in mapping

    def test_blocks_is_block_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        blocks = adapter.component_mapping["blocks"]
        assert isinstance(blocks, BlockBridge)

    def test_blocks_name(self, adapter: XGLMArchitectureAdapter) -> None:
        blocks = adapter.component_mapping["blocks"]
        assert blocks.name == "model.layers"

    def test_ln_final_is_normalization_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        ln_final = adapter.component_mapping["ln_final"]
        assert isinstance(ln_final, NormalizationBridge)

    def test_ln_final_name(self, adapter: XGLMArchitectureAdapter) -> None:
        ln_final = adapter.component_mapping["ln_final"]
        assert ln_final.name == "model.layer_norm"

    def test_unembed_is_unembedding_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        unembed = adapter.component_mapping["unembed"]
        assert isinstance(unembed, UnembeddingBridge)

    def test_unembed_name(self, adapter: XGLMArchitectureAdapter) -> None:
        unembed = adapter.component_mapping["unembed"]
        assert unembed.name == "lm_head"

    def test_ln1_is_normalization_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        ln1 = _block_parts(adapter)["ln1"]
        assert isinstance(ln1, NormalizationBridge)

    def test_ln1_name(self, adapter: XGLMArchitectureAdapter) -> None:
        ln1 = _block_parts(adapter)["ln1"]
        assert ln1.name == "self_attn_layer_norm"

    def test_attn_is_attention_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        attn = _block_parts(adapter)["attn"]
        assert isinstance(attn, AttentionBridge)

    def test_attn_name(self, adapter: XGLMArchitectureAdapter) -> None:
        attn = _block_parts(adapter)["attn"]
        assert attn.name == "self_attn"

    def test_attn_requires_attention_mask(self, adapter: XGLMArchitectureAdapter) -> None:
        attn = _block_parts(adapter)["attn"]
        assert attn.requires_attention_mask is True

    def test_attn_attention_mask_4d(self, adapter: XGLMArchitectureAdapter) -> None:
        attn = _block_parts(adapter)["attn"]
        assert attn.attention_mask_4d is True

    def test_attn_q_name(self, adapter: XGLMArchitectureAdapter) -> None:
        q_proj = _attn_parts(adapter)["q"]
        assert q_proj.name == "q_proj"

    def test_attn_k_name(self, adapter: XGLMArchitectureAdapter) -> None:
        k_proj = _attn_parts(adapter)["k"]
        assert k_proj.name == "k_proj"

    def test_attn_v_name(self, adapter: XGLMArchitectureAdapter) -> None:
        v_proj = _attn_parts(adapter)["v"]
        assert v_proj.name == "v_proj"

    def test_attn_o_name_is_out_proj(self, adapter: XGLMArchitectureAdapter) -> None:
        # The output projection must be mapped as "out_proj", not "o_proj".
        o_proj = _attn_parts(adapter)["o"]
        assert o_proj.name == "out_proj"

    def test_ln2_is_normalization_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        ln2 = _block_parts(adapter)["ln2"]
        assert isinstance(ln2, NormalizationBridge)

    def test_ln2_name(self, adapter: XGLMArchitectureAdapter) -> None:
        ln2 = _block_parts(adapter)["ln2"]
        assert ln2.name == "final_layer_norm"

    def test_mlp_is_symbolic_bridge(self, adapter: XGLMArchitectureAdapter) -> None:
        mlp = _block_parts(adapter)["mlp"]
        assert isinstance(mlp, SymbolicBridge)

    def test_mlp_in_name(self, adapter: XGLMArchitectureAdapter) -> None:
        fc_in = _mlp_parts(adapter)["in"]
        assert fc_in.name == "fc1"

    def test_mlp_out_name(self, adapter: XGLMArchitectureAdapter) -> None:
        fc_out = _mlp_parts(adapter)["out"]
        assert fc_out.name == "fc2"


# ---------------------------------------------------------------------------
# Phase C: Embedding scale hook compatibility tests
# ---------------------------------------------------------------------------


def _make_mock_bridge() -> SimpleNamespace:
    """Build a stand-in bridge exposing only embed.hook_out.hook_conversion."""
    return SimpleNamespace(
        embed=SimpleNamespace(hook_out=SimpleNamespace(hook_conversion=None)),
    )


def _attached_conversion(adapter: XGLMArchitectureAdapter):
    """Run setup_hook_compatibility on a mock bridge and return the conversion."""
    mock_bridge = _make_mock_bridge()
    adapter.setup_hook_compatibility(mock_bridge)
    return mock_bridge.embed.hook_out.hook_conversion


class TestXGLMAdapterHookCompatibility:
    """setup_hook_compatibility must install a scale conversion on embed.hook_out."""

    def test_sets_hook_conversion_on_embed_hook_out(self, adapter: XGLMArchitectureAdapter) -> None:
        assert _attached_conversion(adapter) is not None

    def test_scales_by_sqrt_d_model(self, adapter: XGLMArchitectureAdapter) -> None:
        # The default fixture uses d_model=64, so the factor is exactly 8.0.
        conversion = _attached_conversion(adapter)
        ones = torch.ones(2, 4, 64)
        scaled = conversion.handle_conversion(ones)
        factor = math.sqrt(64)
        assert torch.allclose(scaled, ones * factor, atol=1e-6)

    def test_revert_inverts_scale(self, adapter: XGLMArchitectureAdapter) -> None:
        # Round trip: revert(handle_conversion(x)) must give back x.
        conversion = _attached_conversion(adapter)
        sample = torch.randn(2, 4, 64)
        round_trip = conversion.revert(conversion.handle_conversion(sample))
        assert torch.allclose(round_trip, sample, atol=1e-6)

    def test_no_error_when_embed_missing(self, adapter: XGLMArchitectureAdapter) -> None:
        # A bridge with no embed attribute must be tolerated silently.
        bare_bridge = SimpleNamespace()
        adapter.setup_hook_compatibility(bare_bridge)

    def test_no_error_when_hook_out_missing(self, adapter: XGLMArchitectureAdapter) -> None:
        # An embed object with no hook_out attribute must be tolerated silently.
        partial_bridge = SimpleNamespace(embed=SimpleNamespace())
        adapter.setup_hook_compatibility(partial_bridge)


# ---------------------------------------------------------------------------
# Phase D: Factory registration tests
# ---------------------------------------------------------------------------


class TestXGLMFactoryRegistration:
    """The factory must know "XGLMForCausalLM" and resolve it to the XGLM adapter."""

    def test_factory_returns_xglm_adapter(self) -> None:
        from transformer_lens.factories.architecture_adapter_factory import (
            ArchitectureAdapterFactory,
        )

        selected = ArchitectureAdapterFactory.select_architecture_adapter(_make_cfg())
        assert isinstance(selected, XGLMArchitectureAdapter)

    def test_factory_key_is_xglm_for_causal_lm(self) -> None:
        from transformer_lens.factories.architecture_adapter_factory import (
            SUPPORTED_ARCHITECTURES,
        )

        assert "XGLMForCausalLM" in SUPPORTED_ARCHITECTURES
"""XGLM architecture adapter.

Supports XGLMForCausalLM (facebook/xglm-*).
Assumes add_cross_attention=False (all published XGLM checkpoints).
"""

from typing import Any

from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter
from transformer_lens.model_bridge.generalized_components import (
    AttentionBridge,
    BlockBridge,
    EmbeddingBridge,
    LinearBridge,
    NormalizationBridge,
    SymbolicBridge,
    UnembeddingBridge,
)


class XGLMArchitectureAdapter(ArchitectureAdapter):
    """Architecture adapter for XGLM models.

    XGLM uses pre-norm LayerNorm, sinusoidal positional embeddings (no
    learnable weights), standard MHA with separate q/k/v/out_proj, and a
    2-layer MLP (fc1/fc2) that lives directly on the decoder block rather
    than inside an mlp sub-module.

    All attention projections and fc1/fc2 carry biases. lm_head has no bias.
    Embeddings are scaled at runtime inside XGLMScaledWordEmbedding; see
    ``setup_hook_compatibility`` for how the scale is chosen.

    Optional Parameters (may not exist in state_dict):
    --------------------------------------------------
    None — all published XGLM checkpoints include every parameter mapped here.
    """

    def __init__(self, cfg: Any) -> None:
        """Initialize the XGLM architecture adapter.

        Args:
            cfg: Bridge configuration. The constructor overwrites the
                normalization, positional-embedding and MLP flags on it and
                reads it again when building the normalization and attention
                bridges.
        """
        super().__init__(cfg)

        # LayerNorm throughout (not RMSNorm)
        self.cfg.normalization_type = "LN"
        # Sinusoidal positional embeddings — added to token embeddings before blocks,
        # no learnable weights, no RoPE
        self.cfg.positional_embedding_type = "standard"
        self.cfg.final_rms = False
        # Standard 2-layer MLP (fc1 -> gelu -> fc2), no gate projection
        self.cfg.gated_mlp = False
        self.cfg.attn_only = False
        self.cfg.uses_rms_norm = False

        # Sinusoidal positional embeddings have no weights in the state_dict, so
        # center_writing_weights cannot center pos_embed. Disable it for XGLM.
        self.supports_center_writing_weights = False

        # Standard MHA: n_heads == n_kv_heads for all XGLM sizes
        self.weight_processing_conversions = {
            **self._qkvo_weight_conversions(),
        }

        self.component_mapping = {
            "embed": EmbeddingBridge(name="model.embed_tokens"),
            # No "pos_embed": sinusoidal embeddings are a non-persistent buffer with
            # no learnable weights — embed_positions does not appear in state_dict.
            "blocks": BlockBridge(
                name="model.layers",
                submodules={
                    "ln1": NormalizationBridge(
                        name="self_attn_layer_norm",  # pre-attn norm on XGLMDecoderLayer
                        config=self.cfg,
                        use_native_layernorm_autograd=True,
                    ),
                    "attn": AttentionBridge(
                        name="self_attn",
                        config=self.cfg,
                        requires_attention_mask=True,
                        attention_mask_4d=True,  # (batch, 1, tgt_len, src_len)
                        submodules={
                            "q": LinearBridge(name="q_proj"),
                            "k": LinearBridge(name="k_proj"),
                            "v": LinearBridge(name="v_proj"),
                            "o": LinearBridge(name="out_proj"),  # out_proj, not o_proj
                        },
                    ),
                    "ln2": NormalizationBridge(
                        name="final_layer_norm",  # pre-MLP norm on XGLMDecoderLayer
                        config=self.cfg,
                        use_native_layernorm_autograd=True,
                    ),
                    # fc1/fc2 live directly on XGLMDecoderLayer — no "mlp" container.
                    # SymbolicBridge preserves TL structure without a real HF submodule.
                    "mlp": SymbolicBridge(
                        submodules={
                            "in": LinearBridge(name="fc1"),
                            "out": LinearBridge(name="fc2"),
                        },
                    ),
                },
            ),
            "ln_final": NormalizationBridge(
                name="model.layer_norm",  # note: layer_norm, not norm
                config=self.cfg,
                use_native_layernorm_autograd=True,
            ),
            "unembed": UnembeddingBridge(name="lm_head"),
        }

    def setup_hook_compatibility(self, bridge: Any) -> None:
        """Attach an embedding-scale conversion to ``bridge.embed.hook_out``.

        XGLMScaledWordEmbedding multiplies the embedding lookup by
        ``embed_scale``. Hugging Face sets that value to sqrt(d_model) when
        ``config.scale_embedding`` is true (the default) and to 1.0 otherwise,
        so the conversion installed here uses the same rule instead of always
        assuming sqrt(d_model).

        If ``bridge`` has no ``embed`` attribute, or ``embed`` has no usable
        ``hook_out``, the method returns without doing anything.

        Args:
            bridge: Object expected to expose ``embed.hook_out``; its
                ``hook_conversion`` attribute is overwritten.
        """
        from transformer_lens.conversion_utils.conversion_steps.base_tensor_conversion import (
            BaseTensorConversion,
        )

        class EmbeddingScaleConversion(BaseTensorConversion):
            """Multiply by a fixed scale; ``revert`` divides by the same scale."""

            def __init__(self, scale: float) -> None:
                super().__init__()
                self.scale = scale

            def handle_conversion(self, input_value: Any, *full_context: Any) -> Any:
                return input_value * self.scale

            def revert(self, input_value: Any, *full_context: Any) -> Any:
                return input_value / self.scale

        # Tolerate partially built bridges: a missing embed or hook_out is a no-op.
        hook_out = getattr(getattr(bridge, "embed", None), "hook_out", None)
        if hook_out is None:
            return

        # NOTE(review): assumes the HF `scale_embedding` flag is copied onto cfg
        # when the checkpoint defines it — confirm. When the attribute is absent
        # or None, the previous behaviour (always scale) is kept.
        scale_enabled = getattr(self.cfg, "scale_embedding", None)
        if scale_enabled is None:
            scale_enabled = True
        scale = self.cfg.d_model**0.5 if scale_enabled else 1.0

        # NOTE(review): the original rationale was that hook_embed would otherwise
        # capture the unscaled table output. Confirm that the wrapped module's
        # output does not already include embed_scale, or this double-scales.
        hook_out.hook_conversion = EmbeddingScaleConversion(scale)
"phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "facebook/xglm-7.5B", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 3589, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "facebook/xglm-4.5B", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 3079, + "total_params": 5076815872 + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "facebook/xglm-1.7B", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 3061, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-2.7B", + "status": 1, + "verified_date": "2026-04-10", + "metadata": { + "downloads": 2907, + "total_params": 2774988800 + }, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 100.0, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-125M", + "status": 1, + "verified_date": "2026-04-10", + "metadata": { + "downloads": 2896, + "total_params": 162256896 + }, + "note": "Full verification completed with issues: P2=91.7% (failed: generation)", + "phase1_score": 100.0, + "phase2_score": 91.7, + "phase3_score": 100.0, + "phase4_score": 100.0, + 
"phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-355M", + "status": 1, + "verified_date": "2026-04-10", + "metadata": { + "downloads": 2874, + "total_params": 405245952 + }, + "note": "Full verification completed with issues: P2=91.7% (failed: generation)", + "phase1_score": 100.0, + "phase2_score": 91.7, + "phase3_score": 100.0, + "phase4_score": 93.1, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-13B", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2866, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-1.3B", + "status": 1, + "verified_date": "2026-04-10", + "metadata": { + "downloads": 2864, + "total_params": 1414471680 + }, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 99.2, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-6.7B", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2862, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "facebook/xglm-2.9B", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2352, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", 
+ "model_id": "facebook/incoder-6B", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2225, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-6.7B-Shinen", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2210, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "pythainlp/wangchanglm-7.5B-sft-en-sharded", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2128, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "KoboldAI/fairseq-dense-13B-Shinen", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2123, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "pythainlp/wangchanglm-7.5B-sft-enth", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 2102, + "total_params": null + }, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "XGLMForCausalLM", + "model_id": "hf-tiny-model-private/tiny-random-XGLMForCausalLM", + "status": 0, + "verified_date": null, + "metadata": { + "downloads": 1969, + "total_params": null + }, + "note": 
null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null } ] } diff --git a/transformer_lens/tools/model_registry/data/verification_history.json b/transformer_lens/tools/model_registry/data/verification_history.json index 9eb2e7648..62c44ab67 100644 --- a/transformer_lens/tools/model_registry/data/verification_history.json +++ b/transformer_lens/tools/model_registry/data/verification_history.json @@ -1,5 +1,5 @@ { - "last_updated": "2026-04-09T16:34:36.818082", + "last_updated": "2026-04-10T19:18:41.322658", "records": [ { "model_id": "Macropodus/macbert4mdcspell_v1", @@ -11200,6 +11200,96 @@ "notes": "Full verification completed", "invalidated": false, "invalidation_reason": null + }, + { + "model_id": "KoboldAI/fairseq-dense-125M", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P3=80.0% (failed: process_bridge_weights, layer_norm_folding, unembed_centering, value_bias_folding)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "KoboldAI/fairseq-dense-125M", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=91.7% (failed: generation)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "KoboldAI/fairseq-dense-355M", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=91.7% (failed: generation)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "KoboldAI/fairseq-dense-1.3B", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + 
"verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "KoboldAI/fairseq-dense-2.7B", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "KoboldAI/fairseq-dense-2.7B", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/xglm-564M", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/xglm-564M", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/xglm-564M", + "architecture_id": "XGLMForCausalLM", + "verified_date": "2026-04-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null } ] }