From 5c979e6fc1c71a5447883df5975c500c010d149b Mon Sep 17 00:00:00 2001 From: Mizzle Date: Mon, 13 Apr 2026 15:30:15 +0800 Subject: [PATCH] =?UTF-8?q?fix(config):=20align=20default=20decoder=5Fkwar?= =?UTF-8?q?gs=20with=20checkpoint=20(3=C3=97=20patch2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove duplicate Transformer + PatchedPretransform stage so total temporal ratio stays 240×2³=1920, matching encoder and official config.json. Made-with: Cursor --- configuration_moss_audio_tokenizer.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/configuration_moss_audio_tokenizer.py b/configuration_moss_audio_tokenizer.py index d82791b..67c3fd0 100644 --- a/configuration_moss_audio_tokenizer.py +++ b/configuration_moss_audio_tokenizer.py @@ -250,26 +250,6 @@ def __init__( "module_type": "PatchedPretransform", "patch_size": 2, }, - { - "module_type": "Transformer", - "input_dimension": 384, - "output_dimension": 768, - "d_model": 768, - "num_heads": 12, - "num_layers": 12, - "dim_feedforward": 3072, - "causal": True, - "norm": "layer_norm", - "positional_embedding": "rope", - "max_period": 10000, - "gating": "none", - "layer_scale": 0.01, - "conv_layout": True, - }, - { - "module_type": "PatchedPretransform", - "patch_size": 2, - }, { "module_type": "Transformer", "input_dimension": 384,