@@ -4951,33 +4951,18 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
49514951
49524952@ModelBase.register("StepVLForConditionalGeneration")
49534953class Step3VLVisionModel(MmprojModel):
4954- def __init__(self, dir_model: Path, *args, hparams: dict[str, Any] | None = None, **kwargs):
4955- if hparams is None:
4956- hparams = ModelBase.load_hparams(dir_model, is_mistral_format=False)
4957- assert hparams is not None
4958-
4959- vision_config = {**hparams.get("vision_config", {})}
4960-
4961- hidden_size = int(vision_config.get("hidden_size", vision_config.get("width", 0)))
4962- if hidden_size <= 0:
4963- raise ValueError("Step3-VL vision hidden_size/width not found")
4964-
4965- mlp_ratio = float(vision_config.get("mlp_ratio", 8960 / 1536))
4966- vision_config["hidden_size"] = hidden_size
4967- vision_config["num_hidden_layers"] = int(vision_config.get("num_hidden_layers", vision_config.get("layers", 0)))
4968- vision_config["num_attention_heads"] = int(vision_config.get("num_attention_heads", vision_config.get("heads", 0)))
4969- vision_config["intermediate_size"] = int(vision_config.get("intermediate_size", round(hidden_size * mlp_ratio)))
4970- vision_config["layer_norm_eps"] = float(vision_config.get("layer_norm_eps", 1e-5))
4971- vision_config["use_ln_pre"] = bool(vision_config.get("use_ln_pre", True))
4972- vision_config["use_ln_post"] = bool(vision_config.get("use_ln_post", False))
4973- vision_config["use_abs_posemb"] = bool(vision_config.get("use_abs_posemb", True))
4974- vision_config["use_rope2d"] = bool(vision_config.get("use_rope2d", True))
4975- hparams["vision_config"] = vision_config
4976-
4977- super().__init__(dir_model, *args, hparams=hparams, **kwargs)
4978-
4979- self.preprocessor_config.setdefault("image_mean", list(_MISTRAL_COMMON_DATASET_MEAN))
4980- self.preprocessor_config.setdefault("image_std", list(_MISTRAL_COMMON_DATASET_STD))
4954+ def __init__(self, *args, **kwargs):
4955+ super().__init__(*args, **kwargs)
4956+ assert self.hparams_vision is not None
4957+
4958+ if not self.hparams_vision.get("intermediate_size"):
4959+ hidden_size = self.hparams_vision.get("hidden_size") or self.hparams_vision.get("width") or 0
4960+ assert hidden_size > 0
4961+ mlp_ratio = float(self.hparams_vision.get("mlp_ratio", 8960 / 1536))
4962+ self.hparams_vision["intermediate_size"] = int(round(hidden_size * mlp_ratio))
4963+
4964+ self.preprocessor_config.setdefault("image_mean", list(_MISTRAL_COMMON_DATASET_MEAN))
4965+ self.preprocessor_config.setdefault("image_std", list(_MISTRAL_COMMON_DATASET_STD))
49814966
49824967 def set_gguf_parameters(self):
49834968 super().set_gguf_parameters()
@@ -4991,7 +4976,7 @@ def set_gguf_parameters(self):
49914976 )
49924977
49934978 self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.STEP3VL)
4994- self.gguf_writer.add_vision_attention_layernorm_eps(float(self.hparams_vision[ "layer_norm_eps"]))
4979+ self.gguf_writer.add_vision_attention_layernorm_eps(float(self.hparams_vision.get( "layer_norm_eps", 1e-5)))
49954980 self.gguf_writer.add_vision_projector_scale_factor(projector_stride ** 2)
49964981 # 3024 max resize comes from step3-vl-10b processing_step3.py.
49974982 self.gguf_writer.add_vision_preproc_image_size(3024)
@@ -5002,9 +4987,7 @@ def tensor_force_quant(self, name, new_name, bid, n_dims):
50024987 return super().tensor_force_quant(name, new_name, bid, n_dims)
50034988
50044989 def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
5005- del bid
5006-
5007- if name.startswith("model.") or name.startswith("lm_head."):
4990+ if name.startswith("model.") or name.startswith("lm_head."):
50084991 return
50094992
50104993 if name.startswith("vision_model.vit_downsampler"):
@@ -5030,10 +5013,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
50305013 name = name.replace("attn.in_proj_weight", "attn.in_proj.weight")
50315014 name = name.replace("attn.in_proj_bias", "attn.in_proj.bias")
50325015
5033- yield (self.map_tensor_name(name), data_torch)
5034- return
5035-
5036- return
5016+ yield from super().modify_tensors(data_torch, name, bid)
50375017
50385018
50395019@ModelBase.register("Qwen3VLForConditionalGeneration")
0 commit comments