Skip to content

Commit d55bdf7

Browse files
forever73 and CISC authored
Apply suggestions from code review
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
1 parent b47145c commit d55bdf7

1 file changed

Lines changed: 15 additions & 35 deletions

File tree

convert_hf_to_gguf.py

Lines changed: 15 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4951,33 +4951,18 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
49514951

49524952
@ModelBase.register("StepVLForConditionalGeneration")
49534953
class Step3VLVisionModel(MmprojModel):
4954-
def __init__(self, dir_model: Path, *args, hparams: dict[str, Any] | None = None, **kwargs):
4955-
if hparams is None:
4956-
hparams = ModelBase.load_hparams(dir_model, is_mistral_format=False)
4957-
assert hparams is not None
4958-
4959-
vision_config = {**hparams.get("vision_config", {})}
4960-
4961-
hidden_size = int(vision_config.get("hidden_size", vision_config.get("width", 0)))
4962-
if hidden_size <= 0:
4963-
raise ValueError("Step3-VL vision hidden_size/width not found")
4964-
4965-
mlp_ratio = float(vision_config.get("mlp_ratio", 8960 / 1536))
4966-
vision_config["hidden_size"] = hidden_size
4967-
vision_config["num_hidden_layers"] = int(vision_config.get("num_hidden_layers", vision_config.get("layers", 0)))
4968-
vision_config["num_attention_heads"] = int(vision_config.get("num_attention_heads", vision_config.get("heads", 0)))
4969-
vision_config["intermediate_size"] = int(vision_config.get("intermediate_size", round(hidden_size * mlp_ratio)))
4970-
vision_config["layer_norm_eps"] = float(vision_config.get("layer_norm_eps", 1e-5))
4971-
vision_config["use_ln_pre"] = bool(vision_config.get("use_ln_pre", True))
4972-
vision_config["use_ln_post"] = bool(vision_config.get("use_ln_post", False))
4973-
vision_config["use_abs_posemb"] = bool(vision_config.get("use_abs_posemb", True))
4974-
vision_config["use_rope2d"] = bool(vision_config.get("use_rope2d", True))
4975-
hparams["vision_config"] = vision_config
4976-
4977-
super().__init__(dir_model, *args, hparams=hparams, **kwargs)
4978-
4979-
self.preprocessor_config.setdefault("image_mean", list(_MISTRAL_COMMON_DATASET_MEAN))
4980-
self.preprocessor_config.setdefault("image_std", list(_MISTRAL_COMMON_DATASET_STD))
4954+
def __init__(self, *args, **kwargs):
4955+
super().__init__(*args, **kwargs)
4956+
assert self.hparams_vision is not None
4957+
4958+
if not self.hparams_vision.get("intermediate_size"):
4959+
hidden_size = self.hparams_vision.get("hidden_size") or self.hparams_vision.get("width") or 0
4960+
assert hidden_size > 0
4961+
mlp_ratio = float(self.hparams_vision.get("mlp_ratio", 8960 / 1536))
4962+
self.hparams_vision["intermediate_size"] = int(round(hidden_size * mlp_ratio))
4963+
4964+
self.preprocessor_config.setdefault("image_mean", list(_MISTRAL_COMMON_DATASET_MEAN))
4965+
self.preprocessor_config.setdefault("image_std", list(_MISTRAL_COMMON_DATASET_STD))
49814966

49824967
def set_gguf_parameters(self):
49834968
super().set_gguf_parameters()
@@ -4991,7 +4976,7 @@ def set_gguf_parameters(self):
49914976
)
49924977

49934978
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.STEP3VL)
4994-
self.gguf_writer.add_vision_attention_layernorm_eps(float(self.hparams_vision["layer_norm_eps"]))
4979+
self.gguf_writer.add_vision_attention_layernorm_eps(float(self.hparams_vision.get("layer_norm_eps", 1e-5)))
49954980
self.gguf_writer.add_vision_projector_scale_factor(projector_stride ** 2)
49964981
# 3024 max resize comes from step3-vl-10b processing_step3.py.
49974982
self.gguf_writer.add_vision_preproc_image_size(3024)
@@ -5002,9 +4987,7 @@ def tensor_force_quant(self, name, new_name, bid, n_dims):
50024987
return super().tensor_force_quant(name, new_name, bid, n_dims)
50034988

50044989
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
5005-
del bid
5006-
5007-
if name.startswith("model.") or name.startswith("lm_head."):
4990+
if name.startswith("model.") or name.startswith("lm_head."):
50084991
return
50094992

50104993
if name.startswith("vision_model.vit_downsampler"):
@@ -5030,10 +5013,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
50305013
name = name.replace("attn.in_proj_weight", "attn.in_proj.weight")
50315014
name = name.replace("attn.in_proj_bias", "attn.in_proj.bias")
50325015

5033-
yield (self.map_tensor_name(name), data_torch)
5034-
return
5035-
5036-
return
5016+
yield from super().modify_tensors(data_torch, name, bid)
50375017

50385018

50395019
@ModelBase.register("Qwen3VLForConditionalGeneration")

0 commit comments

Comments (0)