From 832a43955b0c8c881739f11f1893e00ed5fcff57 Mon Sep 17 00:00:00 2001
From: Jason <jason.sss@binance.com>
Date: Sun, 22 Mar 2026 00:33:17 +0100
Subject: [PATCH] fix(attention): download hub kernel in
 AttentionModuleMixin.set_attention_backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ModelMixin.set_attention_backend() calls both
_check_attention_backend_requirements() and
_maybe_download_kernel_for_backend() before setting the backend, but
AttentionModuleMixin.set_attention_backend() — used when the method is
called directly on an individual attention submodule — skipped both
calls.

This meant that hub-based backends (e.g. "sage_hub") silently failed
to download the required kernel when targeting a submodule, even though
the same call worked fine on the top-level model.

Add the two missing helper calls to AttentionModuleMixin.set_attention_backend().

Fixes #13284
---
 src/diffusers/models/attention.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/models/attention.py b/src/diffusers/models/attention.py
index 36d0893734c7..ca0af8a1c711 100644
--- a/src/diffusers/models/attention.py
+++ b/src/diffusers/models/attention.py
@@ -159,13 +159,31 @@ def get_processor(self, return_deprecated_lora: bool = False) -> "AttentionProce
             return self.processor
 
     def set_attention_backend(self, backend: str):
-        from .attention_dispatch import AttentionBackendName
+        """Set the attention backend used by this module's processor.
+
+        Args:
+            backend (`str`):
+                Backend name; compared case-insensitively against the `AttentionBackendName` values.
+
+        Raises:
+            ValueError: If `backend` does not match any `AttentionBackendName` value.
+        """
+        from .attention_dispatch import (
+            AttentionBackendName,
+            _check_attention_backend_requirements,
+            _maybe_download_kernel_for_backend,
+        )
 
         available_backends = {x.value for x in AttentionBackendName.__members__.values()}
-        if backend not in available_backends:
+        # Compare case-insensitively: the name is lowercased below before the enum lookup, so a
+        # mixed-case spelling must not be rejected here first.
+        if backend.lower() not in available_backends:
             raise ValueError(f"`{backend=}` must be one of the following: " + ", ".join(available_backends))
 
         backend = AttentionBackendName(backend.lower())
+        # Run the requirement check and fetch any hub-hosted kernel before switching the processor over.
+        _check_attention_backend_requirements(backend)
+        _maybe_download_kernel_for_backend(backend)
         self.processor._attention_backend = backend
 
     def set_use_npu_flash_attention(self, use_npu_flash_attention: bool) -> None: