From e1d85e7577d8f6355bd4cb3449bcb0a7e5f80cb8 Mon Sep 17 00:00:00 2001
From: Qiacheng Li <choliaky@gmail.com>
Date: Wed, 12 Nov 2025 12:21:05 -0800
Subject: [PATCH 1/4] Update README.md for Intel Arc GPU installation, remove
 IPEX (#10729)

IPEX is no longer needed for Intel Arc GPUs. Remove the instructions for setting up IPEX.
---
 README.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 8142f595bcd0..9e28803a21f5 100644
--- a/README.md
+++ b/README.md
@@ -242,7 +242,7 @@ RDNA 4 (RX 9000 series):
 
 ### Intel GPUs (Windows and Linux)
 
-(Option 1) Intel Arc GPU users can install native PyTorch with torch.xpu support using pip. More information can be found [here](https://pytorch.org/docs/main/notes/get_start_xpu.html)
+Intel Arc GPU users can install native PyTorch with torch.xpu support using pip. More information can be found [here](https://pytorch.org/docs/main/notes/get_start_xpu.html)
 
 1. To install PyTorch xpu, use the following command:
 
@@ -252,10 +252,6 @@ This is the command to install the Pytorch xpu nightly which might have some per
 
 ```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu```
 
-(Option 2) Alternatively, Intel GPUs supported by Intel Extension for PyTorch (IPEX) can leverage IPEX for improved performance.
-
-1. visit [Installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=gpu) for more information.
-
 ### NVIDIA
 
 Nvidia users should install stable pytorch using this command:

From 18e7d6dba5f1012d4cf09e8f777dc85d56ff25c0 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Thu, 13 Nov 2025 07:19:53 +1000
Subject: [PATCH 2/4] mm/mp: always unload re-used but modified models (#10724)

The partial unloader path in the model re-use flow skips straight to the
actual unload without checking the patching UUID. This means that
if you run an upscale flow with a model patch on an existing model, your
patches will not be applied.

Fix by delaying the partial unload until after the UUID checks. This
is done by making partially_unload a mode of partially_load where
extra_memory is negative.
---
 comfy/model_management.py | 5 +----
 comfy/model_patcher.py    | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index d8913082adaa..a21df54b3f38 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -503,10 +503,7 @@ def model_load(self, lowvram_model_memory=0, force_patch_weights=False):
         use_more_vram = lowvram_model_memory
         if use_more_vram == 0:
             use_more_vram = 1e32
-        if use_more_vram > 0:
-            self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
-        else:
-            self.model.partially_unload(self.model.offload_device, -use_more_vram, force_patch_weights=force_patch_weights)
+        self.model_use_more_vram(use_more_vram, force_patch_weights=force_patch_weights)
 
         real_model = self.model.model
 
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 68b0a9192753..cf1b0d4412bc 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -928,6 +928,9 @@ def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
                 extra_memory += (used - self.model.model_loaded_weight_memory)
 
             self.patch_model(load_weights=False)
+            if extra_memory < 0 and not unpatch_weights:
+                self.partially_unload(self.offload_device, -extra_memory, force_patch_weights=force_patch_weights)
+                return 0
             full_load = False
             if self.model.model_lowvram == False and self.model.model_loaded_weight_memory > 0:
                 self.apply_hooks(self.forced_hooks, force_apply=True)

From 1c7eaeca1013e4315f36e0d4d274faa106001121 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Thu, 13 Nov 2025 07:20:53 +1000
Subject: [PATCH 3/4] qwen: reduce VRAM usage (#10725)

Clean up a bunch of stacked and no-longer-needed tensors on the QWEN
VRAM peak (currently FFN).

With this I go from OOMing at B=37x1328x1328 to being able to
successfully run B=47 (RTX5090).
---
 comfy/ldm/qwen_image/model.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py
index e5d0d17c1d6e..427ea19c1179 100644
--- a/comfy/ldm/qwen_image/model.py
+++ b/comfy/ldm/qwen_image/model.py
@@ -236,10 +236,10 @@ def forward(
         img_mod1, img_mod2 = img_mod_params.chunk(2, dim=-1)
         txt_mod1, txt_mod2 = txt_mod_params.chunk(2, dim=-1)
 
-        img_normed = self.img_norm1(hidden_states)
-        img_modulated, img_gate1 = self._modulate(img_normed, img_mod1)
-        txt_normed = self.txt_norm1(encoder_hidden_states)
-        txt_modulated, txt_gate1 = self._modulate(txt_normed, txt_mod1)
+        img_modulated, img_gate1 = self._modulate(self.img_norm1(hidden_states), img_mod1)
+        del img_mod1
+        txt_modulated, txt_gate1 = self._modulate(self.txt_norm1(encoder_hidden_states), txt_mod1)
+        del txt_mod1
 
         img_attn_output, txt_attn_output = self.attn(
             hidden_states=img_modulated,
@@ -248,16 +248,20 @@ def forward(
             image_rotary_emb=image_rotary_emb,
             transformer_options=transformer_options,
         )
+        del img_modulated
+        del txt_modulated
 
         hidden_states = hidden_states + img_gate1 * img_attn_output
         encoder_hidden_states = encoder_hidden_states + txt_gate1 * txt_attn_output
+        del img_attn_output
+        del txt_attn_output
+        del img_gate1
+        del txt_gate1
 
-        img_normed2 = self.img_norm2(hidden_states)
-        img_modulated2, img_gate2 = self._modulate(img_normed2, img_mod2)
+        img_modulated2, img_gate2 = self._modulate(self.img_norm2(hidden_states), img_mod2)
         hidden_states = torch.addcmul(hidden_states, img_gate2, self.img_mlp(img_modulated2))
 
-        txt_normed2 = self.txt_norm2(encoder_hidden_states)
-        txt_modulated2, txt_gate2 = self._modulate(txt_normed2, txt_mod2)
+        txt_modulated2, txt_gate2 = self._modulate(self.txt_norm2(encoder_hidden_states), txt_mod2)
         encoder_hidden_states = torch.addcmul(encoder_hidden_states, txt_gate2, self.txt_mlp(txt_modulated2))
 
         return encoder_hidden_states, hidden_states

From 8b0b93df51d04f08eb779cb84dc331fa18b43ae8 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 12 Nov 2025 14:04:41 -0800
Subject: [PATCH 4/4]  Update Python 3.14 compatibility notes in README 
 (#10730)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9e28803a21f5..f51807ad5df1 100644
--- a/README.md
+++ b/README.md
@@ -200,7 +200,7 @@ comfy install
 
 ## Manual Install (Windows, Linux)
 
-Python 3.14 will work if you comment out the `kornia` dependency in the requirements.txt file (breaks the canny node) but it is not recommended.
+Python 3.14 works but you may encounter issues with the torch compile node. The free threaded variant is still missing some dependencies.
 
 Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12