diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 3849d4b26019..0d1e4927072a 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -1,6 +1,7 @@ # yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json language: "en-US" early_access: false +tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code." reviews: profile: "chill" @@ -35,6 +36,14 @@ reviews: - "!**/*.bat" path_instructions: + - path: "**" + instructions: | + IMPORTANT: Only comment on issues directly introduced by this PR's code changes. + Do NOT flag pre-existing issues in code that was merely moved, re-indented, + de-indented, or reformatted without logic changes. If code appears in the diff + only due to whitespace or structural reformatting (e.g., removing a `with:` block), + treat it as unchanged. Contributors should not feel obligated to address + pre-existing issues outside the scope of their contribution. - path: "comfy/**" instructions: | Core ML/diffusion engine. Focus on: @@ -74,7 +83,11 @@ reviews: auto_review: enabled: true auto_incremental_review: true - drafts: true + drafts: false + ignore_title_keywords: + - "WIP" + - "DO NOT REVIEW" + - "DO NOT MERGE" finishing_touches: docstrings: @@ -84,7 +97,7 @@ reviews: tools: ruff: - enabled: true + enabled: false pylint: enabled: false flake8: diff --git a/app/subgraph_manager.py b/app/subgraph_manager.py index 6a8f586a469a..08ad8c30239a 100644 --- a/app/subgraph_manager.py +++ b/app/subgraph_manager.py @@ -53,7 +53,7 @@ def _create_entry(self, file: str, source: str, node_pack: str) -> tuple[str, Su return entry_id, entry async def load_entry_data(self, entry: SubgraphEntry): - with open(entry['path'], 'r') as f: + with open(entry['path'], 'r', encoding='utf-8') as f: entry['data'] = f.read() return entry diff --git a/comfy/ldm/lightricks/av_model.py b/comfy/ldm/lightricks/av_model.py index 2c6954ecd2e4..2b080aaebfbe 100644 --- a/comfy/ldm/lightricks/av_model.py +++ b/comfy/ldm/lightricks/av_model.py @@ -9,6 +9,7 @@ LTXVModel, ) from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier +from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector import comfy.ldm.common_dit class CompressedTimestep: @@ -450,6 +451,29 @@ def _init_model_components(self, device, dtype, **kwargs): operations=self.operations, ) + self.audio_embeddings_connector = Embeddings1DConnector( + split_rope=True, + double_precision_rope=True, + dtype=dtype, + device=device, + operations=self.operations, + ) + + self.video_embeddings_connector = Embeddings1DConnector( + split_rope=True, + double_precision_rope=True, + dtype=dtype, + device=device, + operations=self.operations, + ) + + def preprocess_text_embeds(self, context): + if context.shape[-1] == self.caption_channels * 2: + return context + out_vid = self.video_embeddings_connector(context)[0] + out_audio = self.audio_embeddings_connector(context)[0] + return torch.concat((out_vid, out_audio), dim=-1) + def _init_transformer_blocks(self, device, dtype, **kwargs): """Initialize transformer blocks for LTXAV.""" self.transformer_blocks = nn.ModuleList( diff --git a/comfy/model_base.py b/comfy/model_base.py index 9dcef8741423..2f49578f6d81 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -988,10 +988,14 @@ def __init__(self, model_config, model_type=ModelType.FLUX, device=None): def extra_conds(self, **kwargs): out = super().extra_conds(**kwargs) attention_mask = kwargs.get("attention_mask", None) + device = kwargs["device"] + if attention_mask is not None: out['attention_mask'] = comfy.conds.CONDRegular(attention_mask) cross_attn = kwargs.get("cross_attn", None) if cross_attn is not None: + if hasattr(self.diffusion_model, "preprocess_text_embeds"): + cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference())) out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25)) diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py index 82fbacf59c60..e2ce22e374e5 100644 --- a/comfy/text_encoders/lt.py +++ b/comfy/text_encoders/lt.py @@ -3,7 +3,6 @@ from transformers import T5TokenizerFast from .spiece_tokenizer import SPieceTokenizer import comfy.text_encoders.genmo -from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector import torch import comfy.utils import math @@ -109,22 +108,6 @@ def __init__(self, dtype_llama=None, device="cpu", dtype=None, model_options={}) operations = self.gemma3_12b.operations # TODO self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device) - self.audio_embeddings_connector = Embeddings1DConnector( - split_rope=True, - double_precision_rope=True, - dtype=dtype, - device=device, - operations=operations, - ) - - self.video_embeddings_connector = Embeddings1DConnector( - split_rope=True, - double_precision_rope=True, - dtype=dtype, - device=device, - operations=operations, - ) - def set_clip_options(self, options): self.execution_device = options.get("execution_device", self.execution_device) self.gemma3_12b.set_clip_options(options) @@ -146,10 +129,6 @@ def encode_token_weights(self, token_weight_pairs): out = out.reshape((out.shape[0], out.shape[1], -1)) out = self.text_embedding_projection(out) out = out.float() - out_vid = self.video_embeddings_connector(out)[0] - out_audio = self.audio_embeddings_connector(out)[0] - out = torch.concat((out_vid, out_audio), dim=-1) - return out.to(out_device), pooled def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed): @@ -159,14 +138,14 @@ def load_sd(self, sd): if "model.layers.47.self_attn.q_norm.weight" in sd: return self.gemma3_12b.load_sd(sd) else: - sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight", "model.diffusion_model.video_embeddings_connector.": "video_embeddings_connector.", "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector."}, filter_keys=True) + sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight"}, filter_keys=True) if len(sdo) == 0: sdo = sd missing_all = [] unexpected_all = [] - for prefix, component in [("text_embedding_projection.", self.text_embedding_projection), ("video_embeddings_connector.", self.video_embeddings_connector), ("audio_embeddings_connector.", self.audio_embeddings_connector)]: + for prefix, component in [("text_embedding_projection.", self.text_embedding_projection)]: component_sd = {k.replace(prefix, ""): v for k, v in sdo.items() if k.startswith(prefix)} if component_sd: missing, unexpected = component.load_state_dict(component_sd, strict=False, assign=getattr(self, "can_assign_sd", False)) diff --git a/comfy_extras/nodes_nag.py b/comfy_extras/nodes_nag.py index 033e40eb9206..b571818486f1 100644 --- a/comfy_extras/nodes_nag.py +++ b/comfy_extras/nodes_nag.py @@ -10,7 +10,7 @@ def define_schema(cls) -> io.Schema: node_id="NAGuidance", display_name="Normalized Attention Guidance", description="Applies Normalized Attention Guidance to models, enabling negative prompts on distilled/schnell models.", - category="", + category="advanced/guidance", is_experimental=True, inputs=[ io.Model.Input("model", tooltip="The model to apply NAG to."),