From e7b7dc271bd1ab6e38128acda878326d3450e214 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:34:22 +0100 Subject: [PATCH 1/6] Treat NaN as absent in v1 arithmetic instead of raising ValueError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NaN in multiplicative/divisive constants now masks the term (makes it absent) rather than raising. NaN in additive constants is treated as 0 (additive identity). NaN in constraint RHS skips the constraint. This simplifies the API — users no longer need fillna() before every operation with incomplete data. Update arithmetic-convention, missing-data, and new mixed-coordinate-arithmetic notebooks to reflect the new semantics. Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/arithmetic-convention.ipynb | 30 +- examples/missing-data.ipynb | 182 ++++-------- examples/mixed-coordinate-arithmetic.ipynb | 306 +++++++++++++++++++++ linopy/expressions.py | 60 ++-- test/test_algebraic_properties.py | 20 +- test/test_convention.py | 13 +- test/test_legacy_violations.py | 27 +- test/test_linear_expression.py | 131 ++++++--- 8 files changed, 536 insertions(+), 233 deletions(-) create mode 100644 examples/mixed-coordinate-arithmetic.ipynb diff --git a/examples/arithmetic-convention.ipynb b/examples/arithmetic-convention.ipynb index d9abced3..55a6773c 100644 --- a/examples/arithmetic-convention.ipynb +++ b/examples/arithmetic-convention.ipynb @@ -354,7 +354,9 @@ } }, "outputs": [], - "source": "x.mul(partial, join=\"left\", fill_value=0) # keep x's coords, fill missing factor with 0" + "source": [ + "x.mul(partial, join=\"left\") # keep x's coords; missing factor positions become absent" + ] }, { "cell_type": "markdown", @@ -394,6 +396,7 @@ }, { "cell_type": "code", + "execution_count": null, "id": "v1-align-example", "metadata": { "ExecuteTime": { @@ -401,12 +404,11 @@ "start_time": "2026-03-15T10:27:01.339780Z" } }, + "outputs": 
[], "source": [ "x_aligned, y_short_aligned = linopy.align(x, y_short, join=\"outer\")\n", "x_aligned + y_short_aligned" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -418,13 +420,13 @@ "cell_type": "markdown", "id": "93l4fo4zlhk", "metadata": {}, - "source": "### NaN convention\n\nIn v1, **NaN means \"absent term\" — never a numeric value.**\n\nNaN enters only from `mask=` at construction or structural operations (`.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()`). Operations like `.roll()`, `.sel()`, `.isel()` do not produce NaN.\n\n**User-supplied NaN raises `ValueError`** — the library does not guess whether NaN means \"zero,\" \"exclude,\" or \"identity.\" You decide:\n\n```python\nx + data.fillna(0) # NaN = \"no offset\"\nx * factor.fillna(1) # NaN = \"no scaling\"\n```\n\n**Misaligned constants in mul/div also raise** — when the factor doesn't cover all coordinates, v1 raises instead of silently filling. Use `fill_value=` to opt in explicitly:\n\n```python\nexpr.mul(da, join=\"outer\", fill_value=0) # missing entries → 0 (kills term)\nexpr.mul(da, join=\"outer\", fill_value=1) # missing entries → 1 (preserves term)\nexpr.div(da, join=\"left\", fill_value=1) # missing entries → 1 (no scaling)\n```\n\nFor add/sub, missing entries are always filled with 0 (additive identity) — this is safe and required for associativity.\n\nFor edge cases and detailed NaN propagation rules, see [Missing Data](missing-data.ipynb)." 
+ "source": "### NaN convention\n\nIn v1, **NaN means \"absent\" — it acts as a mask.**\n\nNaN can enter from `mask=` at construction, structural operations (`.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()`), or from user-supplied data.\n\n**NaN in multiplicative/divisive constants masks the term:**\n\n```python\nx * cost_with_nan # NaN positions become absent (no term)\nx / rate_with_nan # NaN positions become absent\n```\n\n**NaN in additive/subtractive constants is treated as 0 (additive identity):**\n\n```python\nx + data_with_nan # NaN positions contribute nothing\nx - data_with_nan # same — NaN subtracted = nothing subtracted\n```\n\n**NaN in constraint RHS skips the constraint** at that position — the constraint simply doesn't exist there.\n\nThis is consistent: NaN always means \"nothing here.\" For multiplication that means no term; for addition that means no contribution; for constraints that means no constraint.\n\n**When NaN means something else**, handle it explicitly:\n\n```python\nexpr.mul(rate, join=\"left\", fill_value=1) # missing entries → 1 (preserves term)\nexpr.mul(da, join=\"outer\", fill_value=0) # missing entries → 0 (kills term)\n```\n\nFor add/sub, missing entries from coordinate alignment are always filled with 0 (additive identity) — this is safe and required for associativity.\n\nFor a practical walkthrough with mixed-coordinate variables and NaN costs, see [Mixed-Coordinate Arithmetic](mixed-coordinate-arithmetic.ipynb). For edge cases and detailed propagation rules, see [Missing Data](missing-data.ipynb)." 
}, { "cell_type": "markdown", "id": "qjqfsj2ckps", "metadata": {}, - "source": "#### Arithmetic with absent slots\n\nWhen expressions have absent slots (from `.shift()`, `.where()`, etc.), v1 and legacy handle multiplication differently:\n\n| Operation | v1 | Legacy |\n|---|---|---|\n| `shifted + 5` | `+5` (fills const with 0, the additive identity) | Same |\n| `shifted - 5` | `-5` (same — subtraction is addition of negation) | Same |\n| `shifted * 3` | **absent** (NaN propagates) | **zero** (NaN filled with 0) |\n| `shifted / 2` | **absent** (NaN propagates) | **zero** (NaN filled with 0) |\n\nThe addition behavior is the same in both conventions — filling with 0 (additive identity) is required for associativity. But for multiplication, legacy silently turns absent slots into zero-valued terms. This means legacy cannot distinguish \"absent variable\" from \"zero variable\", and `fillna()` becomes a no-op on absent slots.\n\nWhen merging expressions (e.g., `x + y.shift(time=1)`), an absent term from `y.shift` does **not** mask the valid `x` term at the same coordinate. A coordinate is fully absent only when **all** terms are absent." + "source": "#### Arithmetic with absent slots\n\nWhen expressions have absent slots (from `.shift()`, `.where()`, user NaN, etc.), v1 and legacy handle them differently:\n\n| Operation | v1 | Legacy |\n|---|---|---|\n| `shifted + 5` | `+5` (fills const with 0, the additive identity) | Same |\n| `shifted - 5` | `-5` (same — subtraction is addition of negation) | Same |\n| `shifted * 3` | **absent** (NaN propagates) | **zero** (NaN filled with 0) |\n| `shifted / 2` | **absent** (NaN propagates) | **zero** (NaN filled with 0) |\n| `var * nan_cost` | **absent** (NaN masks the term) | **zero** (NaN filled with 0) |\n| `var + nan_data` | `var` (NaN treated as 0) | Same |\n\nThe addition behavior is the same in both conventions — filling with 0 (additive identity) is required for associativity. 
But for multiplication, legacy silently turns absent/NaN slots into zero-valued terms. This means legacy cannot distinguish \"absent variable\" from \"zero variable.\"\n\nWhen merging expressions (e.g., `x + y.shift(time=1)`), an absent term from `y.shift` does **not** mask the valid `x` term at the same coordinate. A coordinate is fully absent only when **all** terms are absent." }, { "cell_type": "code", @@ -445,6 +447,14 @@ "print(\"shifted * 3 at time=0:\", (shifted * 3).isnull().values[0]) # True (stays absent)\n", "print()\n", "\n", + "# User NaN in multiplicative constant → masks\n", + "nan_cost = xr.DataArray(\n", + " [1.0, np.nan, 3.0, 4.0, 5.0], dims=[\"time\"], coords={\"time\": time}\n", + ")\n", + "print(\"x * nan_cost at time=1:\", (x * nan_cost).isnull().values[1]) # True (masked)\n", + "print(\"x + nan_cost at time=1:\", (x + nan_cost).const.values[1]) # 0.0 (NaN → 0)\n", + "print()\n", + "\n", "# Merging: absent y doesn't poison valid x\n", "y_shifted = (1 * y).shift(time=1)\n", "merged = x + y_shifted\n", @@ -457,7 +467,7 @@ "cell_type": "markdown", "id": "xamzfwxyikn", "metadata": {}, - "source": "#### Handling NaN in your data\n\nWhen your data has NaN, you have two approaches:\n\n**`.fillna()` — replace NaN before operating:**\n\n```python\nx + data.fillna(0) # NaN positions contribute nothing\nx * factor.fillna(1) # NaN positions keep original coefficient\n```\n\n**`.sel()` — subset to valid positions (preferred for constraints):**\n\n```python\nvalid = data.notnull()\nm.add_constraints(x.sel(time=valid) <= data.sel(time=valid), name=\"con\")\n```\n\nThe `mask=` parameter on `add_constraints` / `add_variables` is another option for multi-dimensional patterns where `.sel()` is inconvenient." + "source": "#### When NaN doesn't mean \"absent\"\n\nNaN in your data is automatically handled: it masks in mul/div, and contributes nothing in add/sub. But sometimes NaN means something different — like \"use the identity\" for a scaling factor. 
In those cases, fill explicitly:\n\n```python\nx * factor.fillna(1) # NaN = \"no scaling\" (keep original coefficient)\nexpr.mul(rate, join=\"left\", fill_value=1) # missing entries = \"no scaling\"\n```\n\nFor constraints, `.sel()` or `mask=` give you precise control over which positions have constraints:\n\n```python\nvalid = data.notnull()\nm.add_constraints(x.sel(time=valid) <= data.sel(time=valid), name=\"con\")\n```" }, { "cell_type": "markdown", @@ -634,7 +644,7 @@ "cell_type": "markdown", "id": "join-header", "metadata": {}, - "source": "---\n\n## The `join` parameter\n\nBoth conventions support explicit `join=` on named methods. This overrides the default behavior and works identically in both modes.\n\n| `join` | Coordinates kept | Fill behavior |\n|--------|-----------------|---------------|\n| `\"exact\"` | Must match | `ValueError` if different |\n| `\"inner\"` | Intersection | No fill needed |\n| `\"outer\"` | Union | add/sub: fill with 0; mul/div: see below |\n| `\"left\"` | Left operand's | Fill missing right |\n| `\"right\"` | Right operand's | Fill missing left |\n| `\"override\"` | Left operand's (positional) | Positional alignment |\n\n**`fill_value` on mul/div:** In v1, when the constant factor doesn't cover all coordinates, `.mul()` and `.div()` raise a `ValueError` instead of silently filling. Pass `fill_value=` to opt in:\n\n```python\nexpr.mul(da, join=\"outer\", fill_value=0) # missing → 0 (kills term)\nexpr.mul(da, join=\"outer\", fill_value=1) # missing → 1 (preserves term)\n```\n\nIn legacy, missing entries are silently filled with 0 (mul) or 1 (div). For add/sub, missing entries are always filled with 0 (additive identity) in both conventions." + "source": "---\n\n## The `join` parameter\n\nBoth conventions support explicit `join=` on named methods. 
This overrides the default behavior and works identically in both modes.\n\n| `join` | Coordinates kept | Fill behavior |\n|--------|-----------------|---------------|\n| `\"exact\"` | Must match | `ValueError` if different |\n| `\"inner\"` | Intersection | No fill needed |\n| `\"outer\"` | Union | add/sub: fill with 0; mul/div: NaN (masks term) |\n| `\"left\"` | Left operand's | Fill missing right |\n| `\"right\"` | Right operand's | Fill missing left |\n| `\"override\"` | Left operand's (positional) | Positional alignment |\n\n**`fill_value` on mul/div:** When the constant factor doesn't cover all coordinates after alignment, missing entries become NaN, which masks those terms (makes them absent). Pass `fill_value=` to use a different fill:\n\n```python\nexpr.mul(da, join=\"outer\", fill_value=0) # missing → 0 (kills term)\nexpr.mul(da, join=\"outer\", fill_value=1) # missing → 1 (preserves term)\n```\n\nIn legacy, missing entries are silently filled with 0 (mul) or 1 (div). For add/sub, missing entries are always filled with 0 (additive identity) in both conventions." 
}, { "cell_type": "code", @@ -784,7 +794,7 @@ "cell_type": "markdown", "id": "migration-header", "metadata": {}, - "source": "---\n\n## Migration guide\n\nTo migrate from legacy to v1:\n\n### Step 1: Enable v1 and run your code\n\n```python\nlinopy.options[\"arithmetic_convention\"] = \"v1\"\n```\n\nAny code that relied on legacy alignment will now raise `ValueError` with a helpful message suggesting which `join=` to use.\n\n### Step 2: Fix coordinate mismatches\n\nCommon patterns:\n\n| Legacy code (silent) | v1 equivalent (explicit) |\n|---|---|\n| `x + subset_constant` | `x.add(subset_constant, join=\"left\")` |\n| `x + y` (same size, different labels) | `x + y.assign_coords(time=x.coords[\"time\"])` |\n| `x <= partial_rhs` | `x.le(partial_rhs, join=\"left\")` |\n| `expr + expr` (mismatched coords) | `expr.add(other, join=\"outer\")` or `.sel()` first |\n\n### Step 3: Handle NaN\n\nUnder legacy, NaN in operands was silently replaced. Under v1, it raises `ValueError`. See [Missing Data](missing-data.ipynb) for the full migration guide.\n\n### Step 4: Pandas index names\n\nUnder v1, pandas objects must have **named indices** to align properly with linopy variables:\n\n```python\n# Will fail — unnamed index becomes \"dim_0\"\ncost = pd.Series([10, 20], index=[\"wind\", \"solar\"])\n\n# Works — explicit dimension name\ncost = pd.Series([10, 20], index=pd.Index([\"wind\", \"solar\"], name=\"tech\"))\n```" + "source": "---\n\n## Migration guide\n\nTo migrate from legacy to v1:\n\n### Step 1: Enable v1 and run your code\n\n```python\nlinopy.options[\"arithmetic_convention\"] = \"v1\"\n```\n\nAny code that relied on legacy alignment will now raise `ValueError` with a helpful message suggesting which `join=` to use.\n\n### Step 2: Fix coordinate mismatches\n\nCommon patterns:\n\n| Legacy code (silent) | v1 equivalent (explicit) |\n|---|---|\n| `x + subset_constant` | `x.add(subset_constant, join=\"left\")` |\n| `x + y` (same size, different labels) | `x + 
y.assign_coords(time=x.coords[\"time\"])` |\n| `x <= partial_rhs` | `x.le(partial_rhs, join=\"left\")` |\n| `expr + expr` (mismatched coords) | `expr.add(other, join=\"outer\")` or `.sel()` first |\n\n### Step 3: Review NaN handling\n\nUnder v1, NaN is treated as \"absent\" — it masks terms in multiplication and contributes nothing in addition. This is usually the right behavior, but review cases where legacy's NaN filling did something intentional:\n\n| Legacy code | v1 behavior | Action needed? |\n|---|---|---|\n| `x + data_with_nans` | NaN positions contribute 0 | No — same effect |\n| `x * data_with_nans` | NaN positions become **absent** | Check if you wanted **zero** instead |\n| `x / data_with_nans` | NaN positions become **absent** | Check if you wanted **identity** (÷1) instead |\n\nIf you relied on legacy's `div` filling NaN with 1, use `.fillna(1)` explicitly.\n\n### Step 4: Pandas index names\n\nUnder v1, pandas objects must have **named indices** to align properly with linopy variables:\n\n```python\n# Will fail — unnamed index becomes \"dim_0\"\ncost = pd.Series([10, 20], index=[\"wind\", \"solar\"])\n\n# Works — explicit dimension name\ncost = pd.Series([10, 20], index=pd.Index([\"wind\", \"solar\"], name=\"tech\"))\n```" }, { "cell_type": "markdown", @@ -882,7 +892,7 @@ "cell_type": "markdown", "id": "summary", "metadata": {}, - "source": "---\n\n## Summary\n\n| | v1 (future default) | Legacy (current default) |\n|---|---|---|\n| **Mismatched coords** | `ValueError` | Silent left-join / override |\n| **Same-size different labels** | `ValueError` | Positional alignment |\n| **NaN in user data** | `ValueError` | Filled with 0 (add/mul) or 1 (div) |\n| **Absent slot + scalar** | Revives (additive identity fill) | Revives (same) |\n| **Absent slot × scalar** | Stays absent (NaN propagates) | Becomes zero (NaN filled) |\n| **mul/div with misaligned factor** | `ValueError` (use `fill_value=` to opt in) | Silent fill (0 for mul, 1 for div) |\n| **add/sub 
with misaligned constant** | Fill with 0 (additive identity) | Same |\n| **Explicit join** | `.add(x, join=...)` | `.add(x, join=...)` |\n| **Setting** | `options[\"arithmetic_convention\"] = \"v1\"` | `options[\"arithmetic_convention\"] = \"legacy\"` |" + "source": "---\n\n## Summary\n\n| | v1 (future default) | Legacy (current default) |\n|---|---|---|\n| **Mismatched coords** | `ValueError` | Silent left-join / override |\n| **Same-size different labels** | `ValueError` | Positional alignment |\n| **NaN in add/sub constant** | Treated as 0 (additive identity) | Same |\n| **NaN in mul/div factor** | **Masks** the term (becomes absent) | Filled with 0 (mul) or 1 (div) |\n| **NaN in constraint RHS** | Constraint skipped at that position | Same |\n| **Absent slot + scalar** | Revives (additive identity fill) | Revives (same) |\n| **Absent slot × scalar** | Stays absent (NaN propagates) | Becomes zero (NaN filled) |\n| **mul/div with misaligned factor** | NaN fills mask the term (use `fill_value=` for other behavior) | Silent fill (0 for mul, 1 for div) |\n| **add/sub with misaligned constant** | Fill with 0 (additive identity) | Same |\n| **Explicit join** | `.add(x, join=...)` | `.add(x, join=...)` |\n| **Setting** | `options[\"arithmetic_convention\"] = \"v1\"` | `options[\"arithmetic_convention\"] = \"legacy\"` |" } ], "metadata": { diff --git a/examples/missing-data.ipynb b/examples/missing-data.ipynb index 33e073d9..3d794cdb 100644 --- a/examples/missing-data.ipynb +++ b/examples/missing-data.ipynb @@ -4,25 +4,23 @@ "cell_type": "markdown", "id": "intro", "metadata": {}, - "source": "# Missing Data — Edge Cases and Details\n\nThis notebook covers NaN edge cases and detailed propagation rules. For the core NaN convention and arithmetic behavior, see [Arithmetic Convention](arithmetic-convention.ipynb).\n\n1. [NaN internals](#nan-internals) — where NaN lives, how it propagates\n2. [What raises](#what-raises) — NaN at API boundaries\n3. 
[Handling NaN with `.fillna()`](#handling-nan-with-fillna) — choosing the right fill value\n4. [Masking constraints](#masking-constraints) — `.sel()` and `mask=`\n5. [Masking with NaN in coefficients](#masking-with-nan-in-coefficients) — multi-dimensional patterns\n6. [Legacy NaN behavior](#legacy-nan-behavior-for-comparison) — how it worked before" + "source": "# Missing Data — Edge Cases and Details\n\nThis notebook covers NaN edge cases and detailed propagation rules. For the core NaN convention and arithmetic behavior, see [Arithmetic Convention](arithmetic-convention.ipynb).\n\n1. [NaN internals](#nan-internals) — where NaN lives, how it propagates\n2. [NaN in arithmetic](#nan-in-arithmetic) — how NaN behaves in each operation\n3. [Handling NaN with `.fillna()`](#handling-nan-with-fillna) — when you want different behavior\n4. [Masking constraints](#masking-constraints) — `.sel()` and `mask=`\n5. [Masking with NaN in coefficients](#masking-with-nan-in-coefficients) — multi-dimensional patterns\n6. 
[Legacy NaN behavior](#legacy-nan-behavior-for-comparison) — how it worked before" }, { "cell_type": "code", - "execution_count": null, "id": "imports", "metadata": { - "ExecuteTime": { - "end_time": "2026-03-11T14:52:16.879309Z", - "start_time": "2026-03-11T14:52:16.087004Z" - }, "execution": { "iopub.execute_input": "2026-03-12T07:17:13.611630Z", "iopub.status.busy": "2026-03-12T07:17:13.611383Z", "iopub.status.idle": "2026-03-12T07:17:14.222456Z", "shell.execute_reply": "2026-03-12T07:17:14.222237Z" + }, + "ExecuteTime": { + "end_time": "2026-03-18T14:24:48.339843Z", + "start_time": "2026-03-18T14:24:47.581893Z" } }, - "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -31,25 +29,25 @@ "import linopy\n", "\n", "linopy.options[\"arithmetic_convention\"] = \"v1\"" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "setup", "metadata": { - "ExecuteTime": { - "end_time": "2026-03-11T14:52:16.939592Z", - "start_time": "2026-03-11T14:52:16.885073Z" - }, "execution": { "iopub.execute_input": "2026-03-12T07:17:14.223624Z", "iopub.status.busy": "2026-03-12T07:17:14.223510Z", "iopub.status.idle": "2026-03-12T07:17:14.252820Z", "shell.execute_reply": "2026-03-12T07:17:14.252554Z" + }, + "ExecuteTime": { + "end_time": "2026-03-18T14:24:48.389729Z", + "start_time": "2026-03-18T14:24:48.349795Z" } }, - "outputs": [], "source": [ "m = linopy.Model()\n", "time = pd.RangeIndex(5, name=\"time\")\n", @@ -57,105 +55,70 @@ "\n", "# Data with NaN\n", "data = xr.DataArray([1.0, np.nan, 3.0, 4.0, 5.0], dims=[\"time\"], coords={\"time\": time})" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "id": "rqgv2f7nwpb", "metadata": {}, - "source": "---\n\n## NaN internals\n\nThis section covers the internal mechanics of NaN handling. 
For the user-facing rules, see [Arithmetic Convention](arithmetic-convention.ipynb#nan-convention).\n\n### How NaN enters\n\nOnly two sources produce NaN inside linopy data structures:\n\n1. **`mask=` argument** at construction (`add_variables`, `add_constraints`) — you explicitly declare which slots exist.\n2. **Structural operations** that produce absent slots: `.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()` (with missing combinations).\n\nOperations that do **not** produce NaN: `.roll()` (circular), `.sel()` / `.isel()` (subset), `.drop_sel()` (drops), `.expand_dims()` / `.broadcast_like()` (broadcast existing data).\n\n### How NaN propagates\n\nAn expression is a sum of terms. Each term has a coefficient, a variable reference, and the expression has a shared constant. NaN marks an **individual term** as absent — it does not mask the entire coordinate.\n\nWhen expressions are combined (e.g., `x*2 + y.shift(time=1)`), each term is kept independently. At time=0, `y.shift` contributes no term (NaN coeffs, vars=-1), but `x*2` is still valid. The result at time=0 is `2*x[0]` — not absent.\n\nA coordinate is only fully absent when **all** terms have vars=-1 **and** the constant is NaN. This is exactly what `isnull()` checks.\n\n### Where NaN lives\n\nNaN is burned directly into the float fields: `coeffs`, `const`, `rhs`, `lower`, `upper`. Integer fields (`labels`, `vars`) use **-1** as their equivalent sentinel. There is no separate boolean mask array.\n\n### Why this is consistent\n\n- **`lhs >= rhs` is `lhs - rhs >= 0`**, so RHS obeys the same rule as any constant — no special case.\n- **No dual role for NaN**: it cannot mean both \"absent\" and \"a number I computed with.\" Internal NaN (from `shift`, `mask=`) is always structural. User NaN is always an error.\n- **Absent terms, not absent coordinates**: combining a valid expression with a partially-absent one does not destroy the valid part. 
Only when *every* term at a coordinate is absent is the coordinate itself absent." + "source": "---\n\n## NaN internals\n\nThis section covers the internal mechanics of NaN handling. For the user-facing rules, see [Arithmetic Convention](arithmetic-convention.ipynb#nan-convention).\n\n### How NaN enters\n\nNaN can enter linopy data structures from three sources:\n\n1. **`mask=` argument** at construction (`add_variables`, `add_constraints`) — you explicitly declare which slots exist.\n2. **Structural operations** that produce absent slots: `.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()` (with missing combinations).\n3. **User-supplied data** with NaN values — NaN in multiplicative constants masks terms; NaN in additive constants is treated as 0.\n\nOperations that do **not** produce NaN: `.roll()` (circular), `.sel()` / `.isel()` (subset), `.drop_sel()` (drops), `.expand_dims()` / `.broadcast_like()` (broadcast existing data).\n\n### How NaN propagates\n\nAn expression is a sum of terms. Each term has a coefficient, a variable reference, and the expression has a shared constant. NaN marks an **individual term** as absent — it does not mask the entire coordinate.\n\nWhen expressions are combined (e.g., `x*2 + y.shift(time=1)`), each term is kept independently. At time=0, `y.shift` contributes no term (NaN coeffs, vars=-1), but `x*2` is still valid. The result at time=0 is `2*x[0]` — not absent.\n\nA coordinate is only fully absent when **all** terms have vars=-1 **and** the constant is NaN. This is exactly what `isnull()` checks.\n\n### Where NaN lives\n\nNaN is burned directly into the float fields: `coeffs`, `const`, `rhs`, `lower`, `upper`. Integer fields (`labels`, `vars`) use **-1** as their equivalent sentinel. 
There is no separate boolean mask array.\n\n### Why this is consistent\n\n- **RHS NaN skips the constraint**: although `lhs >= rhs` is `lhs - rhs >= 0`, NaN in the RHS is not folded in as 0 like an additive constant — the constraint simply does not exist at that position.\n- **No dual role for NaN**: it always means \"absent/nothing here.\" Internal NaN (from `shift`, `mask=`) and user NaN (from data) are treated identically.\n- **Absent terms, not absent coordinates**: combining a valid expression with a partially-absent one does not destroy the valid part. Only when *every* term at a coordinate is absent is the coordinate itself absent." }, { "cell_type": "markdown", "id": "v1-rule-header", "metadata": {}, - "source": [ - "---\n", - "\n", - "## What raises\n", - "\n", - "**NaN in any arithmetic operand raises `ValueError`.** This includes:\n", - "- Constants added/subtracted: `expr + data_with_nan`\n", - "- Factors multiplied/divided: `expr * data_with_nan`\n", - "- Constraint RHS: `expr >= data_with_nan` (because `expr >= rhs` is `expr - rhs >= 0`)\n", - "\n", - "There is no implicit fill. The library does not guess whether NaN means \"zero,\" \"exclude,\" or \"identity.\" You decide." - ] + "source": "---\n\n## NaN in arithmetic\n\nUnder v1, NaN in operands is handled automatically — no `ValueError`, no surprises:\n\n| Operation | NaN behavior | Rationale |\n|---|---|---|\n| `expr + nan_data` | NaN → 0 (additive identity) | Adding nothing = no contribution |\n| `expr - nan_data` | NaN → 0 (additive identity) | Subtracting nothing = no change |\n| `expr * nan_data` | NaN → absent (masks term) | Multiplying by nothing = no term |\n| `expr / nan_data` | NaN → absent (masks term) | Dividing by nothing = no term |\n| `expr <= nan_rhs` | Constraint skipped | No RHS = no constraint |\n\nThis applies to both user-supplied NaN and structural NaN (from `.shift()`, `.where()`, etc.)." 
}, { "cell_type": "code", - "execution_count": null, "id": "v1-rule-demo", "metadata": { - "ExecuteTime": { - "end_time": "2026-03-11T14:52:16.949756Z", - "start_time": "2026-03-11T14:52:16.942400Z" - }, "execution": { "iopub.execute_input": "2026-03-12T07:17:14.253991Z", "iopub.status.busy": "2026-03-12T07:17:14.253892Z", "iopub.status.idle": "2026-03-12T07:17:14.260195Z", "shell.execute_reply": "2026-03-12T07:17:14.259998Z" + }, + "ExecuteTime": { + "end_time": "2026-03-18T14:24:48.404760Z", + "start_time": "2026-03-18T14:24:48.393926Z" } }, + "source": "# NaN in arithmetic — no errors, automatic handling\nadd_result = x + data\nmul_result = x * data\ncon_result = x >= data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True\nprint(\"con: NaN position RHS =\", con_result.rhs.sel(time=1).item()) # NaN (skipped)", "outputs": [], - "source": [ - "# All of these raise ValueError:\n", - "for op_name, op_fn in [\n", - " (\"add\", lambda: x + data),\n", - " (\"mul\", lambda: x * data),\n", - " (\"constraint\", lambda: x >= data),\n", - "]:\n", - " try:\n", - " op_fn()\n", - " except ValueError:\n", - " print(f\"{op_name}: ValueError raised (NaN in operand)\")" - ] + "execution_count": null }, { "cell_type": "markdown", "id": "fillna-header", "metadata": {}, - "source": [ - "---\n", - "\n", - "## Handling NaN with `.fillna()`\n", - "\n", - "When your data contains NaN, fill it explicitly before combining with expressions. 
The fill value depends on what the NaN means in your context:\n", - "\n", - "| Operation | Fill value | Meaning |\n", - "|-----------|-----------|--------|\n", - "| `expr + data.fillna(0)` | 0 | NaN = \"no offset\" |\n", - "| `expr * data.fillna(0)` | 0 | NaN = \"exclude this term\" |\n", - "| `expr * data.fillna(1)` | 1 | NaN = \"no scaling\" |\n", - "| `expr / data.fillna(1)` | 1 | NaN = \"no scaling\" |\n", - "\n", - "The choice is yours — and that's the point. Under legacy, the library chose for you (0 for add/mul, 1 for div). Under v1, you make the decision explicit." - ] + "source": "---\n\n## Handling NaN with `.fillna()`\n\nNaN is handled automatically in most cases. Use `.fillna()` when the default behavior isn't what you want:\n\n| Default behavior | If you want instead | Use |\n|---|---|---|\n| `expr * nan → absent` | Zero coefficient (term exists but contributes nothing) | `expr * data.fillna(0)` |\n| `expr * nan → absent` | Keep original coefficient (no scaling) | `expr * data.fillna(1)` |\n| `expr / nan → absent` | Keep original coefficient (no scaling) | `expr / data.fillna(1)` |\n| `expr + nan → +0` | A specific fill value | `expr + data.fillna(value)` |\n\nThe key difference from legacy: `mul` and `div` now **mask** at NaN positions (making the term absent) rather than filling with 0 or 1. If you relied on the legacy fill behavior, add an explicit `.fillna()`." 
}, { "cell_type": "code", - "execution_count": null, "id": "fillna-demo", "metadata": { - "ExecuteTime": { - "end_time": "2026-03-11T14:52:16.968586Z", - "start_time": "2026-03-11T14:52:16.956299Z" - }, "execution": { "iopub.execute_input": "2026-03-12T07:17:14.261186Z", "iopub.status.busy": "2026-03-12T07:17:14.261122Z", "iopub.status.idle": "2026-03-12T07:17:14.270213Z", "shell.execute_reply": "2026-03-12T07:17:14.269997Z" + }, + "ExecuteTime": { + "end_time": "2026-03-18T14:24:48.419394Z", + "start_time": "2026-03-18T14:24:48.407499Z" } }, - "outputs": [], "source": [ "# Fill NaN before operating — you choose the fill value\n", "print(\"add fillna(0):\", (x + data.fillna(0)).const.values)\n", "print(\"mul fillna(0):\", (x * data.fillna(0)).coeffs.squeeze().values)\n", "print(\"mul fillna(1):\", (x * data.fillna(1)).coeffs.squeeze().values)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -175,21 +138,19 @@ }, { "cell_type": "code", - "execution_count": null, "id": "masking-sel", "metadata": { - "ExecuteTime": { - "end_time": "2026-03-11T14:52:16.983888Z", - "start_time": "2026-03-11T14:52:16.974378Z" - }, "execution": { "iopub.execute_input": "2026-03-12T07:17:14.271290Z", "iopub.status.busy": "2026-03-12T07:17:14.271219Z", "iopub.status.idle": "2026-03-12T07:17:14.279948Z", "shell.execute_reply": "2026-03-12T07:17:14.279785Z" + }, + "ExecuteTime": { + "end_time": "2026-03-18T14:24:48.436675Z", + "start_time": "2026-03-18T14:24:48.427136Z" } }, - "outputs": [], "source": [ "# Availability data with NaN = \"no limit at this hour\"\n", "availability = xr.DataArray(\n", @@ -199,7 +160,9 @@ "# Select only where data is valid — constraint has fewer coordinates\n", "valid = availability.notnull()\n", "m.add_constraints(x.sel(time=valid) <= availability.sel(time=valid), name=\"avail\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -215,26 +178,26 @@ }, { "cell_type": "code", - 
"execution_count": null, "id": "masking-mask-demo", "metadata": { - "ExecuteTime": { - "end_time": "2026-03-11T14:52:16.998421Z", - "start_time": "2026-03-11T14:52:16.990226Z" - }, "execution": { "iopub.execute_input": "2026-03-12T07:17:14.280842Z", "iopub.status.busy": "2026-03-12T07:17:14.280784Z", "iopub.status.idle": "2026-03-12T07:17:14.286824Z", "shell.execute_reply": "2026-03-12T07:17:14.286655Z" + }, + "ExecuteTime": { + "end_time": "2026-03-18T14:24:48.449061Z", + "start_time": "2026-03-18T14:24:48.440993Z" } }, - "outputs": [], "source": [ "# Same result using mask= instead of .sel()\n", "mask = availability.notnull()\n", "m.add_constraints(x <= availability.fillna(0), name=\"avail_masked\", mask=mask)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -268,21 +231,19 @@ }, { "cell_type": "code", - "execution_count": null, "id": "coeff-demo", "metadata": { - "ExecuteTime": { - "end_time": "2026-03-11T14:52:17.017774Z", - "start_time": "2026-03-11T14:52:17.003374Z" - }, "execution": { "iopub.execute_input": "2026-03-12T07:17:14.287819Z", "iopub.status.busy": "2026-03-12T07:17:14.287760Z", "iopub.status.idle": "2026-03-12T07:17:14.300622Z", "shell.execute_reply": "2026-03-12T07:17:14.300443Z" + }, + "ExecuteTime": { + "end_time": "2026-03-18T14:24:48.471791Z", + "start_time": "2026-03-18T14:24:48.458849Z" } }, - "outputs": [], "source": [ "# Efficiency data: solar has no efficiency at night (NaN)\n", "techs = pd.Index([\"solar\", \"wind\"], name=\"tech\")\n", @@ -307,56 +268,21 @@ "expr = gen * efficiency.fillna(0)\n", "m.add_constraints(expr >= rhs, name=\"min_output\", mask=coeff_mask)\n", "print(\"mask approach — constraint mask:\", coeff_mask.values)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "id": "legacy-header", "metadata": {}, - "source": [ - "---\n", - "\n", - "## Legacy NaN behavior (for comparison)\n", - "\n", - "Under legacy, NaN was handled implicitly:\n", - "- **In 
arithmetic**: silently replaced with neutral elements (0 for add/sub/mul, 1 for div)\n", - "- **In constraint RHS**: NaN meant \"no constraint here\" — auto-masked internally\n", - "- **With `auto_mask=True`**: NaN in variable bounds meant \"no variable here\"\n", - "\n", - "This was convenient but could mask data quality issues. A NaN from a data pipeline bug would silently become 0, producing a valid but wrong model.\n", - "\n", - "### Migration\n", - "\n", - "| Legacy code (silent) | v1 equivalent (explicit) |\n", - "|---|---|\n", - "| `x + data_with_nans` | `x + data_with_nans.fillna(0)` |\n", - "| `x * data_with_nans` | `x * data_with_nans.fillna(0)` |\n", - "| `x / data_with_nans` | `x / data_with_nans.fillna(1)` |\n", - "| `m.add_constraints(expr >= nan_rhs)` | `m.add_constraints(expr.sel(...) >= rhs.sel(...))` |\n", - "| `Model(auto_mask=True)` | Explicit `mask=` or `.sel()` |" - ] + "source": "---\n\n## Legacy NaN behavior (for comparison)\n\nUnder legacy, NaN was handled implicitly:\n- **In arithmetic**: silently replaced with neutral elements (0 for add/sub/mul, 1 for div)\n- **In constraint RHS**: NaN meant \"no constraint here\" — auto-masked internally\n- **With `auto_mask=True`**: NaN in variable bounds meant \"no variable here\"\n\nThis was convenient but could mask data quality issues. A NaN from a data pipeline bug would silently become 0, producing a valid but wrong model. The v1 convention makes NaN handling more transparent: NaN masks in mul/div (removing the term entirely) and contributes 0 in add/sub.\n\n### Migration\n\n| Legacy code | v1 behavior | Action needed? 
|\n|---|---|---|\n| `x + data_with_nans` | NaN → 0 (same effect) | None |\n| `x * data_with_nans` | NaN → **absent** (legacy filled with 0) | If you wanted zero terms, use `.fillna(0)` |\n| `x / data_with_nans` | NaN → **absent** (legacy filled with 1) | If you wanted identity, use `.fillna(1)` |\n| `m.add_constraints(expr >= nan_rhs)` | NaN → constraint skipped (same effect) | None |\n| `Model(auto_mask=True)` | Masking is explicit | Use `mask=` or `.sel()` |" }, { "cell_type": "markdown", "id": "summary", "metadata": {}, - "source": [ - "---\n", - "\n", - "## Summary\n", - "\n", - "| Aspect | v1 | Legacy |\n", - "|---|---|---|\n", - "| **NaN means** | Absent term (not absent coordinate) | Numeric placeholder (filled silently) |\n", - "| **NaN sources** | `mask=`, structural ops only | Anywhere (user data, bounds, RHS) |\n", - "| **NaN in operands** | `ValueError` | Filled with neutral element (0 or 1) |\n", - "| **NaN in constraint RHS** | `ValueError` | Auto-masked |\n", - "| **Combining expressions** | Absent terms ignored, valid terms kept | NaN filled before combining |\n", - "| **Coordinate absent when** | All terms absent AND const is NaN | Never (NaN always filled) |\n", - "| **Masking** | Explicit: `.sel()` or `mask=` | Implicit via NaN / `auto_mask` |\n", - "| **Storage** | Float fields + `-1` sentinels | Same, but NaN has dual role |\n", - "| **Fill value choice** | User decides | Library decides |" - ] + "source": "---\n\n## Summary\n\n| Aspect | v1 | Legacy |\n|---|---|---|\n| **NaN means** | Absent term (not absent coordinate) | Numeric placeholder (filled silently) |\n| **NaN sources** | `mask=`, structural ops, user data | Same |\n| **NaN in add/sub** | Treated as 0 (additive identity) | Same |\n| **NaN in mul/div** | **Masks** the term (becomes absent) | Filled with 0 (mul) or 1 (div) |\n| **NaN in constraint RHS** | Constraint skipped | Same |\n| **Combining expressions** | Absent terms ignored, valid terms kept | NaN filled before combining |\n|
**Coordinate absent when** | All terms absent AND const is NaN | Never (NaN always filled) |\n| **Masking** | Automatic via NaN in mul/div; explicit via `.sel()` or `mask=` | Implicit via NaN / `auto_mask` |\n| **Storage** | Float fields + `-1` sentinels | Same, but NaN has dual role |\n| **`.fillna()` needed?** | Only when you want non-default fill (e.g., `fillna(1)` for div) | Never (done automatically) |" } ], "metadata": { diff --git a/examples/mixed-coordinate-arithmetic.ipynb b/examples/mixed-coordinate-arithmetic.ipynb new file mode 100644 index 00000000..1a201e97 --- /dev/null +++ b/examples/mixed-coordinate-arithmetic.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2d7hs5iptwn", + "metadata": {}, + "source": "# Mixed-Coordinate Arithmetic\n\nA common pattern in energy modeling: variables cover **different subsets** of a shared dimension, but cost parameters span the full set. This notebook shows how to combine them cleanly under the v1 arithmetic convention.\n\n**Scenario:** Three capacity variables (`cap_a`, `cap_b`, `cap_c`) cover different technology subsets. Cost coefficients are defined over all technologies. We want a single cost expression over the union of technologies." 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "vnmxvu41lk", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.667374Z", + "start_time": "2026-03-18T14:21:27.664771Z" + } + }, + "outputs": [], + "source": [ + "import xarray as xr\n", + "\n", + "import linopy\n", + "\n", + "linopy.options[\"arithmetic_convention\"] = \"v1\"" + ] + }, + { + "cell_type": "markdown", + "id": "2bvmgbym644", + "metadata": {}, + "source": "## Setup\n\nThree technology groups with overlapping cost data:" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fe7y8gn5a2", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.742431Z", + "start_time": "2026-03-18T14:21:27.719644Z" + } + }, + "outputs": [], + "source": [ + "m = linopy.Model()\n", + "\n", + "tech_a = [\"wind\", \"solar\"]\n", + "tech_b = [\"gas\"]\n", + "tech_all = [\"wind\", \"solar\", \"gas\"]\n", + "\n", + "cap_a = m.add_variables(lower=0, coords=[tech_a], dims=[\"tech\"], name=\"cap_a\")\n", + "cap_b = m.add_variables(lower=0, coords=[tech_b], dims=[\"tech\"], name=\"cap_b\")\n", + "cap_c = m.add_variables(lower=0, coords=[tech_all], dims=[\"tech\"], name=\"cap_c\")\n", + "\n", + "# Cost parameters span all technologies — NaN where a variable doesn't apply\n", + "cost_a = xr.DataArray([7, 9, float(\"nan\")], coords=[(\"tech\", tech_all)])\n", + "cost_b = xr.DataArray([float(\"nan\"), float(\"nan\"), 11], coords=[(\"tech\", tech_all)])\n", + "cost_c = xr.DataArray([13, 17, 19], coords=[(\"tech\", tech_all)])" + ] + }, + { + "cell_type": "markdown", + "id": "8j05vog5umk", + "metadata": {}, + "source": "## Approach 1: `fillna(0)` + explicit joins\n\nThe most explicit approach. Since `cost_a` has NaN at \"gas\" (where `cap_a` doesn't exist), fill NaN with 0 before multiplying. 
Use `join=\"left\"` so the product keeps only the variable's coordinates, then `join=\"outer\"` when adding to build the union.\n\n`fillna(0)` on a cost means \"this technology has no cost contribution from this variable\" — a safe, intentional choice." + }, + { + "cell_type": "code", + "execution_count": null, + "id": "biw39h6a1e", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.788816Z", + "start_time": "2026-03-18T14:21:27.756276Z" + } + }, + "outputs": [], + "source": [ + "combined = (\n", + " cap_a.mul(cost_a.fillna(0), join=\"left\")\n", + " .add(cap_b.mul(cost_b.fillna(0), join=\"left\"), join=\"outer\")\n", + " .add(cap_c.mul(cost_c, join=\"left\"), join=\"outer\")\n", + ")\n", + "combined" + ] + }, + { + "cell_type": "markdown", + "id": "3z14qxy8l2g", + "metadata": {}, + "source": "Expected result:\n```\n[gas]: +11 cap_b[gas] + 19 cap_c[gas]\n[solar]: +9 cap_a[solar] + 17 cap_c[solar]\n[wind]: +7 cap_a[wind] + 13 cap_c[wind]\n```" + }, + { + "cell_type": "markdown", + "id": "u3bhml209b9", + "metadata": {}, + "source": "## Approach 2: `dropna()` on costs first\n\nInstead of filling NaN with 0, drop the irrelevant entries from the cost arrays. Then multiply with `join=\"left\"` (the variable's coords are always a subset of the cost's coords after dropping), and combine with `join=\"outer\"`." 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "hb8n0uzb1u", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.818344Z", + "start_time": "2026-03-18T14:21:27.798495Z" + } + }, + "outputs": [], + "source": [ + "combined_v2 = (\n", + " cap_a.mul(cost_a.dropna(\"tech\"), join=\"left\")\n", + " .add(cap_b.mul(cost_b.dropna(\"tech\"), join=\"left\"), join=\"outer\")\n", + " .add(cap_c.mul(cost_c, join=\"left\"), join=\"outer\")\n", + ")\n", + "combined_v2" + ] + }, + { + "cell_type": "markdown", + "id": "jw46qdqpzhg", + "metadata": {}, + "source": "## Approach 3: Scope costs to each variable upfront\n\nThe cleanest option when you control the data: define costs only over the relevant technologies from the start, eliminating NaN entirely." + }, + { + "cell_type": "code", + "execution_count": null, + "id": "311s75nab7q", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.862196Z", + "start_time": "2026-03-18T14:21:27.829301Z" + } + }, + "outputs": [], + "source": [ + "# Costs scoped to each variable's technologies — no NaN needed\n", + "cost_a_scoped = xr.DataArray([7, 9], coords=[(\"tech\", tech_a)])\n", + "cost_b_scoped = xr.DataArray([11], coords=[(\"tech\", tech_b)])\n", + "cost_c_scoped = xr.DataArray([13, 17, 19], coords=[(\"tech\", tech_all)])\n", + "\n", + "combined_v3 = (\n", + " (cap_a * cost_a_scoped)\n", + " .add(cap_b * cost_b_scoped, join=\"outer\")\n", + " .add(cap_c * cost_c_scoped, join=\"outer\")\n", + ")\n", + "combined_v3" + ] + }, + { + "cell_type": "markdown", + "id": "7su6bb0lk2o", + "metadata": {}, + "source": "## Approach 4: Pre-align with `linopy.align()`\n\nUse `linopy.align()` to reindex all variables and cost arrays to the same coordinates upfront. After alignment, all operands share the same `tech` dimension, so arithmetic uses exact matching — no per-operation `join=` needed.\n\nVariables get absent slots at coordinates they don't cover; cost arrays get NaN. 
Since NaN in a multiplicative constant acts as a mask, the NaN entries naturally produce absent terms — no `fillna` needed." + }, + { + "cell_type": "code", + "execution_count": null, + "id": "azddqkp858", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.902375Z", + "start_time": "2026-03-18T14:21:27.872779Z" + } + }, + "outputs": [], + "source": [ + "# Align all variables and costs to the union of tech coordinates\n", + "cap_a_al, cap_b_al, cap_c_al, cost_a_al, cost_b_al, cost_c_al = linopy.align(\n", + " cap_a, cap_b, cap_c, cost_a, cost_b, cost_c, join=\"outer\"\n", + ")\n", + "\n", + "# NaN in costs naturally masks — no fillna needed!\n", + "combined_v4 = cap_a_al * cost_a_al + cap_b_al * cost_b_al + cap_c_al * cost_c_al\n", + "combined_v4" + ] + }, + { + "cell_type": "markdown", + "id": "tnpb928aup", + "metadata": {}, + "source": "---\n\n## Adding a partial scaling factor\n\nNow extend the example: a `rate` parameter applies only to gas technologies. We want `cap_c * cost_c * rate`, where `rate` defaults to 1 for technologies it doesn't cover.\n\nExpected result — same as before, but the gas entry for `cap_c` is scaled by 1.04:\n```\n[gas]: +11 cap_b[gas] + 19.76 cap_c[gas]\n[solar]: +9 cap_a[solar] + 17 cap_c[solar]\n[wind]: +7 cap_a[wind] + 13 cap_c[wind]\n```" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "p92dqoyi8d", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.914837Z", + "start_time": "2026-03-18T14:21:27.911829Z" + } + }, + "outputs": [], + "source": [ + "rate = xr.DataArray([1.04], coords=[(\"tech\", [\"gas\"])])" + ] + }, + { + "cell_type": "markdown", + "id": "qah2n8cbiic", + "metadata": {}, + "source": "### Option A: `fill_value=1` on `.mul()`\n\nThe `fill_value` parameter tells linopy what to use for technologies not covered by `rate`. Since `rate` is a scaling factor, `1` is the natural identity — \"no scaling\"." 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "8pw3s5xra62", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:27.997888Z", + "start_time": "2026-03-18T14:21:27.976186Z" + } + }, + "outputs": [], + "source": [ + "combined_rate_a = (\n", + " cap_a.mul(cost_a.fillna(0), join=\"left\")\n", + " .add(cap_b.mul(cost_b.fillna(0), join=\"left\"), join=\"outer\")\n", + " .add(\n", + " cap_c.mul(cost_c, join=\"left\").mul(rate, join=\"left\", fill_value=1),\n", + " join=\"outer\",\n", + " )\n", + ")\n", + "combined_rate_a" + ] + }, + { + "cell_type": "markdown", + "id": "5sob69uofr5", + "metadata": {}, + "source": "### Option B: Prepare the parameter with xarray first\n\nPre-multiply the cost and rate arrays using standard xarray operations before passing to linopy. This keeps the linopy arithmetic simple." + }, + { + "cell_type": "code", + "execution_count": null, + "id": "rtyit39tuj", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:28.079495Z", + "start_time": "2026-03-18T14:21:28.007888Z" + } + }, + "outputs": [], + "source": [ + "# Extend rate to all techs (fill with 1 = no scaling), then multiply with cost\n", + "cost_c_rated = cost_c * rate.reindex(tech=tech_all).fillna(1)\n", + "print(\"cost_c_rated:\", cost_c_rated.values) # [13, 17, 19.76]\n", + "\n", + "combined_rate_b = (\n", + " cap_a.mul(cost_a.fillna(0), join=\"left\")\n", + " .add(cap_b.mul(cost_b.fillna(0), join=\"left\"), join=\"outer\")\n", + " .add(cap_c * cost_c_rated, join=\"outer\")\n", + ")\n", + "combined_rate_b" + ] + }, + { + "cell_type": "markdown", + "id": "e7k602xaqwc", + "metadata": {}, + "source": "---\n\n## NaN as mask\n\nUnder the v1 convention, NaN in a multiplicative constant **masks** the corresponding term — the position becomes absent. 
This means you can use NaN-containing cost arrays directly with `join=\"left\"` and the NaN entries will naturally drop out:" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "tymb0e9grj", + "metadata": { + "ExecuteTime": { + "end_time": "2026-03-18T14:21:28.087846Z", + "start_time": "2026-03-18T13:41:33.345728Z" + } + }, + "outputs": [], + "source": [ + "# cost_a's NaN at \"gas\" lies outside cap_a's coords, so join=\"left\" drops it — no fillna needed!\n", + "combined_nan_mask = (\n", + " cap_a.mul(cost_a, join=\"left\")\n", + " .add(cap_b.mul(cost_b, join=\"left\"), join=\"outer\")\n", + " .add(cap_c * cost_c, join=\"outer\")\n", + ")\n", + "combined_nan_mask" + ] + }, + { + "cell_type": "markdown", + "id": "5nw136646y2", + "metadata": {}, + "source": "---\n\n## Summary of patterns\n\n| Situation | Solution |\n|---|---|\n| Cost array has NaN for irrelevant techs | NaN in mul/div **masks** the term (makes it absent) — no cleanup needed |\n| NaN in additive constant | NaN treated as 0 (additive identity) — no cleanup needed |\n| Variables have different coord subsets | Use `.add(..., join=\"outer\")` to build the union |\n| Pre-align all operands | `linopy.align(*vars, *costs, join=\"outer\")`, then use `+`/`*` directly |\n| Multiplication with matching coords | `var * cost` (exact match, no join needed) |\n| Multiplication with superset cost | `var.mul(cost, join=\"left\")` to keep var's coords |\n| Partial scaling factor (e.g., rate for some techs) | `expr.mul(rate, join=\"left\", fill_value=1)` |\n| Partial scaling factor (alternative) | Pre-compute `cost * rate.reindex_like(cost).fillna(1)` in xarray |\n\n**Key principle:** NaN in multiplicative constants means \"no term here\" (absent). NaN in additive constants means \"no contribution\" (zero). For scaling factors where missing means \"identity,\" use `fill_value=1` explicitly."
+ } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/linopy/expressions.py b/linopy/expressions.py index d4c9a458..9718f325 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -635,10 +635,8 @@ def _add_constant( ) or join == "legacy" if np.isscalar(other) and join is None: if not is_legacy and np.isnan(other): - raise ValueError( - "Constant contains NaN values. Use .fillna() to handle " - "missing values before arithmetic operations." - ) + # NaN additive constant → 0 (additive identity) + other = 0 const = self.const.fillna(0) + other return self.assign(const=const) da = as_dataarray(other, coords=self.coords, dims=self.coord_dims) @@ -650,11 +648,11 @@ def _add_constant( self_const = self_const.fillna(0) if is_legacy: da = da.fillna(0) - elif da.isnull().any(): - raise ValueError( - "Constant contains NaN values. Use .fillna() to handle " - "missing values before arithmetic operations." - ) + else: + # NaN in additive constant → 0 (additive identity). + # This treats user-supplied NaN the same as absent-slot NaN: + # adding NaN contributes nothing. + da = da.fillna(0) if needs_data_reindex: fv = {**self._fill_value, "const": 0} return self.__class__( @@ -678,10 +676,8 @@ def _apply_constant_op( # Fast path for scalars: no dimensions to align if np.isscalar(other): if not is_legacy and np.isnan(other): - raise ValueError( - "Factor contains NaN values. Use .fillna() to handle " - "missing values before arithmetic operations." 
- ) + # NaN scalar → entire expression becomes absent + return self.where(False) coeffs = self.coeffs.fillna(0) if is_legacy else self.coeffs const = self.const.fillna(0) if is_legacy else self.const scalar = DataArray(other) @@ -693,25 +689,35 @@ def _apply_constant_op( if is_legacy: factor = factor.fillna(fill_value) self_const = self_const.fillna(0) - elif factor.isnull().any(): - raise ValueError( - "Factor contains NaN values. Use .fillna() to handle " - "missing values before arithmetic operations." - ) + # In v1, NaN in factor acts as a mask: positions where factor is NaN + # become fully absent slots (vars=-1, coeffs=NaN, const=NaN). + nan_mask = factor.isnull() if not is_legacy else None if needs_data_reindex: fv = {**self._fill_value, "const": 0} data = self.data.reindex_like(self_const, fill_value=fv) coeffs = data.coeffs.fillna(0) if is_legacy else data.coeffs + new_coeffs = op(coeffs, factor) + new_const = op(self_const, factor) + new_vars = data.vars + if nan_mask is not None and nan_mask.any(): + new_vars = new_vars.where(~nan_mask, -1) + new_const = new_const.where(~nan_mask, np.nan) return self.__class__( assign_multiindex_safe( - data, - coeffs=op(coeffs, factor), - const=op(self_const, factor), + data, coeffs=new_coeffs, const=new_const, vars=new_vars ), self.model, ) coeffs = self.coeffs.fillna(0) if is_legacy else self.coeffs - return self.assign(coeffs=op(coeffs, factor), const=op(self_const, factor)) + new_coeffs = op(coeffs, factor) + new_const = op(self_const, factor) + result = self.assign(coeffs=new_coeffs, const=new_const) + if nan_mask is not None and nan_mask.any(): + new_vars = self.vars.where(~nan_mask, -1) + result = result.assign( + vars=new_vars, const=new_const.where(~nan_mask, np.nan) + ) + return result def _multiply_by_constant( self: GenericExpression, @@ -1283,14 +1289,8 @@ def to_constraint( rhs = as_dataarray(rhs, coords=self.coords, dims=self.coord_dims) if isinstance(rhs, DataArray): - is_legacy = ( - join is None and 
options["arithmetic_convention"] == "legacy" - ) or join == "legacy" - if not is_legacy and rhs.isnull().any(): - raise ValueError( - "Constraint RHS contains NaN values. Use .fillna() and " - "mask= to handle missing values explicitly." - ) + # NaN in RHS → constraint is skipped at those positions + # (NaN propagates into the sign field, which linopy treats as absent) if effective_join == "override": aligned_rhs = rhs.assign_coords(coords=self.const.coords) expr_const = self.const diff --git a/test/test_algebraic_properties.py b/test/test_algebraic_properties.py index fa130a2d..780ae2a0 100644 --- a/test/test_algebraic_properties.py +++ b/test/test_algebraic_properties.py @@ -505,12 +505,14 @@ def test_add_without_fill_value_still_revives(self, x: Variable) -> None: assert result.const.values[0] == 5 @pytest.mark.v1_only - def test_mul_misaligned_da_raises_without_fill_value(self, x: Variable) -> None: - """In v1, mul with misaligned DataArray raises without explicit fill_value.""" + def test_mul_misaligned_da_masks_without_fill_value(self, x: Variable) -> None: + """In v1, mul with misaligned DataArray masks terms where factor is NaN.""" expr = 1 * x da = xr.DataArray([2.0], dims="time", coords={"time": [1]}) - with pytest.raises(ValueError, match="Factor contains NaN"): - expr.mul(da, join="left") + result = expr.mul(da, join="left") + # time=0 has NaN factor → absent; time=1 has factor 2 → coeff 2 + assert result.isnull().sel(time=0).item() + assert result.coeffs.squeeze().sel(time=1).item() == pytest.approx(2.0) @pytest.mark.v1_only def test_mul_misaligned_da_with_fill_value(self, x: Variable) -> None: @@ -521,12 +523,14 @@ def test_mul_misaligned_da_with_fill_value(self, x: Variable) -> None: assert not np.isnan(result.coeffs.values).all() @pytest.mark.v1_only - def test_div_misaligned_da_raises_without_fill_value(self, x: Variable) -> None: - """In v1, div with misaligned DataArray raises without explicit fill_value.""" + def 
test_div_misaligned_da_masks_without_fill_value(self, x: Variable) -> None: + """In v1, div with misaligned DataArray masks terms where divisor is NaN.""" expr = 1 * x da = xr.DataArray([2.0], dims="time", coords={"time": [1]}) - with pytest.raises(ValueError, match="Factor contains NaN"): - expr.div(da, join="left") + result = expr.div(da, join="left") + # time=0 has NaN divisor → absent; time=1 has divisor 2 → coeff 0.5 + assert result.isnull().sel(time=0).item() + assert result.coeffs.squeeze().sel(time=1).item() == pytest.approx(0.5) @pytest.mark.v1_only def test_div_misaligned_da_with_fill_value(self, x: Variable) -> None: diff --git a/test/test_convention.py b/test/test_convention.py index e1099e81..9bf6eb29 100644 --- a/test/test_convention.py +++ b/test/test_convention.py @@ -218,13 +218,18 @@ def test_inf_mul_propagates(self, a: Variable) -> None: result = (1 * a) * const assert np.isinf(result.coeffs.squeeze().values[1]) - def test_nan_mul_raises_v1(self, a: Variable) -> None: - """Under v1, NaN in mul should raise ValueError.""" + def test_nan_mul_masks_v1(self, a: Variable) -> None: + """Under v1, NaN in mul masks the affected positions.""" const = xr.DataArray( [1.0, np.nan, 3.0, 4.0, 5.0], dims=["i"], coords={"i": range(5)} ) - with pytest.raises(ValueError, match="NaN"): - (1 * a) * const + result = (1 * a) * const + # i=1 has NaN factor → absent slot + assert result.isnull().sel(i=1).item() + # Other positions are valid + assert not result.isnull().sel(i=0).item() + assert result.coeffs.squeeze().sel(i=0).item() == 1.0 + assert result.coeffs.squeeze().sel(i=2).item() == 3.0 # --------------------------------------------------------------------------- diff --git a/test/test_legacy_violations.py b/test/test_legacy_violations.py index 74c20e88..7edb3f04 100644 --- a/test/test_legacy_violations.py +++ b/test/test_legacy_violations.py @@ -290,20 +290,27 @@ def test_div_nan_silently_becomes_one( assert result.coeffs.squeeze().sel(time=1).item() == 1.0 
@pytest.mark.v1_only - def test_add_nan_raises(self, x: Variable, nan_data: xr.DataArray) -> None: - """v1: NaN in user data raises ValueError.""" - with pytest.raises(ValueError, match="NaN"): - x + nan_data + def test_add_nan_fills_zero(self, x: Variable, nan_data: xr.DataArray) -> None: + """v1: NaN in addend treated as 0 (additive identity).""" + result = x + nan_data + # NaN position gets 0 added → const unchanged from default + assert result.const.sel(time=1).item() == 0.0 + # Non-NaN positions get correct const + assert result.const.sel(time=0).item() == 1.0 @pytest.mark.v1_only - def test_mul_nan_raises(self, x: Variable, nan_data: xr.DataArray) -> None: - with pytest.raises(ValueError, match="NaN"): - x * nan_data + def test_mul_nan_masks(self, x: Variable, nan_data: xr.DataArray) -> None: + """v1: NaN in multiplier masks the affected position.""" + result = x * nan_data + assert result.isnull().sel(time=1).item() + assert not result.isnull().sel(time=0).item() @pytest.mark.v1_only - def test_div_nan_raises(self, x: Variable, nan_data: xr.DataArray) -> None: - with pytest.raises(ValueError, match="NaN"): - x / nan_data + def test_div_nan_masks(self, x: Variable, nan_data: xr.DataArray) -> None: + """v1: NaN in divisor masks the affected position.""" + result = x / nan_data + assert result.isnull().sel(time=1).item() + assert not result.isnull().sel(time=0).item() @pytest.mark.legacy_only def test_nan_fill_inconsistency(self, x: Variable, nan_data: xr.DataArray) -> None: diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index b95c056f..8b6f5b0f 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -766,13 +766,14 @@ def test_subset_sub_var_raises(self, v: Variable, subset: xr.DataArray) -> None: @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) - def test_mul_subset_join_left_raises( + def test_mul_subset_join_left_masks( self, v: Variable, subset: xr.DataArray, operand: str ) 
-> None: - """In v1, join='left' with subset raises without explicit fill_value.""" + """In v1, join='left' with subset masks where factor is NaN.""" target = v if operand == "var" else 1 * v - with pytest.raises(ValueError, match="Factor contains NaN"): - target.mul(subset, join="left") + result = target.mul(subset, join="left") + # Positions not covered by subset are masked (absent) + assert result.isnull().any() @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) @@ -1126,7 +1127,8 @@ def test_add_nan_filled( @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) - def test_add_nan_raises(self, v: Variable, operand: str) -> None: + def test_add_nan_fills_zero(self, v: Variable, operand: str) -> None: + """v1: NaN in addend treated as 0 (additive identity).""" vals = np.arange(20, dtype=float) vals[0] = np.nan vals[5] = np.nan @@ -1134,9 +1136,12 @@ def test_add_nan_raises(self, v: Variable, operand: str) -> None: nan_constant = xr.DataArray( vals, dims=["dim_2"], coords={"dim_2": range(20)} ) + base_const = 0.0 if operand == "var" else 5.0 target = v if operand == "var" else v + 5 - with pytest.raises(ValueError, match="NaN"): - target + nan_constant + result = target + nan_constant + # NaN positions → 0 added → const unchanged from base + for i in [0, 5, 19]: + assert result.const.values[i] == base_const @pytest.mark.legacy_only @pytest.mark.parametrize("operand", ["var", "expr"]) @@ -1157,16 +1162,20 @@ def test_sub_nan_filled( @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) - def test_sub_nan_raises(self, v: Variable, operand: str) -> None: + def test_sub_nan_fills_zero(self, v: Variable, operand: str) -> None: + """v1: NaN in subtrahend treated as 0 (additive identity).""" vals = np.arange(20, dtype=float) for i in self.NAN_POSITIONS: vals[i] = np.nan nan_constant = xr.DataArray( vals, dims=["dim_2"], coords={"dim_2": range(20)} ) + base_const = 0.0 if operand == "var" else 5.0 target = v if 
operand == "var" else v + 5 - with pytest.raises(ValueError, match="NaN"): - target - nan_constant + result = target - nan_constant + # NaN positions → 0 subtracted → const unchanged + for i in self.NAN_POSITIONS: + assert result.const.values[i] == base_const @pytest.mark.legacy_only @pytest.mark.parametrize("operand", ["var", "expr"]) @@ -1186,15 +1195,17 @@ def test_mul_nan_filled( @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) - def test_mul_nan_raises(self, v: Variable, operand: str) -> None: + def test_mul_nan_masks(self, v: Variable, operand: str) -> None: + """v1: NaN in multiplier masks the affected positions.""" vals = np.arange(20, dtype=float) vals[0] = np.nan nan_constant = xr.DataArray( vals, dims=["dim_2"], coords={"dim_2": range(20)} ) target = v if operand == "var" else 1 * v - with pytest.raises(ValueError, match="NaN"): - target * nan_constant + result = target * nan_constant + assert result.isnull().values[0] # NaN position → absent + assert not result.isnull().values[1] # non-NaN → valid @pytest.mark.legacy_only @pytest.mark.parametrize("operand", ["var", "expr"]) @@ -1215,7 +1226,8 @@ def test_div_nan_filled( @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) - def test_div_nan_raises(self, v: Variable, operand: str) -> None: + def test_div_nan_masks(self, v: Variable, operand: str) -> None: + """v1: NaN in divisor masks the affected positions.""" vals = np.arange(20, dtype=float) + 1 vals[0] = np.nan vals[5] = np.nan @@ -1223,8 +1235,10 @@ def test_div_nan_raises(self, v: Variable, operand: str) -> None: vals, dims=["dim_2"], coords={"dim_2": range(20)} ) target = v if operand == "var" else 1 * v - with pytest.raises(ValueError, match="NaN"): - target / nan_constant + result = target / nan_constant + assert result.isnull().values[0] # NaN position → absent + assert result.isnull().values[5] # NaN position → absent + assert not result.isnull().values[1] # non-NaN → valid @pytest.mark.legacy_only 
def test_add_commutativity( @@ -1242,16 +1256,19 @@ def test_add_commutativity( ) @pytest.mark.v1_only - def test_add_commutativity_nan_raises(self, v: Variable) -> None: + def test_add_commutativity_nan(self, v: Variable) -> None: + """v1: NaN in addend → 0; commutative.""" vals = np.arange(20, dtype=float) vals[0] = np.nan nan_constant = xr.DataArray( vals, dims=["dim_2"], coords={"dim_2": range(20)} ) - with pytest.raises(ValueError, match="NaN"): - v + nan_constant - with pytest.raises(ValueError, match="NaN"): - nan_constant + v + result_a = v + nan_constant + result_b = nan_constant + v + np.testing.assert_array_equal(result_a.const.values, result_b.const.values) + np.testing.assert_array_equal( + result_a.coeffs.values, result_b.coeffs.values + ) @pytest.mark.legacy_only def test_mul_commutativity( @@ -1268,16 +1285,22 @@ def test_mul_commutativity( ) @pytest.mark.v1_only - def test_mul_commutativity_nan_raises(self, v: Variable) -> None: + def test_mul_commutativity_nan(self, v: Variable) -> None: + """v1: NaN in multiplier masks; commutative.""" vals = np.arange(20, dtype=float) vals[0] = np.nan nan_constant = xr.DataArray( vals, dims=["dim_2"], coords={"dim_2": range(20)} ) - with pytest.raises(ValueError, match="NaN"): - v * nan_constant - with pytest.raises(ValueError, match="NaN"): - nan_constant * v + result_a = v * nan_constant + result_b = nan_constant * v + # Both should mask at position 0 + assert result_a.isnull().values[0] + assert result_b.isnull().values[0] + # Non-NaN positions should match + np.testing.assert_array_equal( + result_a.coeffs.values[:, 1:], result_b.coeffs.values[:, 1:] + ) @pytest.mark.legacy_only def test_quadexpr_add_nan( @@ -1292,15 +1315,18 @@ def test_quadexpr_add_nan( assert not np.isnan(result.const.values).any() @pytest.mark.v1_only - def test_quadexpr_add_nan_raises(self, v: Variable) -> None: + def test_quadexpr_add_nan_fills_zero(self, v: Variable) -> None: + """v1: NaN in addend to quadratic expr treated as 0.""" 
vals = np.arange(20, dtype=float) vals[0] = np.nan nan_constant = xr.DataArray( vals, dims=["dim_2"], coords={"dim_2": range(20)} ) qexpr = v * v - with pytest.raises(ValueError, match="NaN"): - qexpr + nan_constant + result = qexpr + nan_constant + assert isinstance(result, QuadraticExpression) + # NaN position → 0 added → const unchanged + assert result.const.values[0] == 0.0 class TestExpressionWithNaN: """ @@ -2448,11 +2474,15 @@ def test_mul_constant_join_outer(self, a: Variable) -> None: assert result.coeffs.sel(i=2).item() == 3 @pytest.mark.v1_only - def test_mul_constant_join_outer_raises(self, a: Variable) -> None: - """In v1, outer join with misaligned factor raises without fill_value.""" + def test_mul_constant_join_outer_masks(self, a: Variable) -> None: + """In v1, outer join with misaligned factor masks where factor is NaN.""" const = xr.DataArray([2, 3, 4], dims=["i"], coords={"i": [1, 2, 3]}) - with pytest.raises(ValueError, match="Factor contains NaN"): - a.to_linexpr().mul(const, join="outer") + result = a.to_linexpr().mul(const, join="outer") + assert list(result.data.indexes["i"]) == [0, 1, 2, 3] + # i=0 not in const → NaN factor → absent; i=3 not in a → absent + assert result.isnull().sel(i=0).item() + assert result.coeffs.sel(i=1).item() == 2 + assert result.coeffs.sel(i=2).item() == 3 @pytest.mark.v1_only def test_mul_constant_join_outer_with_fill_value(self, a: Variable) -> None: @@ -2489,11 +2519,14 @@ def test_div_constant_join_outer(self, a: Variable) -> None: assert list(result.data.indexes["i"]) == [0, 1, 2, 3] @pytest.mark.v1_only - def test_div_constant_join_outer_raises(self, a: Variable) -> None: - """In v1, outer join with misaligned divisor raises without fill_value.""" + def test_div_constant_join_outer_masks(self, a: Variable) -> None: + """In v1, outer join with misaligned divisor masks where divisor is NaN.""" const = xr.DataArray([2, 3, 4], dims=["i"], coords={"i": [1, 2, 3]}) - with pytest.raises(ValueError, match="Factor 
contains NaN"): - a.to_linexpr().div(const, join="outer") + result = a.to_linexpr().div(const, join="outer") + assert list(result.data.indexes["i"]) == [0, 1, 2, 3] + # i=0 not in const → NaN divisor → absent + assert result.isnull().sel(i=0).item() + assert result.coeffs.sel(i=1).item() == pytest.approx(0.5) @pytest.mark.v1_only def test_div_constant_join_outer_with_fill_value(self, a: Variable) -> None: @@ -2651,11 +2684,17 @@ def test_mul_constant_outer_fill_values(self, a: Variable) -> None: assert result.coeffs.squeeze().sel(i=0).item() == 0 @pytest.mark.v1_only - def test_mul_constant_outer_raises_v1(self, a: Variable) -> None: + def test_mul_constant_outer_masks_v1(self, a: Variable) -> None: expr = 1 * a + 5 other = xr.DataArray([2, 3], dims=["i"], coords={"i": [1, 3]}) - with pytest.raises(ValueError, match="Factor contains NaN"): - expr.mul(other, join="outer") + result = expr.mul(other, join="outer") + assert set(result.coords["i"].values) == {0, 1, 2, 3} + # i=1 has factor 2 → coeff 2, const 10 + assert result.coeffs.squeeze().sel(i=1).item() == 2 + assert result.const.sel(i=1).item() == 10 + # i=0, i=2 have NaN factor → absent + assert result.isnull().sel(i=0).item() + assert result.isnull().sel(i=2).item() @pytest.mark.v1_only def test_mul_constant_outer_with_fill_value_v1(self, a: Variable) -> None: @@ -2689,11 +2728,17 @@ def test_div_constant_outer_fill_values(self, a: Variable) -> None: assert result.coeffs.squeeze().sel(i=0).item() == pytest.approx(1.0) @pytest.mark.v1_only - def test_div_constant_outer_raises_v1(self, a: Variable) -> None: + def test_div_constant_outer_masks_v1(self, a: Variable) -> None: expr = 1 * a + 10 other = xr.DataArray([2.0, 5.0], dims=["i"], coords={"i": [1, 3]}) - with pytest.raises(ValueError, match="Factor contains NaN"): - expr.div(other, join="outer") + result = expr.div(other, join="outer") + assert set(result.coords["i"].values) == {0, 1, 2, 3} + # i=1 has divisor 2 → coeff 0.5, const 5 + assert 
result.coeffs.squeeze().sel(i=1).item() == pytest.approx(0.5) + assert result.const.sel(i=1).item() == pytest.approx(5.0) + # i=0, i=2 have NaN divisor → absent + assert result.isnull().sel(i=0).item() + assert result.isnull().sel(i=2).item() @pytest.mark.v1_only def test_div_constant_outer_with_fill_value_v1(self, a: Variable) -> None: From 6c3323215d820374faa80a136215d88fa301124c Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:51:30 +0100 Subject: [PATCH 2/6] Fix Variable * NaN inconsistency, restore constraint RHS NaN raise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Variable.to_linexpr() now treats NaN coefficients as absent in v1, consistent with Expression * NaN (was silently filling with 0) - Restore ValueError for NaN in constraint RHS — silently skipping constraints is too dangerous at this API boundary - Update notebooks to reflect both changes Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/arithmetic-convention.ipynb | 374 +++++++++++++-------------- examples/missing-data.ipynb | 71 +++-- linopy/expressions.py | 11 +- linopy/variables.py | 17 +- test/test_linear_expression.py | 14 +- 5 files changed, 274 insertions(+), 213 deletions(-) diff --git a/examples/arithmetic-convention.ipynb b/examples/arithmetic-convention.ipynb index 55a6773c..103d5d2e 100644 --- a/examples/arithmetic-convention.ipynb +++ b/examples/arithmetic-convention.ipynb @@ -8,22 +8,22 @@ }, { "cell_type": "code", - "execution_count": null, "id": "imports", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.119653Z", - "start_time": "2026-03-15T10:22:26.390265Z" + "end_time": "2026-03-18T14:34:38.994093Z", + "start_time": "2026-03-18T14:34:38.298539Z" } }, - "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "\n", "import linopy" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": 
"markdown", @@ -37,18 +37,18 @@ }, { "cell_type": "code", - "execution_count": null, "id": "opt-in", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.129764Z", - "start_time": "2026-03-15T10:22:27.128151Z" + "end_time": "2026-03-18T14:34:38.999338Z", + "start_time": "2026-03-18T14:34:38.997821Z" } }, - "outputs": [], "source": [ "linopy.options[\"arithmetic_convention\"] = \"v1\"" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -90,15 +90,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "v1-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.175188Z", - "start_time": "2026-03-15T10:22:27.137443Z" + "end_time": "2026-03-18T14:34:39.045856Z", + "start_time": "2026-03-18T14:34:39.006455Z" } }, - "outputs": [], "source": [ "m = linopy.Model()\n", "\n", @@ -110,7 +108,9 @@ "y = m.add_variables(lower=0, coords=[time], name=\"y\")\n", "gen = m.add_variables(lower=0, coords=[time, techs], name=\"gen\")\n", "risk = m.add_variables(lower=0, coords=[techs, scenarios], name=\"risk\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -122,102 +122,102 @@ }, { "cell_type": "code", - "execution_count": null, "id": "v1-same-coords", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.194482Z", - "start_time": "2026-03-15T10:22:27.183389Z" + "end_time": "2026-03-18T14:34:39.057693Z", + "start_time": "2026-03-18T14:34:39.048822Z" } }, - "outputs": [], "source": [ "# Same coords — just works\n", "x + y" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-matching-constant", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.209615Z", - "start_time": "2026-03-15T10:22:27.202647Z" + "end_time": "2026-03-18T14:34:39.067745Z", + "start_time": "2026-03-18T14:34:39.060274Z" } }, - "outputs": [], "source": [ "# Constant with matching coords\n", "factor = xr.DataArray([2, 3, 
4, 5, 6], dims=[\"time\"], coords={\"time\": time})\n", "x * factor" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-broadcast-constant", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.226386Z", - "start_time": "2026-03-15T10:22:27.217314Z" + "end_time": "2026-03-18T14:34:39.082063Z", + "start_time": "2026-03-18T14:34:39.072201Z" } }, - "outputs": [], "source": [ "# Constant with fewer dims — broadcasts freely\n", "cost = xr.DataArray([1.0, 0.5, 3.0], dims=[\"tech\"], coords={\"tech\": techs})\n", "gen * cost # cost broadcasts over time" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-broadcast-expr", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.249780Z", - "start_time": "2026-03-15T10:22:27.236180Z" + "end_time": "2026-03-18T14:34:39.096271Z", + "start_time": "2026-03-18T14:34:39.084937Z" } }, - "outputs": [], "source": [ "# Expression + Expression with non-shared dims — broadcasts freely\n", "gen + risk # (time, tech) + (tech, scenario) → (time, tech, scenario)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-scalar", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.258303Z", - "start_time": "2026-03-15T10:22:27.253098Z" + "end_time": "2026-03-18T14:34:39.104795Z", + "start_time": "2026-03-18T14:34:39.099576Z" } }, - "outputs": [], "source": [ "# Scalar — always fine\n", "x + 5" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-constraint-broadcast", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.274938Z", - "start_time": "2026-03-15T10:22:27.263114Z" + "end_time": "2026-03-18T14:34:39.119915Z", + "start_time": "2026-03-18T14:34:39.108496Z" } }, - "outputs": [], "source": [ "# Constraints — RHS with fewer dims broadcasts 
naturally\n", "capacity = xr.DataArray([100, 80, 50], dims=[\"tech\"], coords={\"tech\": techs})\n", "m.add_constraints(gen <= capacity, name=\"cap\") # capacity broadcasts over time" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -229,15 +229,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "v1-mismatch-expr", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.287834Z", - "start_time": "2026-03-15T10:22:27.281356Z" + "end_time": "2026-03-18T14:34:39.130986Z", + "start_time": "2026-03-18T14:34:39.123779Z" } }, - "outputs": [], "source": [ "y_short = m.add_variables(\n", " lower=0, coords=[pd.RangeIndex(3, name=\"time\")], name=\"y_short\"\n", @@ -247,19 +245,19 @@ " x + y_short # time coords don't match: [0..4] vs [0..2]\n", "except ValueError as e:\n", " print(\"ValueError:\", e)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-mismatch-constant", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.297294Z", - "start_time": "2026-03-15T10:22:27.293052Z" + "end_time": "2026-03-18T14:34:39.137583Z", + "start_time": "2026-03-18T14:34:39.134202Z" } }, - "outputs": [], "source": [ "partial = xr.DataArray([10, 20, 30], dims=[\"time\"], coords={\"time\": [0, 1, 2]})\n", "\n", @@ -267,25 +265,27 @@ " x * partial # time coords [0..4] vs [0,1,2]\n", "except ValueError as e:\n", " print(\"ValueError:\", e)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-mismatch-constraint", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.307409Z", - "start_time": "2026-03-15T10:22:27.304043Z" + "end_time": "2026-03-18T14:34:39.148566Z", + "start_time": "2026-03-18T14:34:39.144984Z" } }, - "outputs": [], "source": [ "try:\n", " x <= partial # constraint RHS doesn't cover all coords\n", "except ValueError as e:\n", " print(\"ValueError:\", e)" - ] + ], + 
"outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -305,18 +305,18 @@ }, { "cell_type": "code", - "execution_count": null, "id": "v1-sel-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.324986Z", - "start_time": "2026-03-15T10:22:27.315474Z" + "end_time": "2026-03-18T14:34:39.164561Z", + "start_time": "2026-03-18T14:34:39.154727Z" } }, - "outputs": [], "source": [ "x.sel(time=[0, 1, 2]) + y_short" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -330,33 +330,33 @@ }, { "cell_type": "code", - "execution_count": null, "id": "v1-join-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.344814Z", - "start_time": "2026-03-15T10:22:27.335311Z" + "end_time": "2026-03-18T14:34:39.181064Z", + "start_time": "2026-03-18T14:34:39.171966Z" } }, - "outputs": [], "source": [ "x.add(y_short, join=\"inner\") # intersection: time [0, 1, 2]" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "v1-join-outer", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.355116Z", - "start_time": "2026-03-15T10:22:27.348065Z" + "end_time": "2026-03-18T14:34:39.193303Z", + "start_time": "2026-03-18T14:34:39.184848Z" } }, - "outputs": [], "source": [ "x.mul(partial, join=\"left\") # keep x's coords; missing factor positions become absent" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -370,21 +370,21 @@ }, { "cell_type": "code", - "execution_count": null, "id": "v1-assign-coords-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.370452Z", - "start_time": "2026-03-15T10:22:27.360291Z" + "end_time": "2026-03-18T14:34:39.206073Z", + "start_time": "2026-03-18T14:34:39.196659Z" } }, - "outputs": [], "source": [ "z = m.add_variables(lower=0, coords=[pd.RangeIndex(5, 10, name=\"time\")], name=\"z\")\n", "\n", "# z has time=[5..9], x has time=[0..4] — same shape, 
different labels\n", "x + z.assign_coords(time=x.coords[\"time\"])" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -396,19 +396,19 @@ }, { "cell_type": "code", - "execution_count": null, "id": "v1-align-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:27:01.354052Z", - "start_time": "2026-03-15T10:27:01.339780Z" + "end_time": "2026-03-18T14:34:39.219399Z", + "start_time": "2026-03-18T14:34:39.210230Z" } }, - "outputs": [], "source": [ "x_aligned, y_short_aligned = linopy.align(x, y_short, join=\"outer\")\n", "x_aligned + y_short_aligned" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -420,7 +420,7 @@ "cell_type": "markdown", "id": "93l4fo4zlhk", "metadata": {}, - "source": "### NaN convention\n\nIn v1, **NaN means \"absent\" — it acts as a mask.**\n\nNaN can enter from `mask=` at construction, structural operations (`.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()`), or from user-supplied data.\n\n**NaN in multiplicative/divisive constants masks the term:**\n\n```python\nx * cost_with_nan # NaN positions become absent (no term)\nx / rate_with_nan # NaN positions become absent\n```\n\n**NaN in additive/subtractive constants is treated as 0 (additive identity):**\n\n```python\nx + data_with_nan # NaN positions contribute nothing\nx - data_with_nan # same — NaN subtracted = nothing subtracted\n```\n\n**NaN in constraint RHS skips the constraint** at that position — the constraint simply doesn't exist there.\n\nThis is consistent: NaN always means \"nothing here.\" For multiplication that means no term; for addition that means no contribution; for constraints that means no constraint.\n\n**When NaN means something else**, handle it explicitly:\n\n```python\nexpr.mul(rate, join=\"left\", fill_value=1) # missing entries → 1 (preserves term)\nexpr.mul(da, join=\"outer\", fill_value=0) # missing entries → 0 (kills term)\n```\n\nFor add/sub, missing entries 
from coordinate alignment are always filled with 0 (additive identity) — this is safe and required for associativity.\n\nFor a practical walkthrough with mixed-coordinate variables and NaN costs, see [Mixed-Coordinate Arithmetic](mixed-coordinate-arithmetic.ipynb). For edge cases and detailed propagation rules, see [Missing Data](missing-data.ipynb)." + "source": "### NaN convention\n\nIn v1, **NaN means \"absent\" — it acts as a mask.**\n\nNaN can enter from `mask=` at construction, structural operations (`.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()`), or from user-supplied data.\n\n**NaN in multiplicative/divisive constants masks the term:**\n\n```python\nx * cost_with_nan # NaN positions become absent (no term)\nx / rate_with_nan # NaN positions become absent\n```\n\n**NaN in additive/subtractive constants is treated as 0 (additive identity):**\n\n```python\nx + data_with_nan # NaN positions contribute nothing\nx - data_with_nan # same — NaN subtracted = nothing subtracted\n```\n\n**NaN in constraint RHS raises `ValueError`** — because silently skipping a constraint is dangerous. Use `.sel()` or `mask=` to exclude positions explicitly:\n\n```python\nvalid = rhs.notnull()\nm.add_constraints(x.sel(time=valid) <= rhs.sel(time=valid), name=\"con\")\n```\n\n**When NaN means something else**, handle it explicitly:\n\n```python\nexpr.mul(rate, join=\"left\", fill_value=1) # missing entries → 1 (preserves term)\nexpr.mul(da, join=\"outer\", fill_value=0) # missing entries → 0 (kills term)\n```\n\nFor add/sub, missing entries from coordinate alignment are always filled with 0 (additive identity) — this is safe and required for associativity.\n\nFor a practical walkthrough with mixed-coordinate variables and NaN costs, see [Mixed-Coordinate Arithmetic](mixed-coordinate-arithmetic.ipynb). For edge cases and detailed propagation rules, see [Missing Data](missing-data.ipynb)." 
}, { "cell_type": "markdown", @@ -430,15 +430,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "p3a6v5kx6es", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.411581Z", - "start_time": "2026-03-15T10:22:27.398419Z" + "end_time": "2026-03-18T14:34:39.252483Z", + "start_time": "2026-03-18T14:34:39.228667Z" } }, - "outputs": [], "source": [ "# Absent slot arithmetic demo\n", "shifted = (1 * x).shift(time=1) # time=0 is absent\n", @@ -461,7 +459,9 @@ "print(\n", " \"x + y.shift(1) at time=0 is absent?\", merged.isnull().values[0]\n", ") # False — x is valid" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -485,33 +485,31 @@ }, { "cell_type": "code", - "execution_count": null, "id": "legacy-switch", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.416249Z", - "start_time": "2026-03-15T10:22:27.414707Z" + "end_time": "2026-03-18T14:34:39.262226Z", + "start_time": "2026-03-18T14:34:39.260387Z" } }, - "outputs": [], "source": [ "import warnings\n", "\n", "linopy.options[\"arithmetic_convention\"] = \"legacy\"\n", "warnings.filterwarnings(\"ignore\", category=linopy.LinopyDeprecationWarning)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "legacy-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.430641Z", - "start_time": "2026-03-15T10:22:27.426283Z" + "end_time": "2026-03-18T14:34:39.270153Z", + "start_time": "2026-03-18T14:34:39.266223Z" } }, - "outputs": [], "source": [ "m2 = linopy.Model()\n", "time = pd.RangeIndex(5, name=\"time\")\n", @@ -519,7 +517,9 @@ "y2_short = m2.add_variables(\n", " lower=0, coords=[pd.RangeIndex(3, name=\"time\")], name=\"y_short\"\n", ")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -535,36 +535,36 @@ }, { "cell_type": "code", - "execution_count": null, "id": "legacy-subset", "metadata": { "ExecuteTime": { - "end_time": 
"2026-03-15T10:22:27.441042Z", - "start_time": "2026-03-15T10:22:27.433151Z" + "end_time": "2026-03-18T14:34:39.284452Z", + "start_time": "2026-03-18T14:34:39.276080Z" } }, - "outputs": [], "source": [ "# Different size — left join, fill missing with 0\n", "x2 + y2_short # y_short drops out at time 3, 4" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "legacy-same-size", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.460970Z", - "start_time": "2026-03-15T10:22:27.450904Z" + "end_time": "2026-03-18T14:34:39.301725Z", + "start_time": "2026-03-18T14:34:39.292060Z" } }, - "outputs": [], "source": [ "# Same size — positional alignment (labels ignored!)\n", "z2 = m2.add_variables(lower=0, coords=[pd.RangeIndex(5, 10, name=\"time\")], name=\"z\")\n", "x2 + z2 # x has time=[0..4], z has time=[5..9], but same size → positional match" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -581,20 +581,20 @@ }, { "cell_type": "code", - "execution_count": null, "id": "legacy-nan-fill", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.470457Z", - "start_time": "2026-03-15T10:22:27.464122Z" + "end_time": "2026-03-18T14:34:39.310943Z", + "start_time": "2026-03-18T14:34:39.305097Z" } }, - "outputs": [], "source": [ "vals = xr.DataArray([1.0, np.nan, 3.0, 4.0, 5.0], dims=[\"time\"], coords={\"time\": time})\n", "result = x2 + vals\n", "print(\"const:\", result.const.values) # NaN replaced with 0" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -608,37 +608,37 @@ }, { "cell_type": "code", - "execution_count": null, "id": "legacy-constraint", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.485173Z", - "start_time": "2026-03-15T10:22:27.477944Z" + "end_time": "2026-03-18T14:34:39.323117Z", + "start_time": "2026-03-18T14:34:39.316143Z" } }, - "outputs": [], "source": [ "rhs = xr.DataArray([10, 20, 30], 
dims=[\"time\"], coords={\"time\": [0, 1, 2]})\n", "con = x2 <= rhs # constraint only at time 0, 1, 2; NaN at time 3, 4\n", "con" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "legacy-restore-v1", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.489400Z", - "start_time": "2026-03-15T10:22:27.487983Z" + "end_time": "2026-03-18T14:34:39.329262Z", + "start_time": "2026-03-18T14:34:39.327680Z" } }, - "outputs": [], "source": [ "# Switch back to v1 for the rest of the notebook\n", "linopy.options[\"arithmetic_convention\"] = \"v1\"\n", "warnings.resetwarnings()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -648,15 +648,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "join-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.502609Z", - "start_time": "2026-03-15T10:22:27.498542Z" + "end_time": "2026-03-18T14:34:39.339568Z", + "start_time": "2026-03-18T14:34:39.334733Z" } }, - "outputs": [], "source": [ "m3 = linopy.Model()\n", "\n", @@ -665,87 +663,89 @@ "\n", "a = m3.add_variables(coords=[i_a], name=\"a\")\n", "b = m3.add_variables(coords=[i_b], name=\"b\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "join-inner", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.514779Z", - "start_time": "2026-03-15T10:22:27.505507Z" + "end_time": "2026-03-18T14:34:39.352078Z", + "start_time": "2026-03-18T14:34:39.343332Z" } }, - "outputs": [], "source": [ "# Inner join — intersection (i=1, 2)\n", "a.add(b, join=\"inner\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "join-outer", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.532700Z", - "start_time": "2026-03-15T10:22:27.523552Z" + "end_time": "2026-03-18T14:34:39.364865Z", + "start_time": 
"2026-03-18T14:34:39.355179Z" } }, - "outputs": [], "source": [ "# Outer join — union (i=0, 1, 2, 3)\n", "a.add(b, join=\"outer\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "join-left", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.544400Z", - "start_time": "2026-03-15T10:22:27.535298Z" + "end_time": "2026-03-18T14:34:39.383599Z", + "start_time": "2026-03-18T14:34:39.374361Z" } }, - "outputs": [], "source": [ "# Left join — keep a's coords (i=0, 1, 2)\n", "a.add(b, join=\"left\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "join-right", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.560648Z", - "start_time": "2026-03-15T10:22:27.552349Z" + "end_time": "2026-03-18T14:34:39.395480Z", + "start_time": "2026-03-18T14:34:39.386776Z" } }, - "outputs": [], "source": [ "# Right join — keep b's coords (i=1, 2, 3)\n", "a.add(b, join=\"right\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "join-override", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.580134Z", - "start_time": "2026-03-15T10:22:27.570359Z" + "end_time": "2026-03-18T14:34:39.409201Z", + "start_time": "2026-03-18T14:34:39.400520Z" } }, - "outputs": [], "source": [ "# Override — positional (0↔1, 1↔2, 2↔3), uses a's labels\n", "a.add(b, join=\"override\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -757,38 +757,38 @@ }, { "cell_type": "code", - "execution_count": null, "id": "join-mul-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.591822Z", - "start_time": "2026-03-15T10:22:27.583595Z" + "end_time": "2026-03-18T14:34:39.418196Z", + "start_time": "2026-03-18T14:34:39.411921Z" } }, - "outputs": [], "source": [ "const = xr.DataArray([2, 3, 4], dims=[\"i\"], coords={\"i\": [1, 2, 3]})\n", 
"\n", "# Multiply, keeping only shared coords\n", "a.mul(const, join=\"inner\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "join-constraint", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.607102Z", - "start_time": "2026-03-15T10:22:27.601210Z" + "end_time": "2026-03-18T14:34:39.430153Z", + "start_time": "2026-03-18T14:34:39.424328Z" } }, - "outputs": [], "source": [ "# Constraint with left join — only a's coords, NaN at missing RHS positions\n", "rhs = xr.DataArray([10, 20], dims=[\"i\"], coords={\"i\": [0, 1]})\n", "a.le(rhs, join=\"left\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -810,15 +810,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "practical-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.619993Z", - "start_time": "2026-03-15T10:22:27.616034Z" + "end_time": "2026-03-18T14:34:39.439625Z", + "start_time": "2026-03-18T14:34:39.435610Z" } }, - "outputs": [], "source": [ "m4 = linopy.Model()\n", "\n", @@ -826,36 +824,36 @@ "techs = pd.Index([\"solar\", \"wind\", \"gas\"], name=\"tech\")\n", "\n", "gen = m4.add_variables(lower=0, coords=[hours, techs], name=\"gen\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "practical-capacity", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.634628Z", - "start_time": "2026-03-15T10:22:27.623453Z" + "end_time": "2026-03-18T14:34:39.456108Z", + "start_time": "2026-03-18T14:34:39.444200Z" } }, - "outputs": [], "source": [ "# Capacity limits — constant broadcasts over hours\n", "capacity = xr.DataArray([100, 80, 50], dims=[\"tech\"], coords={\"tech\": techs})\n", "m4.add_constraints(gen <= capacity, name=\"capacity_limit\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "practical-solar", "metadata": { 
"ExecuteTime": { - "end_time": "2026-03-15T10:22:27.648995Z", - "start_time": "2026-03-15T10:22:27.637590Z" + "end_time": "2026-03-18T14:34:39.469841Z", + "start_time": "2026-03-18T14:34:39.459321Z" } }, - "outputs": [], "source": [ "# Solar availability — full 24h profile, matching coords\n", "solar_avail = np.zeros(24)\n", @@ -864,19 +862,19 @@ "\n", "solar_gen = gen.sel(tech=\"solar\")\n", "m4.add_constraints(solar_gen <= solar_availability, name=\"solar_avail\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "practical-peak", "metadata": { "ExecuteTime": { - "end_time": "2026-03-15T10:22:27.670953Z", - "start_time": "2026-03-15T10:22:27.657751Z" + "end_time": "2026-03-18T14:34:39.485345Z", + "start_time": "2026-03-18T14:34:39.472969Z" } }, - "outputs": [], "source": [ "# Peak demand — only applies to hours 8-20, use join=\"inner\"\n", "peak_hours = pd.RangeIndex(8, 21, name=\"hour\")\n", @@ -886,13 +884,15 @@ "\n", "total_gen = gen.sum(\"tech\")\n", "m4.add_constraints(total_gen.ge(peak_demand, join=\"inner\"), name=\"peak_demand\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "id": "summary", "metadata": {}, - "source": "---\n\n## Summary\n\n| | v1 (future default) | Legacy (current default) |\n|---|---|---|\n| **Mismatched coords** | `ValueError` | Silent left-join / override |\n| **Same-size different labels** | `ValueError` | Positional alignment |\n| **NaN in add/sub constant** | Treated as 0 (additive identity) | Same |\n| **NaN in mul/div factor** | **Masks** the term (becomes absent) | Filled with 0 (mul) or 1 (div) |\n| **NaN in constraint RHS** | Constraint skipped at that position | Same |\n| **Absent slot + scalar** | Revives (additive identity fill) | Revives (same) |\n| **Absent slot × scalar** | Stays absent (NaN propagates) | Becomes zero (NaN filled) |\n| **mul/div with misaligned factor** | NaN fills mask the term (use `fill_value=` for other 
behavior) | Silent fill (0 for mul, 1 for div) |\n| **add/sub with misaligned constant** | Fill with 0 (additive identity) | Same |\n| **Explicit join** | `.add(x, join=...)` | `.add(x, join=...)` |\n| **Setting** | `options[\"arithmetic_convention\"] = \"v1\"` | `options[\"arithmetic_convention\"] = \"legacy\"` |" + "source": "---\n\n## Summary\n\n| | v1 (future default) | Legacy (current default) |\n|---|---|---|\n| **Mismatched coords** | `ValueError` | Silent left-join / override |\n| **Same-size different labels** | `ValueError` | Positional alignment |\n| **NaN in add/sub constant** | Treated as 0 (additive identity) | Same |\n| **NaN in mul/div factor** | **Masks** the term (becomes absent) | Filled with 0 (mul) or 1 (div) |\n| **NaN in constraint RHS** | `ValueError` (use `.sel()`/`mask=`) | Constraint skipped |\n| **Absent slot + scalar** | Revives (additive identity fill) | Revives (same) |\n| **Absent slot × scalar** | Stays absent (NaN propagates) | Becomes zero (NaN filled) |\n| **mul/div with misaligned factor** | NaN fills mask the term (use `fill_value=` for other behavior) | Silent fill (0 for mul, 1 for div) |\n| **add/sub with misaligned constant** | Fill with 0 (additive identity) | Same |\n| **Explicit join** | `.add(x, join=...)` | `.add(x, join=...)` |\n| **Setting** | `options[\"arithmetic_convention\"] = \"v1\"` | `options[\"arithmetic_convention\"] = \"legacy\"` |" } ], "metadata": { diff --git a/examples/missing-data.ipynb b/examples/missing-data.ipynb index 3d794cdb..2d4a3ed9 100644 --- a/examples/missing-data.ipynb +++ b/examples/missing-data.ipynb @@ -17,8 +17,8 @@ "shell.execute_reply": "2026-03-12T07:17:14.222237Z" }, "ExecuteTime": { - "end_time": "2026-03-18T14:24:48.339843Z", - "start_time": "2026-03-18T14:24:47.581893Z" + "end_time": "2026-03-18T14:37:57.565450Z", + "start_time": "2026-03-18T14:37:57.562853Z" } }, "source": [ @@ -44,8 +44,8 @@ "shell.execute_reply": "2026-03-12T07:17:14.252554Z" }, "ExecuteTime": { - "end_time": 
"2026-03-18T14:24:48.389729Z", - "start_time": "2026-03-18T14:24:48.349795Z" + "end_time": "2026-03-18T14:37:57.579773Z", + "start_time": "2026-03-18T14:37:57.574197Z" } }, "source": [ @@ -63,13 +63,48 @@ "cell_type": "markdown", "id": "rqgv2f7nwpb", "metadata": {}, - "source": "---\n\n## NaN internals\n\nThis section covers the internal mechanics of NaN handling. For the user-facing rules, see [Arithmetic Convention](arithmetic-convention.ipynb#nan-convention).\n\n### How NaN enters\n\nNaN can enter linopy data structures from three sources:\n\n1. **`mask=` argument** at construction (`add_variables`, `add_constraints`) — you explicitly declare which slots exist.\n2. **Structural operations** that produce absent slots: `.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()` (with missing combinations).\n3. **User-supplied data** with NaN values — NaN in multiplicative constants masks terms; NaN in additive constants is treated as 0.\n\nOperations that do **not** produce NaN: `.roll()` (circular), `.sel()` / `.isel()` (subset), `.drop_sel()` (drops), `.expand_dims()` / `.broadcast_like()` (broadcast existing data).\n\n### How NaN propagates\n\nAn expression is a sum of terms. Each term has a coefficient, a variable reference, and the expression has a shared constant. NaN marks an **individual term** as absent — it does not mask the entire coordinate.\n\nWhen expressions are combined (e.g., `x*2 + y.shift(time=1)`), each term is kept independently. At time=0, `y.shift` contributes no term (NaN coeffs, vars=-1), but `x*2` is still valid. The result at time=0 is `2*x[0]` — not absent.\n\nA coordinate is only fully absent when **all** terms have vars=-1 **and** the constant is NaN. This is exactly what `isnull()` checks.\n\n### Where NaN lives\n\nNaN is burned directly into the float fields: `coeffs`, `const`, `rhs`, `lower`, `upper`. Integer fields (`labels`, `vars`) use **-1** as their equivalent sentinel. 
There is no separate boolean mask array.\n\n### Why this is consistent\n\n- **`lhs >= rhs` is `lhs - rhs >= 0`**, so RHS obeys the same rule as any constant — no special case.\n- **No dual role for NaN**: it always means \"absent/nothing here.\" Internal NaN (from `shift`, `mask=`) and user NaN (from data) are treated identically.\n- **Absent terms, not absent coordinates**: combining a valid expression with a partially-absent one does not destroy the valid part. Only when *every* term at a coordinate is absent is the coordinate itself absent." + "source": [ + "---\n", + "\n", + "## NaN internals\n", + "\n", + "This section covers the internal mechanics of NaN handling. For the user-facing rules, see [Arithmetic Convention](arithmetic-convention.ipynb#nan-convention).\n", + "\n", + "### How NaN enters\n", + "\n", + "NaN can enter linopy data structures from three sources:\n", + "\n", + "1. **`mask=` argument** at construction (`add_variables`, `add_constraints`) — you explicitly declare which slots exist.\n", + "2. **Structural operations** that produce absent slots: `.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()` (with missing combinations).\n", + "3. **User-supplied data** with NaN values — NaN in multiplicative constants masks terms; NaN in additive constants is treated as 0.\n", + "\n", + "Operations that do **not** produce NaN: `.roll()` (circular), `.sel()` / `.isel()` (subset), `.drop_sel()` (drops), `.expand_dims()` / `.broadcast_like()` (broadcast existing data).\n", + "\n", + "### How NaN propagates\n", + "\n", + "An expression is a sum of terms. Each term has a coefficient, a variable reference, and the expression has a shared constant. NaN marks an **individual term** as absent — it does not mask the entire coordinate.\n", + "\n", + "When expressions are combined (e.g., `x*2 + y.shift(time=1)`), each term is kept independently. At time=0, `y.shift` contributes no term (NaN coeffs, vars=-1), but `x*2` is still valid. 
The result at time=0 is `2*x[0]` — not absent.\n", + "\n", + "A coordinate is only fully absent when **all** terms have vars=-1 **and** the constant is NaN. This is exactly what `isnull()` checks.\n", + "\n", + "### Where NaN lives\n", + "\n", + "NaN is burned directly into the float fields: `coeffs`, `const`, `rhs`, `lower`, `upper`. Integer fields (`labels`, `vars`) use **-1** as their equivalent sentinel. There is no separate boolean mask array.\n", + "\n", + "### Why this is consistent\n", + "\n", + "- **`lhs >= rhs` is `lhs - rhs >= 0`**, so RHS obeys the same rule as any constant — no special case.\n", + "- **No dual role for NaN**: it always means \"absent/nothing here.\" Internal NaN (from `shift`, `mask=`) and user NaN (from data) are treated identically.\n", + "\n", + "- **Absent terms, not absent coordinates**: combining a valid expression with a partially-absent one does not destroy the valid part. Only when *every* term at a coordinate is absent is the coordinate itself absent." + ] }, { "cell_type": "markdown", "id": "v1-rule-header", "metadata": {}, - "source": "---\n\n## NaN in arithmetic\n\nUnder v1, NaN in operands is handled automatically — no `ValueError`, no surprises:\n\n| Operation | NaN behavior | Rationale |\n|---|---|---|\n| `expr + nan_data` | NaN → 0 (additive identity) | Adding nothing = no contribution |\n| `expr - nan_data` | NaN → 0 (additive identity) | Subtracting nothing = no change |\n| `expr * nan_data` | NaN → absent (masks term) | Multiplying by nothing = no term |\n| `expr / nan_data` | NaN → absent (masks term) | Dividing by nothing = no term |\n| `expr <= nan_rhs` | Constraint skipped | No RHS = no constraint |\n\nThis applies to both user-supplied NaN and structural NaN (from `.shift()`, `.where()`, etc.)." 
+ "source": "---\n\n## NaN in arithmetic\n\nUnder v1, NaN in arithmetic operands is handled automatically:\n\n| Operation | NaN behavior | Rationale |\n|---|---|---|\n| `expr + nan_data` | NaN → 0 (additive identity) | Adding nothing = no contribution |\n| `expr - nan_data` | NaN → 0 (additive identity) | Subtracting nothing = no change |\n| `expr * nan_data` | NaN → absent (masks term) | Multiplying by nothing = no term |\n| `expr / nan_data` | NaN → absent (masks term) | Dividing by nothing = no term |\n| `expr <= nan_rhs` | **`ValueError`** | Silently skipping constraints is dangerous |\n\nConstraint RHS is the one place where NaN still raises. This is intentional: `expr <= nan` could mean `expr <= 0` (treating NaN as 0 per the subtraction rule) or \"no constraint\" — both are plausible and both are dangerous if guessed wrong. Use `.sel()` or `mask=` to be explicit." }, { "cell_type": "code", @@ -82,11 +117,11 @@ "shell.execute_reply": "2026-03-12T07:17:14.259998Z" }, "ExecuteTime": { - "end_time": "2026-03-18T14:24:48.404760Z", - "start_time": "2026-03-18T14:24:48.393926Z" + "end_time": "2026-03-18T14:37:57.594638Z", + "start_time": "2026-03-18T14:37:57.582212Z" } }, - "source": "# NaN in arithmetic — no errors, automatic handling\nadd_result = x + data\nmul_result = x * data\ncon_result = x >= data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True\nprint(\"con: NaN position RHS =\", con_result.rhs.sel(time=1).item()) # NaN (skipped)", + "source": "# NaN in arithmetic — automatic handling\nadd_result = x + data\nmul_result = x * data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True\nprint()\n\n# Constraint RHS with NaN raises\ntry:\n x >= data\nexcept ValueError as e:\n print(\"constraint: ValueError raised —\", str(e)[:60])", 
"outputs": [], "execution_count": null }, @@ -107,8 +142,8 @@ "shell.execute_reply": "2026-03-12T07:17:14.269997Z" }, "ExecuteTime": { - "end_time": "2026-03-18T14:24:48.419394Z", - "start_time": "2026-03-18T14:24:48.407499Z" + "end_time": "2026-03-18T14:37:57.613805Z", + "start_time": "2026-03-18T14:37:57.601797Z" } }, "source": [ @@ -147,8 +182,8 @@ "shell.execute_reply": "2026-03-12T07:17:14.279785Z" }, "ExecuteTime": { - "end_time": "2026-03-18T14:24:48.436675Z", - "start_time": "2026-03-18T14:24:48.427136Z" + "end_time": "2026-03-18T14:37:57.630876Z", + "start_time": "2026-03-18T14:37:57.621278Z" } }, "source": [ @@ -187,8 +222,8 @@ "shell.execute_reply": "2026-03-12T07:17:14.286655Z" }, "ExecuteTime": { - "end_time": "2026-03-18T14:24:48.449061Z", - "start_time": "2026-03-18T14:24:48.440993Z" + "end_time": "2026-03-18T14:37:57.646183Z", + "start_time": "2026-03-18T14:37:57.638792Z" } }, "source": [ @@ -240,8 +275,8 @@ "shell.execute_reply": "2026-03-12T07:17:14.300443Z" }, "ExecuteTime": { - "end_time": "2026-03-18T14:24:48.471791Z", - "start_time": "2026-03-18T14:24:48.458849Z" + "end_time": "2026-03-18T14:37:57.664838Z", + "start_time": "2026-03-18T14:37:57.648814Z" } }, "source": [ @@ -282,7 +317,7 @@ "cell_type": "markdown", "id": "summary", "metadata": {}, - "source": "---\n\n## Summary\n\n| Aspect | v1 | Legacy |\n|---|---|---|\n| **NaN means** | Absent term (not absent coordinate) | Numeric placeholder (filled silently) |\n| **NaN sources** | `mask=`, structural ops, user data | Same |\n| **NaN in add/sub** | Treated as 0 (additive identity) | Same |\n| **NaN in mul/div** | **Masks** the term (becomes absent) | Filled with 0 (mul) or 1 (div) |\n| **NaN in constraint RHS** | Constraint skipped | Same |\n| **Combining expressions** | Absent terms ignored, valid terms kept | NaN filled before combining |\n| **Coordinate absent when** | All terms absent AND const is NaN | Never (NaN always filled) |\n| **Masking** | Automatic via NaN in mul/div; explicit 
via `.sel()` or `mask=` | Implicit via NaN / `auto_mask` |\n| **Storage** | Float fields + `-1` sentinels | Same, but NaN has dual role |\n| **`.fillna()` needed?** | Only when you want non-default fill (e.g., `fillna(1)` for div) | Never (done automatically) |" + "source": "---\n\n## Summary\n\n| Aspect | v1 | Legacy |\n|---|---|---|\n| **NaN means** | Absent term (not absent coordinate) | Numeric placeholder (filled silently) |\n| **NaN sources** | `mask=`, structural ops, user data | Same |\n| **NaN in add/sub** | Treated as 0 (additive identity) | Same |\n| **NaN in mul/div** | **Masks** the term (becomes absent) | Filled with 0 (mul) or 1 (div) |\n| **NaN in constraint RHS** | `ValueError` (use `.sel()`/`mask=`) | Auto-masked (constraint skipped) |\n| **Combining expressions** | Absent terms ignored, valid terms kept | NaN filled before combining |\n| **Coordinate absent when** | All terms absent AND const is NaN | Never (NaN always filled) |\n| **Masking** | Automatic via NaN in mul/div; explicit via `.sel()` or `mask=` | Implicit via NaN / `auto_mask` |\n| **Storage** | Float fields + `-1` sentinels | Same, but NaN has dual role |\n| **`.fillna()` needed?** | Only when you want non-default fill (e.g., `fillna(1)` for div) | Never (done automatically) |" } ], "metadata": { diff --git a/linopy/expressions.py b/linopy/expressions.py index 9718f325..9c6b5fdb 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1289,8 +1289,15 @@ def to_constraint( rhs = as_dataarray(rhs, coords=self.coords, dims=self.coord_dims) if isinstance(rhs, DataArray): - # NaN in RHS → constraint is skipped at those positions - # (NaN propagates into the sign field, which linopy treats as absent) + is_legacy = ( + join is None and options["arithmetic_convention"] == "legacy" + ) or join == "legacy" + if not is_legacy and rhs.isnull().any(): + raise ValueError( + "Constraint RHS contains NaN values. NaN in a bound would " + "silently skip constraints. 
Use .fillna() to set a default " + "bound, or .sel()/mask= to exclude positions explicitly." + ) if effective_join == "override": aligned_rhs = rhs.assign_coords(coords=self.const.coords) expr_const = self.const diff --git a/linopy/variables.py b/linopy/variables.py index d9bf9f31..5c1610ca 100644 --- a/linopy/variables.py +++ b/linopy/variables.py @@ -318,15 +318,19 @@ def to_linexpr( """ coefficient = as_dataarray(coefficient, coords=self.coords, dims=self.dims) coefficient = coefficient.reindex_like(self.labels, fill_value=0) + is_v1 = options["arithmetic_convention"] == "v1" + # In v1, NaN in coefficient means "absent term" (masks the variable). + # Detect before filling so we can mark those slots absent. + coeff_nan = coefficient.isnull() if is_v1 else None coefficient = coefficient.fillna(0) ds = Dataset({"coeffs": coefficient, "vars": self.labels}).expand_dims( TERM_DIM, -1 ) - # In v1 mode, set coeffs=NaN and const=NaN where the variable is - # absent so that absence propagates through arithmetic (consistent - # with expression path where shift/where/reindex fill with FILL_VALUE) - if options["arithmetic_convention"] == "v1": + if is_v1: + # Mark slots as absent where: variable is absent OR coefficient is NaN absent = self.labels == -1 + if coeff_nan is not None: + absent = absent | coeff_nan if absent.any(): nan_fill = DataArray( np.where(absent, np.nan, 0.0), coords=self.labels.coords @@ -335,9 +339,14 @@ def to_linexpr( np.where(absent, np.nan, coefficient.values), coords=self.labels.coords, ) + var_fill = DataArray( + np.where(absent, -1, self.labels.values), + coords=self.labels.coords, + ) ds = ds.assign( const=nan_fill, coeffs=coeff_fill.expand_dims(TERM_DIM, -1), + vars=var_fill.expand_dims(TERM_DIM, -1), ) return expressions.LinearExpression(ds, self.model) diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index 8b6f5b0f..618045c0 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -1439,14 
+1439,24 @@ def test_shifted_expr_div_array_propagates(self, v: Variable) -> None: result = expr / arr assert np.isnan(result.coeffs.squeeze().values[0]) - def test_variable_to_linexpr_nan_coefficient(self, v: Variable) -> None: - """to_linexpr fills NaN with 0 under both conventions (internal conversion).""" + @pytest.mark.legacy_only + def test_variable_to_linexpr_nan_coefficient_fills(self, v: Variable) -> None: + """Legacy: to_linexpr fills NaN coefficient with 0.""" nan_coeff = np.ones(v.sizes["dim_2"]) nan_coeff[0] = np.nan result = v.to_linexpr(nan_coeff) assert not np.isnan(result.coeffs.squeeze().values).any() assert result.coeffs.squeeze().values[0] == 0.0 + @pytest.mark.v1_only + def test_variable_to_linexpr_nan_coefficient_masks(self, v: Variable) -> None: + """v1: NaN coefficient in to_linexpr masks the term.""" + nan_coeff = np.ones(v.sizes["dim_2"]) + nan_coeff[0] = np.nan + result = v.to_linexpr(nan_coeff) + assert result.isnull().values[0] + assert not result.isnull().values[1] + class TestMultiDim: @pytest.mark.legacy_only def test_multidim_subset_mul_fills(self, m: Model) -> None: From e9a5a400bf956977dc2c007ebdd637111d73db8d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:55:33 +0100 Subject: [PATCH 3/6] Show .sel() masking approach in missing-data notebook demo Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/missing-data.ipynb | 41 +++---------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/examples/missing-data.ipynb b/examples/missing-data.ipynb index 2d4a3ed9..34921172 100644 --- a/examples/missing-data.ipynb +++ b/examples/missing-data.ipynb @@ -63,42 +63,7 @@ "cell_type": "markdown", "id": "rqgv2f7nwpb", "metadata": {}, - "source": [ - "---\n", - "\n", - "## NaN internals\n", - "\n", - "This section covers the internal mechanics of NaN handling. 
For the user-facing rules, see [Arithmetic Convention](arithmetic-convention.ipynb#nan-convention).\n", - "\n", - "### How NaN enters\n", - "\n", - "NaN can enter linopy data structures from three sources:\n", - "\n", - "1. **`mask=` argument** at construction (`add_variables`, `add_constraints`) — you explicitly declare which slots exist.\n", - "2. **Structural operations** that produce absent slots: `.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()` (with missing combinations).\n", - "3. **User-supplied data** with NaN values — NaN in multiplicative constants masks terms; NaN in additive constants is treated as 0.\n", - "\n", - "Operations that do **not** produce NaN: `.roll()` (circular), `.sel()` / `.isel()` (subset), `.drop_sel()` (drops), `.expand_dims()` / `.broadcast_like()` (broadcast existing data).\n", - "\n", - "### How NaN propagates\n", - "\n", - "An expression is a sum of terms. Each term has a coefficient, a variable reference, and the expression has a shared constant. NaN marks an **individual term** as absent — it does not mask the entire coordinate.\n", - "\n", - "When expressions are combined (e.g., `x*2 + y.shift(time=1)`), each term is kept independently. At time=0, `y.shift` contributes no term (NaN coeffs, vars=-1), but `x*2` is still valid. The result at time=0 is `2*x[0]` — not absent.\n", - "\n", - "A coordinate is only fully absent when **all** terms have vars=-1 **and** the constant is NaN. This is exactly what `isnull()` checks.\n", - "\n", - "### Where NaN lives\n", - "\n", - "NaN is burned directly into the float fields: `coeffs`, `const`, `rhs`, `lower`, `upper`. Integer fields (`labels`, `vars`) use **-1** as their equivalent sentinel. 
There is no separate boolean mask array.\n", - "\n", - "### Why this is consistent\n", - "\n", - "- **`lhs >= rhs` is `lhs - rhs >= 0`**, so RHS obeys the same rule as any constant — no special case.\n", - "- **No dual role for NaN**: it always means \"absent/nothing here.\" Internal NaN (from `shift`, `mask=`) and user NaN (from data) are treated identically.\n", - "\n", - "- **Absent terms, not absent coordinates**: combining a valid expression with a partially-absent one does not destroy the valid part. Only when *every* term at a coordinate is absent is the coordinate itself absent." - ] + "source": "---\n\n## NaN internals\n\nThis section covers the internal mechanics of NaN handling. For the user-facing rules, see [Arithmetic Convention](arithmetic-convention.ipynb#nan-convention).\n\n### How NaN enters\n\nNaN can enter linopy data structures from three sources:\n\n1. **`mask=` argument** at construction (`add_variables`, `add_constraints`) — you explicitly declare which slots exist.\n2. **Structural operations** that produce absent slots: `.shift()`, `.where()`, `.reindex()`, `.reindex_like()`, `.unstack()` (with missing combinations).\n3. **User-supplied data** with NaN values — NaN in multiplicative constants masks terms; NaN in additive constants is treated as 0.\n\nOperations that do **not** produce NaN: `.roll()` (circular), `.sel()` / `.isel()` (subset), `.drop_sel()` (drops), `.expand_dims()` / `.broadcast_like()` (broadcast existing data).\n\n### How NaN propagates\n\nAn expression is a sum of terms. Each term has a coefficient, a variable reference, and the expression has a shared constant. NaN marks an **individual term** as absent — it does not mask the entire coordinate.\n\nWhen expressions are combined (e.g., `x*2 + y.shift(time=1)`), each term is kept independently. At time=0, `y.shift` contributes no term (NaN coeffs, vars=-1), but `x*2` is still valid. 
The result at time=0 is `2*x[0]` — not absent.\n\nA coordinate is only fully absent when **all** terms have vars=-1 **and** the constant is NaN. This is exactly what `isnull()` checks.\n\n### Where NaN lives\n\nNaN is burned directly into the float fields: `coeffs`, `const`, `rhs`, `lower`, `upper`. Integer fields (`labels`, `vars`) use **-1** as their equivalent sentinel. There is no separate boolean mask array.\n\n### Why this is consistent\n\n- **No dual role for NaN**: it always means \"absent/nothing here.\" Internal NaN (from `shift`, `mask=`) and user NaN (from data) are treated identically.\n- **Absent terms, not absent coordinates**: combining a valid expression with a partially-absent one does not destroy the valid part. Only when *every* term at a coordinate is absent is the coordinate itself absent.\n- **Constraint RHS is an exception**: while `expr <= rhs` is algebraically `expr - rhs <= 0`, NaN in the RHS raises `ValueError` rather than being treated as 0. Silently turning `expr <= nan` into `expr <= 0` would almost certainly be wrong — constraints are declarations, not arithmetic." 
}, { "cell_type": "markdown", @@ -121,7 +86,7 @@ "start_time": "2026-03-18T14:37:57.582212Z" } }, - "source": "# NaN in arithmetic — automatic handling\nadd_result = x + data\nmul_result = x * data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True\nprint()\n\n# Constraint RHS with NaN raises\ntry:\n x >= data\nexcept ValueError as e:\n print(\"constraint: ValueError raised —\", str(e)[:60])", + "source": "# NaN in arithmetic — automatic handling\nadd_result = x + data\nmul_result = x * data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True\nprint()\n\n# Constraint RHS with NaN raises — use .sel() to exclude NaN positions\ntry:\n x >= data\nexcept ValueError as e:\n print(\"constraint: ValueError raised —\", str(e)[:60])\n\n# Fix: select only valid positions\nvalid = data.notnull()\ncon = m.add_constraints(x.sel(time=valid) >= data.sel(time=valid), name=\"data_bound\")\ncon", "outputs": [], "execution_count": null }, @@ -311,7 +276,7 @@ "cell_type": "markdown", "id": "legacy-header", "metadata": {}, - "source": "---\n\n## Legacy NaN behavior (for comparison)\n\nUnder legacy, NaN was handled implicitly:\n- **In arithmetic**: silently replaced with neutral elements (0 for add/sub/mul, 1 for div)\n- **In constraint RHS**: NaN meant \"no constraint here\" — auto-masked internally\n- **With `auto_mask=True`**: NaN in variable bounds meant \"no variable here\"\n\nThis was convenient but could mask data quality issues. A NaN from a data pipeline bug would silently become 0, producing a valid but wrong model. The v1 convention makes NaN handling more transparent: NaN masks in mul/div (removing the term entirely) and contributes 0 in add/sub.\n\n### Migration\n\n| Legacy code | v1 behavior | Action needed? 
|\n|---|---|---|\n| `x + data_with_nans` | NaN → 0 (same effect) | None |\n| `x * data_with_nans` | NaN → **absent** (legacy filled with 0) | If you wanted zero terms, use `.fillna(0)` |\n| `x / data_with_nans` | NaN → **absent** (legacy filled with 1) | If you wanted identity, use `.fillna(1)` |\n| `m.add_constraints(expr >= nan_rhs)` | NaN → constraint skipped (same effect) | None |\n| `Model(auto_mask=True)` | Explicit `mask=` or `.sel()` | Same as before |" + "source": "---\n\n## Legacy NaN behavior (for comparison)\n\nUnder legacy, NaN was handled implicitly:\n- **In arithmetic**: silently replaced with neutral elements (0 for add/sub/mul, 1 for div)\n- **In constraint RHS**: NaN meant \"no constraint here\" — auto-masked internally\n- **With `auto_mask=True`**: NaN in variable bounds meant \"no variable here\"\n\nThis was convenient but could mask data quality issues. A NaN from a data pipeline bug would silently become 0, producing a valid but wrong model. The v1 convention makes NaN handling more transparent: NaN masks in mul/div (removing the term entirely) and contributes 0 in add/sub.\n\n### Migration\n\n| Legacy code | v1 behavior | Action needed? 
|\n|---|---|---|\n| `x + data_with_nans` | NaN → 0 (same effect) | None |\n| `x * data_with_nans` | NaN → **absent** (legacy filled with 0) | If you wanted zero terms, use `.fillna(0)` |\n| `x / data_with_nans` | NaN → **absent** (legacy filled with 1) | If you wanted identity, use `.fillna(1)` |\n| `m.add_constraints(expr >= nan_rhs)` | **`ValueError`** (legacy skipped silently) | Use `.sel()` or `mask=` to exclude NaN positions |\n| `Model(auto_mask=True)` | Explicit `mask=` or `.sel()` | Same as before |" }, { "cell_type": "markdown", From 099277b9fbdaa820d8692cdc59bc7d050dc38e65 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:57:03 +0100 Subject: [PATCH 4/6] Add mask= approach alongside .sel() in constraint NaN demo Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/missing-data.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/missing-data.ipynb b/examples/missing-data.ipynb index 34921172..9ccd30fd 100644 --- a/examples/missing-data.ipynb +++ b/examples/missing-data.ipynb @@ -86,7 +86,7 @@ "start_time": "2026-03-18T14:37:57.582212Z" } }, - "source": "# NaN in arithmetic — automatic handling\nadd_result = x + data\nmul_result = x * data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True\nprint()\n\n# Constraint RHS with NaN raises — use .sel() to exclude NaN positions\ntry:\n x >= data\nexcept ValueError as e:\n print(\"constraint: ValueError raised —\", str(e)[:60])\n\n# Fix: select only valid positions\nvalid = data.notnull()\ncon = m.add_constraints(x.sel(time=valid) >= data.sel(time=valid), name=\"data_bound\")\ncon", + "source": "# NaN in arithmetic — automatic handling\nadd_result = x + data\nmul_result = x * data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", 
mul_result.isnull().sel(time=1).item()) # True\nprint()\n\n# Constraint RHS with NaN raises — use .sel() or mask= to exclude NaN positions\ntry:\n x >= data\nexcept ValueError as e:\n print(\"constraint: ValueError raised —\", str(e)[:60])\n\n# Fix option 1: .sel() — constraint has fewer coordinates\nvalid = data.notnull()\ncon = m.add_constraints(x.sel(time=valid) >= data.sel(time=valid), name=\"data_bound\")\nprint(\"\\n.sel() result:\")\nprint(con)\n\n# Fix option 2: mask= — constraint keeps all coordinates, NaN positions masked\ncon2 = m.add_constraints(\n x >= data.fillna(0), name=\"data_bound_masked\", mask=data.notnull()\n)\nprint(\"\\nmask= result:\")\nprint(con2)", "outputs": [], "execution_count": null }, From 833f346178c07e9710cc3758347d4d4ea67b504b Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 18 Mar 2026 15:58:33 +0100 Subject: [PATCH 5/6] Split constraint NaN demo into separate cells Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/missing-data.ipynb | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/examples/missing-data.ipynb b/examples/missing-data.ipynb index 9ccd30fd..a9d245f7 100644 --- a/examples/missing-data.ipynb +++ b/examples/missing-data.ipynb @@ -86,10 +86,34 @@ "start_time": "2026-03-18T14:37:57.582212Z" } }, - "source": "# NaN in arithmetic — automatic handling\nadd_result = x + data\nmul_result = x * data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True\nprint()\n\n# Constraint RHS with NaN raises — use .sel() or mask= to exclude NaN positions\ntry:\n x >= data\nexcept ValueError as e:\n print(\"constraint: ValueError raised —\", str(e)[:60])\n\n# Fix option 1: .sel() — constraint has fewer coordinates\nvalid = data.notnull()\ncon = m.add_constraints(x.sel(time=valid) >= data.sel(time=valid), 
name=\"data_bound\")\nprint(\"\\n.sel() result:\")\nprint(con)\n\n# Fix option 2: mask= — constraint keeps all coordinates, NaN positions masked\ncon2 = m.add_constraints(\n x >= data.fillna(0), name=\"data_bound_masked\", mask=data.notnull()\n)\nprint(\"\\nmask= result:\")\nprint(con2)", + "source": "# NaN in arithmetic — automatic handling\nadd_result = x + data\nmul_result = x * data\n\nprint(\"add: NaN position const =\", add_result.const.sel(time=1).item()) # 0.0\nprint(\"mul: NaN position absent?\", mul_result.isnull().sel(time=1).item()) # True", "outputs": [], "execution_count": null }, + { + "cell_type": "code", + "id": "j0cr0fxs4u", + "source": "# Constraint RHS with NaN raises\ntry:\n x >= data\nexcept ValueError as e:\n print(\"ValueError:\", str(e))", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "id": "8bmseul44h6", + "source": "# Fix option 1: .sel() — constraint has fewer coordinates\nvalid = data.notnull()\nm.add_constraints(x.sel(time=valid) >= data.sel(time=valid), name=\"data_bound\")", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "id": "pde026g5zy", + "source": "# Fix option 2: mask= — constraint keeps all coordinates, NaN positions masked\nm.add_constraints(x >= data.fillna(0), name=\"data_bound_masked\", mask=data.notnull())", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, { "cell_type": "markdown", "id": "fillna-header", From 42a8855b526cec1d9831635bf3f261573c91a1de Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 18 Mar 2026 16:47:18 +0100 Subject: [PATCH 6/6] Require fill_value when alignment introduces NaN in mul/div NaN already in the data (user-intentional) still masks silently. But NaN introduced by coordinate alignment (join="left"/"outer") now raises ValueError, requiring explicit fill_value=0 or fill_value=1. 
This catches the footgun where a partial scaling factor (e.g., rate covering only some techs) silently masks terms instead of preserving them with the identity element. Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/arithmetic-convention.ipynb | 152 ++++++++++----------- examples/mixed-coordinate-arithmetic.ipynb | 108 +++++++-------- linopy/expressions.py | 33 ++++- test/test_algebraic_properties.py | 20 ++- test/test_linear_expression.py | 56 +++----- 5 files changed, 181 insertions(+), 188 deletions(-) diff --git a/examples/arithmetic-convention.ipynb b/examples/arithmetic-convention.ipynb index 103d5d2e..c2f50531 100644 --- a/examples/arithmetic-convention.ipynb +++ b/examples/arithmetic-convention.ipynb @@ -11,8 +11,8 @@ "id": "imports", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:38.994093Z", - "start_time": "2026-03-18T14:34:38.298539Z" + "end_time": "2026-03-18T14:56:26.272476Z", + "start_time": "2026-03-18T14:56:25.483445Z" } }, "source": [ @@ -40,8 +40,8 @@ "id": "opt-in", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:38.999338Z", - "start_time": "2026-03-18T14:34:38.997821Z" + "end_time": "2026-03-18T14:56:26.279296Z", + "start_time": "2026-03-18T14:56:26.277566Z" } }, "source": [ @@ -93,8 +93,8 @@ "id": "v1-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.045856Z", - "start_time": "2026-03-18T14:34:39.006455Z" + "end_time": "2026-03-18T14:56:26.324957Z", + "start_time": "2026-03-18T14:56:26.285975Z" } }, "source": [ @@ -125,8 +125,8 @@ "id": "v1-same-coords", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.057693Z", - "start_time": "2026-03-18T14:34:39.048822Z" + "end_time": "2026-03-18T14:56:26.341720Z", + "start_time": "2026-03-18T14:56:26.329927Z" } }, "source": [ @@ -141,8 +141,8 @@ "id": "v1-matching-constant", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.067745Z", - "start_time": "2026-03-18T14:34:39.060274Z" + "end_time": 
"2026-03-18T14:56:26.357764Z", + "start_time": "2026-03-18T14:56:26.346487Z" } }, "source": [ @@ -158,8 +158,8 @@ "id": "v1-broadcast-constant", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.082063Z", - "start_time": "2026-03-18T14:34:39.072201Z" + "end_time": "2026-03-18T14:56:26.395182Z", + "start_time": "2026-03-18T14:56:26.382039Z" } }, "source": [ @@ -175,8 +175,8 @@ "id": "v1-broadcast-expr", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.096271Z", - "start_time": "2026-03-18T14:34:39.084937Z" + "end_time": "2026-03-18T14:56:26.427143Z", + "start_time": "2026-03-18T14:56:26.413559Z" } }, "source": [ @@ -191,8 +191,8 @@ "id": "v1-scalar", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.104795Z", - "start_time": "2026-03-18T14:34:39.099576Z" + "end_time": "2026-03-18T14:56:26.438414Z", + "start_time": "2026-03-18T14:56:26.432214Z" } }, "source": [ @@ -207,8 +207,8 @@ "id": "v1-constraint-broadcast", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.119915Z", - "start_time": "2026-03-18T14:34:39.108496Z" + "end_time": "2026-03-18T14:56:26.460657Z", + "start_time": "2026-03-18T14:56:26.445807Z" } }, "source": [ @@ -232,8 +232,8 @@ "id": "v1-mismatch-expr", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.130986Z", - "start_time": "2026-03-18T14:34:39.123779Z" + "end_time": "2026-03-18T14:56:26.475771Z", + "start_time": "2026-03-18T14:56:26.468918Z" } }, "source": [ @@ -254,8 +254,8 @@ "id": "v1-mismatch-constant", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.137583Z", - "start_time": "2026-03-18T14:34:39.134202Z" + "end_time": "2026-03-18T14:56:26.496761Z", + "start_time": "2026-03-18T14:56:26.490520Z" } }, "source": [ @@ -274,8 +274,8 @@ "id": "v1-mismatch-constraint", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.148566Z", - "start_time": "2026-03-18T14:34:39.144984Z" + "end_time": "2026-03-18T14:56:26.519721Z", + "start_time": 
"2026-03-18T14:56:26.516119Z" } }, "source": [ @@ -308,8 +308,8 @@ "id": "v1-sel-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.164561Z", - "start_time": "2026-03-18T14:34:39.154727Z" + "end_time": "2026-03-18T14:56:26.536876Z", + "start_time": "2026-03-18T14:56:26.527266Z" } }, "source": [ @@ -333,8 +333,8 @@ "id": "v1-join-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.181064Z", - "start_time": "2026-03-18T14:34:39.171966Z" + "end_time": "2026-03-18T14:56:26.550680Z", + "start_time": "2026-03-18T14:56:26.542068Z" } }, "source": [ @@ -348,13 +348,11 @@ "id": "v1-join-outer", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.193303Z", - "start_time": "2026-03-18T14:34:39.184848Z" + "end_time": "2026-03-18T14:56:26.563433Z", + "start_time": "2026-03-18T14:56:26.556410Z" } }, - "source": [ - "x.mul(partial, join=\"left\") # keep x's coords; missing factor positions become absent" - ], + "source": "x.mul(partial, join=\"left\", fill_value=0) # keep x's coords, fill missing factor with 0", "outputs": [], "execution_count": null }, @@ -373,8 +371,8 @@ "id": "v1-assign-coords-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.206073Z", - "start_time": "2026-03-18T14:34:39.196659Z" + "end_time": "2026-03-18T14:56:26.576391Z", + "start_time": "2026-03-18T14:56:26.566996Z" } }, "source": [ @@ -399,8 +397,8 @@ "id": "v1-align-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.219399Z", - "start_time": "2026-03-18T14:34:39.210230Z" + "end_time": "2026-03-18T14:56:26.589586Z", + "start_time": "2026-03-18T14:56:26.580494Z" } }, "source": [ @@ -433,8 +431,8 @@ "id": "p3a6v5kx6es", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.252483Z", - "start_time": "2026-03-18T14:34:39.228667Z" + "end_time": "2026-03-18T14:56:26.614440Z", + "start_time": "2026-03-18T14:56:26.594136Z" } }, "source": [ @@ -488,8 +486,8 @@ "id": "legacy-switch", 
"metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.262226Z", - "start_time": "2026-03-18T14:34:39.260387Z" + "end_time": "2026-03-18T14:56:26.618494Z", + "start_time": "2026-03-18T14:56:26.617133Z" } }, "source": [ @@ -506,8 +504,8 @@ "id": "legacy-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.270153Z", - "start_time": "2026-03-18T14:34:39.266223Z" + "end_time": "2026-03-18T14:56:26.629119Z", + "start_time": "2026-03-18T14:56:26.624906Z" } }, "source": [ @@ -538,8 +536,8 @@ "id": "legacy-subset", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.284452Z", - "start_time": "2026-03-18T14:34:39.276080Z" + "end_time": "2026-03-18T14:56:26.641157Z", + "start_time": "2026-03-18T14:56:26.632415Z" } }, "source": [ @@ -554,8 +552,8 @@ "id": "legacy-same-size", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.301725Z", - "start_time": "2026-03-18T14:34:39.292060Z" + "end_time": "2026-03-18T14:56:26.653480Z", + "start_time": "2026-03-18T14:56:26.645172Z" } }, "source": [ @@ -584,8 +582,8 @@ "id": "legacy-nan-fill", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.310943Z", - "start_time": "2026-03-18T14:34:39.305097Z" + "end_time": "2026-03-18T14:56:26.663322Z", + "start_time": "2026-03-18T14:56:26.658106Z" } }, "source": [ @@ -611,8 +609,8 @@ "id": "legacy-constraint", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.323117Z", - "start_time": "2026-03-18T14:34:39.316143Z" + "end_time": "2026-03-18T14:56:26.674201Z", + "start_time": "2026-03-18T14:56:26.668133Z" } }, "source": [ @@ -628,8 +626,8 @@ "id": "legacy-restore-v1", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.329262Z", - "start_time": "2026-03-18T14:34:39.327680Z" + "end_time": "2026-03-18T14:56:26.688194Z", + "start_time": "2026-03-18T14:56:26.686792Z" } }, "source": [ @@ -651,8 +649,8 @@ "id": "join-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.339568Z", - 
"start_time": "2026-03-18T14:34:39.334733Z" + "end_time": "2026-03-18T14:56:26.706424Z", + "start_time": "2026-03-18T14:56:26.702247Z" } }, "source": [ @@ -672,8 +670,8 @@ "id": "join-inner", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.352078Z", - "start_time": "2026-03-18T14:34:39.343332Z" + "end_time": "2026-03-18T14:56:26.718732Z", + "start_time": "2026-03-18T14:56:26.709019Z" } }, "source": [ @@ -688,8 +686,8 @@ "id": "join-outer", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.364865Z", - "start_time": "2026-03-18T14:34:39.355179Z" + "end_time": "2026-03-18T14:56:26.732665Z", + "start_time": "2026-03-18T14:56:26.722208Z" } }, "source": [ @@ -704,8 +702,8 @@ "id": "join-left", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.383599Z", - "start_time": "2026-03-18T14:34:39.374361Z" + "end_time": "2026-03-18T14:56:26.756112Z", + "start_time": "2026-03-18T14:56:26.746555Z" } }, "source": [ @@ -720,8 +718,8 @@ "id": "join-right", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.395480Z", - "start_time": "2026-03-18T14:34:39.386776Z" + "end_time": "2026-03-18T14:56:26.781891Z", + "start_time": "2026-03-18T14:56:26.771923Z" } }, "source": [ @@ -736,8 +734,8 @@ "id": "join-override", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.409201Z", - "start_time": "2026-03-18T14:34:39.400520Z" + "end_time": "2026-03-18T14:56:26.793874Z", + "start_time": "2026-03-18T14:56:26.785830Z" } }, "source": [ @@ -760,8 +758,8 @@ "id": "join-mul-example", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.418196Z", - "start_time": "2026-03-18T14:34:39.411921Z" + "end_time": "2026-03-18T14:56:26.804553Z", + "start_time": "2026-03-18T14:56:26.798349Z" } }, "source": [ @@ -778,8 +776,8 @@ "id": "join-constraint", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.430153Z", - "start_time": "2026-03-18T14:34:39.424328Z" + "end_time": "2026-03-18T14:56:26.816201Z", + 
"start_time": "2026-03-18T14:56:26.810482Z" } }, "source": [ @@ -813,8 +811,8 @@ "id": "practical-setup", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.439625Z", - "start_time": "2026-03-18T14:34:39.435610Z" + "end_time": "2026-03-18T14:56:26.832787Z", + "start_time": "2026-03-18T14:56:26.828682Z" } }, "source": [ @@ -833,8 +831,8 @@ "id": "practical-capacity", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.456108Z", - "start_time": "2026-03-18T14:34:39.444200Z" + "end_time": "2026-03-18T14:56:26.857454Z", + "start_time": "2026-03-18T14:56:26.844827Z" } }, "source": [ @@ -850,8 +848,8 @@ "id": "practical-solar", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.469841Z", - "start_time": "2026-03-18T14:34:39.459321Z" + "end_time": "2026-03-18T14:56:26.878730Z", + "start_time": "2026-03-18T14:56:26.868205Z" } }, "source": [ @@ -871,8 +869,8 @@ "id": "practical-peak", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:34:39.485345Z", - "start_time": "2026-03-18T14:34:39.472969Z" + "end_time": "2026-03-18T14:56:26.895908Z", + "start_time": "2026-03-18T14:56:26.882821Z" } }, "source": [ diff --git a/examples/mixed-coordinate-arithmetic.ipynb b/examples/mixed-coordinate-arithmetic.ipynb index 1a201e97..af49794e 100644 --- a/examples/mixed-coordinate-arithmetic.ipynb +++ b/examples/mixed-coordinate-arithmetic.ipynb @@ -8,22 +8,22 @@ }, { "cell_type": "code", - "execution_count": null, "id": "vnmxvu41lk", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:27.667374Z", - "start_time": "2026-03-18T14:21:27.664771Z" + "end_time": "2026-03-18T15:11:58.624527Z", + "start_time": "2026-03-18T15:11:58.002434Z" } }, - "outputs": [], "source": [ "import xarray as xr\n", "\n", "import linopy\n", "\n", "linopy.options[\"arithmetic_convention\"] = \"v1\"" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -33,15 +33,13 @@ }, { "cell_type": "code", - "execution_count": null, 
"id": "3fe7y8gn5a2", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:27.742431Z", - "start_time": "2026-03-18T14:21:27.719644Z" + "end_time": "2026-03-18T15:11:58.665108Z", + "start_time": "2026-03-18T15:11:58.627517Z" } }, - "outputs": [], "source": [ "m = linopy.Model()\n", "\n", @@ -57,7 +55,9 @@ "cost_a = xr.DataArray([7, 9, float(\"nan\")], coords=[(\"tech\", tech_all)])\n", "cost_b = xr.DataArray([float(\"nan\"), float(\"nan\"), 11], coords=[(\"tech\", tech_all)])\n", "cost_c = xr.DataArray([13, 17, 19], coords=[(\"tech\", tech_all)])" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -67,15 +67,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "biw39h6a1e", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:27.788816Z", - "start_time": "2026-03-18T14:21:27.756276Z" + "end_time": "2026-03-18T15:11:58.688387Z", + "start_time": "2026-03-18T15:11:58.667722Z" } }, - "outputs": [], "source": [ "combined = (\n", " cap_a.mul(cost_a.fillna(0), join=\"left\")\n", @@ -83,7 +81,9 @@ " .add(cap_c.mul(cost_c, join=\"left\"), join=\"outer\")\n", ")\n", "combined" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -99,15 +99,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "hb8n0uzb1u", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:27.818344Z", - "start_time": "2026-03-18T14:21:27.798495Z" + "end_time": "2026-03-18T15:11:58.726974Z", + "start_time": "2026-03-18T15:11:58.698597Z" } }, - "outputs": [], "source": [ "combined_v2 = (\n", " cap_a.mul(cost_a.dropna(\"tech\"), join=\"left\")\n", @@ -115,7 +113,9 @@ " .add(cap_c.mul(cost_c, join=\"left\"), join=\"outer\")\n", ")\n", "combined_v2" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -125,15 +125,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "311s75nab7q", "metadata": { "ExecuteTime": { - "end_time": 
"2026-03-18T14:21:27.862196Z", - "start_time": "2026-03-18T14:21:27.829301Z" + "end_time": "2026-03-18T15:11:58.753074Z", + "start_time": "2026-03-18T15:11:58.732344Z" } }, - "outputs": [], "source": [ "# Costs scoped to each variable's technologies — no NaN needed\n", "cost_a_scoped = xr.DataArray([7, 9], coords=[(\"tech\", tech_a)])\n", @@ -146,7 +144,9 @@ " .add(cap_c * cost_c_scoped, join=\"outer\")\n", ")\n", "combined_v3" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -156,15 +156,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "azddqkp858", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:27.902375Z", - "start_time": "2026-03-18T14:21:27.872779Z" + "end_time": "2026-03-18T15:11:58.782598Z", + "start_time": "2026-03-18T15:11:58.761871Z" } }, - "outputs": [], "source": [ "# Align all variables and costs to the union of tech coordinates\n", "cap_a_al, cap_b_al, cap_c_al, cost_a_al, cost_b_al, cost_c_al = linopy.align(\n", @@ -174,7 +172,9 @@ "# NaN in costs naturally masks — no fillna needed!\n", "combined_v4 = cap_a_al * cost_a_al + cap_b_al * cost_b_al + cap_c_al * cost_c_al\n", "combined_v4" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -184,18 +184,18 @@ }, { "cell_type": "code", - "execution_count": null, "id": "p92dqoyi8d", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:27.914837Z", - "start_time": "2026-03-18T14:21:27.911829Z" + "end_time": "2026-03-18T15:11:58.788715Z", + "start_time": "2026-03-18T15:11:58.787060Z" } }, - "outputs": [], "source": [ "rate = xr.DataArray([1.04], coords=[(\"tech\", [\"gas\"])])" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -205,26 +205,16 @@ }, { "cell_type": "code", - "execution_count": null, "id": "8pw3s5xra62", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:27.997888Z", - "start_time": "2026-03-18T14:21:27.976186Z" + "end_time": 
"2026-03-18T15:14:37.549415Z", + "start_time": "2026-03-18T15:14:37.516481Z" } }, + "source": "combined_rate_a = (\n cap_a.mul(cost_a.fillna(0), join=\"left\")\n .add(cap_b.mul(cost_b.fillna(0), join=\"left\"), join=\"outer\")\n .add(\n cap_c.mul(cost_c, join=\"left\").mul(rate, join=\"left\", fill_value=1),\n join=\"outer\",\n )\n)\ncombined_rate_a", "outputs": [], - "source": [ - "combined_rate_a = (\n", - " cap_a.mul(cost_a.fillna(0), join=\"left\")\n", - " .add(cap_b.mul(cost_b.fillna(0), join=\"left\"), join=\"outer\")\n", - " .add(\n", - " cap_c.mul(cost_c, join=\"left\").mul(rate, join=\"left\", fill_value=1),\n", - " join=\"outer\",\n", - " )\n", - ")\n", - "combined_rate_a" - ] + "execution_count": null }, { "cell_type": "markdown", @@ -234,15 +224,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "rtyit39tuj", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:28.079495Z", - "start_time": "2026-03-18T14:21:28.007888Z" + "end_time": "2026-03-18T15:11:58.835196Z", + "start_time": "2026-03-18T15:11:58.815621Z" } }, - "outputs": [], "source": [ "# Extend rate to all techs (fill with 1 = no scaling), then multiply with cost\n", "cost_c_rated = cost_c * rate.reindex(tech=tech_all).fillna(1)\n", @@ -254,7 +242,9 @@ " .add(cap_c * cost_c_rated, join=\"outer\")\n", ")\n", "combined_rate_b" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -264,15 +254,13 @@ }, { "cell_type": "code", - "execution_count": null, "id": "tymb0e9grj", "metadata": { "ExecuteTime": { - "end_time": "2026-03-18T14:21:28.087846Z", - "start_time": "2026-03-18T13:41:33.345728Z" + "end_time": "2026-03-18T15:11:58.857705Z", + "start_time": "2026-03-18T15:11:58.838815Z" } }, - "outputs": [], "source": [ "# NaN in cost_a at \"gas\" naturally masks cap_a at \"gas\" — no fillna needed!\n", "combined_nan_mask = (\n", @@ -281,7 +269,9 @@ " .add(cap_c * cost_c, join=\"outer\")\n", ")\n", "combined_nan_mask" - ] + ], + "outputs": [], + 
"execution_count": null }, { "cell_type": "markdown", diff --git a/linopy/expressions.py b/linopy/expressions.py index 9c6b5fdb..c2b47dc9 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -669,6 +669,7 @@ def _apply_constant_op( op: Callable[[DataArray, DataArray], DataArray], fill_value: float, join: JoinOptions | None = None, + _user_fill_value: bool = True, ) -> GenericExpression: is_legacy = ( join is None and options["arithmetic_convention"] == "legacy" @@ -683,6 +684,8 @@ def _apply_constant_op( scalar = DataArray(other) return self.assign(coeffs=op(coeffs, scalar), const=op(const, scalar)) factor = as_dataarray(other, coords=self.coords, dims=self.coord_dims) + # Track which positions already have NaN before alignment + pre_align_nan = factor.isnull() if not is_legacy else None self_const, factor, needs_data_reindex = self._align_constant( factor, fill_value=fill_value, join=join ) @@ -691,7 +694,23 @@ def _apply_constant_op( self_const = self_const.fillna(0) # In v1, NaN in factor acts as a mask: positions where factor is NaN # become fully absent slots (vars=-1, coeffs=NaN, const=NaN). + # But NaN *introduced by alignment* (not in the original data) requires + # an explicit fill_value — otherwise it's ambiguous whether the user + # wants masking (fill_value=0) or identity (fill_value=1). nan_mask = factor.isnull() if not is_legacy else None + if nan_mask is not None and nan_mask.any() and not _user_fill_value: + # Positions not in the original factor were not NaN — they + # didn't exist. Fill with False so they count as alignment-introduced. + alignment_nan = nan_mask & ~pre_align_nan.reindex_like( + nan_mask, fill_value=False + ) + if alignment_nan.any(): + raise ValueError( + "Factor has NaN after coordinate alignment. 
This is " + "ambiguous for mul/div — pass fill_value= explicitly:\n" + " .mul(other, join=..., fill_value=0) # NaN → 0 (kill term)\n" + " .mul(other, join=..., fill_value=1) # NaN → 1 (no scaling)" + ) if needs_data_reindex: fv = {**self._fill_value, "const": 0} data = self.data.reindex_like(self_const, fill_value=fv) @@ -725,11 +744,16 @@ def _multiply_by_constant( join: JoinOptions | None = None, fill_value: float | None = None, ) -> GenericExpression: + user_specified = fill_value is not None if fill_value is None: is_legacy = options["arithmetic_convention"] == "legacy" or join == "legacy" fill_value = 0 if is_legacy else np.nan return self._apply_constant_op( - other, operator.mul, fill_value=fill_value, join=join + other, + operator.mul, + fill_value=fill_value, + join=join, + _user_fill_value=user_specified, ) def _divide_by_constant( @@ -738,11 +762,16 @@ def _divide_by_constant( join: JoinOptions | None = None, fill_value: float | None = None, ) -> GenericExpression: + user_specified = fill_value is not None if fill_value is None: is_legacy = options["arithmetic_convention"] == "legacy" or join == "legacy" fill_value = 1 if is_legacy else np.nan return self._apply_constant_op( - other, operator.truediv, fill_value=fill_value, join=join + other, + operator.truediv, + fill_value=fill_value, + join=join, + _user_fill_value=user_specified, ) def __div__(self: GenericExpression, other: SideLike) -> GenericExpression: diff --git a/test/test_algebraic_properties.py b/test/test_algebraic_properties.py index 780ae2a0..ade944fa 100644 --- a/test/test_algebraic_properties.py +++ b/test/test_algebraic_properties.py @@ -505,14 +505,12 @@ def test_add_without_fill_value_still_revives(self, x: Variable) -> None: assert result.const.values[0] == 5 @pytest.mark.v1_only - def test_mul_misaligned_da_masks_without_fill_value(self, x: Variable) -> None: - """In v1, mul with misaligned DataArray masks terms where factor is NaN.""" + def 
test_mul_misaligned_da_raises_without_fill_value(self, x: Variable) -> None: + """In v1, mul with alignment-introduced NaN raises without fill_value.""" expr = 1 * x da = xr.DataArray([2.0], dims="time", coords={"time": [1]}) - result = expr.mul(da, join="left") - # time=0 has NaN factor → absent; time=1 has factor 2 → coeff 2 - assert result.isnull().sel(time=0).item() - assert result.coeffs.squeeze().sel(time=1).item() == pytest.approx(2.0) + with pytest.raises(ValueError, match="fill_value"): + expr.mul(da, join="left") @pytest.mark.v1_only def test_mul_misaligned_da_with_fill_value(self, x: Variable) -> None: @@ -523,14 +521,12 @@ def test_mul_misaligned_da_with_fill_value(self, x: Variable) -> None: assert not np.isnan(result.coeffs.values).all() @pytest.mark.v1_only - def test_div_misaligned_da_masks_without_fill_value(self, x: Variable) -> None: - """In v1, div with misaligned DataArray masks terms where divisor is NaN.""" + def test_div_misaligned_da_raises_without_fill_value(self, x: Variable) -> None: + """In v1, div with alignment-introduced NaN raises without fill_value.""" expr = 1 * x da = xr.DataArray([2.0], dims="time", coords={"time": [1]}) - result = expr.div(da, join="left") - # time=0 has NaN divisor → absent; time=1 has divisor 2 → coeff 0.5 - assert result.isnull().sel(time=0).item() - assert result.coeffs.squeeze().sel(time=1).item() == pytest.approx(0.5) + with pytest.raises(ValueError, match="fill_value"): + expr.div(da, join="left") @pytest.mark.v1_only def test_div_misaligned_da_with_fill_value(self, x: Variable) -> None: diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index 618045c0..75448e12 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -766,14 +766,13 @@ def test_subset_sub_var_raises(self, v: Variable, subset: xr.DataArray) -> None: @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) - def test_mul_subset_join_left_masks( + def 
test_mul_subset_join_left_raises( self, v: Variable, subset: xr.DataArray, operand: str ) -> None: - """In v1, join='left' with subset masks where factor is NaN.""" + """In v1, join='left' with subset raises without fill_value.""" target = v if operand == "var" else 1 * v - result = target.mul(subset, join="left") - # Positions not covered by subset are masked (absent) - assert result.isnull().any() + with pytest.raises(ValueError, match="fill_value"): + target.mul(subset, join="left") @pytest.mark.v1_only @pytest.mark.parametrize("operand", ["var", "expr"]) @@ -2484,15 +2483,11 @@ def test_mul_constant_join_outer(self, a: Variable) -> None: assert result.coeffs.sel(i=2).item() == 3 @pytest.mark.v1_only - def test_mul_constant_join_outer_masks(self, a: Variable) -> None: - """In v1, outer join with misaligned factor masks where factor is NaN.""" + def test_mul_constant_join_outer_raises(self, a: Variable) -> None: + """In v1, outer join with misaligned factor raises without fill_value.""" const = xr.DataArray([2, 3, 4], dims=["i"], coords={"i": [1, 2, 3]}) - result = a.to_linexpr().mul(const, join="outer") - assert list(result.data.indexes["i"]) == [0, 1, 2, 3] - # i=0 not in const → NaN factor → absent; i=3 not in a → absent - assert result.isnull().sel(i=0).item() - assert result.coeffs.sel(i=1).item() == 2 - assert result.coeffs.sel(i=2).item() == 3 + with pytest.raises(ValueError, match="fill_value"): + a.to_linexpr().mul(const, join="outer") @pytest.mark.v1_only def test_mul_constant_join_outer_with_fill_value(self, a: Variable) -> None: @@ -2529,14 +2524,11 @@ def test_div_constant_join_outer(self, a: Variable) -> None: assert list(result.data.indexes["i"]) == [0, 1, 2, 3] @pytest.mark.v1_only - def test_div_constant_join_outer_masks(self, a: Variable) -> None: - """In v1, outer join with misaligned divisor masks where divisor is NaN.""" + def test_div_constant_join_outer_raises(self, a: Variable) -> None: + """In v1, outer join with misaligned divisor raises 
without fill_value.""" const = xr.DataArray([2, 3, 4], dims=["i"], coords={"i": [1, 2, 3]}) - result = a.to_linexpr().div(const, join="outer") - assert list(result.data.indexes["i"]) == [0, 1, 2, 3] - # i=0 not in const → NaN divisor → absent - assert result.isnull().sel(i=0).item() - assert result.coeffs.sel(i=1).item() == pytest.approx(0.5) + with pytest.raises(ValueError, match="fill_value"): + a.to_linexpr().div(const, join="outer") @pytest.mark.v1_only def test_div_constant_join_outer_with_fill_value(self, a: Variable) -> None: @@ -2694,17 +2686,11 @@ def test_mul_constant_outer_fill_values(self, a: Variable) -> None: assert result.coeffs.squeeze().sel(i=0).item() == 0 @pytest.mark.v1_only - def test_mul_constant_outer_masks_v1(self, a: Variable) -> None: + def test_mul_constant_outer_raises_v1(self, a: Variable) -> None: expr = 1 * a + 5 other = xr.DataArray([2, 3], dims=["i"], coords={"i": [1, 3]}) - result = expr.mul(other, join="outer") - assert set(result.coords["i"].values) == {0, 1, 2, 3} - # i=1 has factor 2 → coeff 2, const 10 - assert result.coeffs.squeeze().sel(i=1).item() == 2 - assert result.const.sel(i=1).item() == 10 - # i=0, i=2 have NaN factor → absent - assert result.isnull().sel(i=0).item() - assert result.isnull().sel(i=2).item() + with pytest.raises(ValueError, match="fill_value"): + expr.mul(other, join="outer") @pytest.mark.v1_only def test_mul_constant_outer_with_fill_value_v1(self, a: Variable) -> None: @@ -2738,17 +2724,11 @@ def test_div_constant_outer_fill_values(self, a: Variable) -> None: assert result.coeffs.squeeze().sel(i=0).item() == pytest.approx(1.0) @pytest.mark.v1_only - def test_div_constant_outer_masks_v1(self, a: Variable) -> None: + def test_div_constant_outer_raises_v1(self, a: Variable) -> None: expr = 1 * a + 10 other = xr.DataArray([2.0, 5.0], dims=["i"], coords={"i": [1, 3]}) - result = expr.div(other, join="outer") - assert set(result.coords["i"].values) == {0, 1, 2, 3} - # i=1 has divisor 2 → coeff 0.5, const 5 
- assert result.coeffs.squeeze().sel(i=1).item() == pytest.approx(0.5) - assert result.const.sel(i=1).item() == pytest.approx(5.0) - # i=0, i=2 have NaN divisor → absent - assert result.isnull().sel(i=0).item() - assert result.isnull().sel(i=2).item() + with pytest.raises(ValueError, match="fill_value"): + expr.div(other, join="outer") @pytest.mark.v1_only def test_div_constant_outer_with_fill_value_v1(self, a: Variable) -> None: