devitocodes · ggorman · Feb 19, 2026 · FabioLuporini · Feb 20, 2026 · FabioLuporini
diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py
@@ -647,14 +647,47 @@
             loop_inc = f'{o.index} += {o.limits[2]}'
 
         # Append unbounded indices, if any
-        if o.uindices:
-            uinit = [f'{i.name} = {self.ccode(i.symbolic_min)}' for i in o.uindices]
+        if o.uindices and o.pragmas:
+            # When pragmas are present (e.g., OpenMP parallel for), the
+            # for-loop header must be in canonical form: a single loop
+            # variable in init/cond/incr. Compute uindex values inside
+            # the loop body instead, where they are automatically
+            # thread-private
+            uinit_stmts = []
+            for i in o.uindices:
+                if i.is_Modulo:
+                    # Modulo dimensions: symbolic_incr computes the
+                    # value from the parent dimension (the loop variable)
+                    value = i.symbolic_incr
+                else:
+                    # IncrDimension: derive value from loop variable
+                    # position relative to loop start
+                    if o.direction == Backward:
+                        n_iters = _max - o.dim
+                    else:
+                        n_iters = o.dim - _min
+                    _step = o.limits[2]
+                    if _step != 1:
+                        n_iters = n_iters / _step
+                    if i.symbolic_incr != 1:
+                        value = i.symbolic_min + n_iters * i.symbolic_incr
+                    else:
+                        value = i.symbolic_min + n_iters
+                uinit_stmts.append(
+                    c.Statement(f'int {i.name} = {self.ccode(value)}')
+                )
+            body = list(uinit_stmts) + list(body)
+        elif o.uindices:
+            uinit = [f'{i.name} = {self.ccode(i.symbolic_min)}'
+                     for i in o.uindices]
             loop_init = c.Line(', '.join([loop_init] + uinit))
 
             ustep = []
             for i in o.uindices:
                 op = '=' if i.is_Modulo else '+='
-                ustep.append(f'{i.name} {op} {self.ccode(i.symbolic_incr)}')
+                ustep.append(
+                    f'{i.name} {op} {self.ccode(i.symbolic_incr)}'
+                )
             loop_inc = c.Line(', '.join([loop_inc] + ustep))
 
         # Create For header+body

diff --git a/devito/types/dimension.py b/devito/types/dimension.py
@@ -1344,7 +1344,11 @@ def _arg_values(self, interval, grid=None, args=None, **kwargs):
         elif isinstance(self.parent, BlockDimension):
             # `self` is a BlockDimension within an outer BlockDimension, but
             # no value supplied -> the sub-block will span the entire block
-            return {name: args[self.parent.step.name]}
+            try:
+                return {name: args[self.parent.step.name]}
+            except AttributeError:
+                # Parent's step is a literal (e.g. Integer), use it directly
+                return {name: int(self.parent.step)}
         else:
             # TODO": Check the args for space order and apply heuristics (e.g.,
             # `2*space_order`?) for even better block sizes
@@ -1365,12 +1369,18 @@ def _arg_check(self, args, *_args):
         value = args[name]
         if isinstance(self.parent, BlockDimension):
             # sub-BlockDimensions must be perfect divisors of their parent
-            parent_value = args[self.parent.step.name]
+            try:
+                parent_step_name = self.parent.step.name
+                parent_value = args[parent_step_name]
+            except AttributeError:
+                # Parent's step is a literal (e.g. Integer)
+                parent_step_name = str(self.parent.step)
+                parent_value = int(self.parent.step)
             if parent_value % value > 0:
                 raise InvalidArgument(
                     f'Illegal block size `{name}={value}`: sub-block sizes '
                     'must divide the parent block size evenly '
-                    f'(`{self.parent.step.name}={parent_value}`)'
+                    f'(`{parent_step_name}={parent_value}`)'
                 )
         else:
             if value < 0:

diff --git a/tests/test_dle.py b/tests/test_dle.py
@@ -109,6 +109,35 @@ def test_cache_blocking_structure(blockinner, openmp, expected):
         assert 'omp for' in trees[0][1].pragmas[0].ccode.value
 
 
+@skipif('noomp')
+@pytest.mark.parametrize("blockinner", [False, True])
+def test_cache_blocking_openmp_execution(blockinner):
+    """
+    Verify that blockinner + OpenMP actually compiles and produces
+    correct results. This catches canonical-form violations in the
+    generated for-loop headers (e.g. uindices in the init clause).
+    """
+    grid = Grid(shape=(12, 12, 12), dtype=np.float64)
+
+    u = TimeFunction(name='u', grid=grid, space_order=4)
+    v = TimeFunction(name='v', grid=grid, space_order=4)
+
+    eqns = [Eq(u.forward, u.laplace + 1.)]
+
+    op0 = Operator(eqns, opt='noop')
+    op1 = Operator(eqns, opt=('advanced', {'openmp': True,
+                                            'blockinner': blockinner,
+                                            'par-collapse-ncores': 1}))
+
+    u.data[:] = 0.1
+    op0(time_M=5)
+
+    v.data[:] = 0.1
+    op1(u=v, time_M=5)
+
+    assert np.allclose(u.data, v.data, rtol=1e-12)
+
+
 def test_cache_blocking_structure_subdims():
     """
     Test that:
@@ -514,6 +543,34 @@ def test_cache_blocking_hierarchical(blockshape0, blockshape1, exception):
         raise AssertionError('Assert False') from e
 
 
+@pytest.mark.parametrize("blockinner", [False, True])
+def test_cache_blocking_hierarchical_high_order(blockinner):
+    """
+    Verify that blocklevels=2 works correctly with high space_order
+    stencils. High-order stencils produce literal Integer step sizes
+    in BlockDimension._arg_values, exercising a different code path
+    than low-order stencils.
+    """
+    grid = Grid(shape=(45, 45, 45), dtype=np.float64)
+
+    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=8)
+    v = TimeFunction(name='v', grid=grid, time_order=1, space_order=8)
+
+    # Use a small coefficient to keep the stencil stable
+    eqns = [Eq(u.forward, u + 1e-4 * u.laplace)]
+
+    op0 = Operator(eqns, opt='noop')
+    op1 = Operator(eqns, opt=('advanced', {'blockinner': blockinner,
+                                            'blocklevels': 2}))
+
+    u.data[0, :] = np.linspace(-1, 1, 45**3).reshape((45, 45, 45))
+    v.data[0, :] = u.data[0, :]
+    op0(u=u, time_M=10)
+    op1(u=v, time_M=10)
+
+    assert np.allclose(u.data, v.data, rtol=1e-12)
+
+
 @pytest.mark.parametrize("blockinner", [False, True])
 def test_cache_blocking_imperfect_nest(blockinner):
     """