Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions devito/ir/iet/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,14 +647,47 @@
loop_inc = f'{o.index} += {o.limits[2]}'

# Append unbounded indices, if any
if o.uindices:
uinit = [f'{i.name} = {self.ccode(i.symbolic_min)}' for i in o.uindices]
if o.uindices and o.pragmas:
# When pragmas are present (e.g., OpenMP parallel for), the
# for-loop header must be in canonical form: a single loop
# variable in init/cond/incr. Compute uindex values inside
# the loop body instead, where they are automatically
# thread-private
uinit_stmts = []
for i in o.uindices:
if i.is_Modulo:
# Modulo dimensions: symbolic_incr computes the
# value from the parent dimension (the loop variable)
value = i.symbolic_incr
else:
# IncrDimension: derive value from loop variable
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand where this is coming from, it's like it's trying to add new logic, which is not the point of this PR

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean this whole block of code...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seconded

# position relative to loop start
if o.direction == Backward:
n_iters = _max - o.dim
else:
n_iters = o.dim - _min

Check failure on line 668 in devito/ir/iet/visitors.py

View workflow job for this annotation

GitHub Actions / Lint the codebase

ruff (SIM108)

devito/ir/iet/visitors.py:665:21: SIM108 Use ternary operator `n_iters = _max - o.dim if o.direction == Backward else o.dim - _min` instead of `if`-`else`-block
_step = o.limits[2]
if _step != 1:
n_iters = n_iters / _step
if i.symbolic_incr != 1:
value = i.symbolic_min + n_iters * i.symbolic_incr
else:
value = i.symbolic_min + n_iters
uinit_stmts.append(
c.Statement(f'int {i.name} = {self.ccode(value)}')
)
body = list(uinit_stmts) + list(body)
elif o.uindices:
uinit = [f'{i.name} = {self.ccode(i.symbolic_min)}'
for i in o.uindices]
loop_init = c.Line(', '.join([loop_init] + uinit))

ustep = []
for i in o.uindices:
op = '=' if i.is_Modulo else '+='
ustep.append(f'{i.name} {op} {self.ccode(i.symbolic_incr)}')
ustep.append(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change the layout here?

f'{i.name} {op} {self.ccode(i.symbolic_incr)}'
)
loop_inc = c.Line(', '.join([loop_inc] + ustep))

# Create For header+body
Expand Down
16 changes: 13 additions & 3 deletions devito/types/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,11 @@ def _arg_values(self, interval, grid=None, args=None, **kwargs):
elif isinstance(self.parent, BlockDimension):
# `self` is a BlockDimension within an outer BlockDimension, but
# no value supplied -> the sub-block will span the entire block
return {name: args[self.parent.step.name]}
try:
return {name: args[self.parent.step.name]}
except AttributeError:
# Parent's step is a literal (e.g. Integer), use it directly
return {name: int(self.parent.step)}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this change?

else:
# TODO": Check the args for space order and apply heuristics (e.g.,
# `2*space_order`?) for even better block sizes
Expand All @@ -1365,12 +1369,18 @@ def _arg_check(self, args, *_args):
value = args[name]
if isinstance(self.parent, BlockDimension):
# sub-BlockDimensions must be perfect divisors of their parent
parent_value = args[self.parent.step.name]
try:
parent_step_name = self.parent.step.name
parent_value = args[parent_step_name]
except AttributeError:
# Parent's step is a literal (e.g. Integer)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

I think AI got pretty badly confused

It feels like it's completely missing the multi-layer structure of the compiler for some reasons... since the CGen visitor that it attempted to fix above is at the very end of compilation, while these changes are for very high level objects...

parent_step_name = str(self.parent.step)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parent_step_name is getting repeated a bunch and quite inconsistently for reasons that escape me

parent_value = int(self.parent.step)
if parent_value % value > 0:
raise InvalidArgument(
f'Illegal block size `{name}={value}`: sub-block sizes '
'must divide the parent block size evenly '
f'(`{self.parent.step.name}={parent_value}`)'
f'(`{parent_step_name}={parent_value}`)'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

)
else:
if value < 0:
Expand Down
57 changes: 57 additions & 0 deletions tests/test_dle.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,35 @@ def test_cache_blocking_structure(blockinner, openmp, expected):
assert 'omp for' in trees[0][1].pragmas[0].ccode.value


@skipif('noomp')
@pytest.mark.parametrize("blockinner", [False, True])
def test_cache_blocking_openmp_execution(blockinner):
"""
Verify that blockinner + OpenMP actually compiles and produces
correct results. This catches canonical-form violations in the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This catches canonical-form violations in the
    generated for-loop headers (e.g. uindices in the init clause).

I'm not convinced it does. This test should really inspect the generated code/operator to verify this.

generated for-loop headers (e.g. uindices in the init clause).
"""
grid = Grid(shape=(12, 12, 12), dtype=np.float64)

u = TimeFunction(name='u', grid=grid, space_order=4)
v = TimeFunction(name='v', grid=grid, space_order=4)

eqns = [Eq(u.forward, u.laplace + 1.)]

op0 = Operator(eqns, opt='noop')
op1 = Operator(eqns, opt=('advanced', {'openmp': True,
'blockinner': blockinner,
'par-collapse-ncores': 1}))

u.data[:] = 0.1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unnecessary, as is the v.data[:] = 0.1 below

op0(time_M=5)

v.data[:] = 0.1
op1(u=v, time_M=5)

assert np.allclose(u.data, v.data, rtol=1e-12)


def test_cache_blocking_structure_subdims():
"""
Test that:
Expand Down Expand Up @@ -514,6 +543,34 @@ def test_cache_blocking_hierarchical(blockshape0, blockshape1, exception):
raise AssertionError('Assert False') from e


@pytest.mark.parametrize("blockinner", [False, True])
def test_cache_blocking_hierarchical_high_order(blockinner):
"""
Verify that blocklevels=2 works correctly with high space_order
stencils. High-order stencils produce literal Integer step sizes
in BlockDimension._arg_values, exercising a different code path
than low-order stencils.
"""
grid = Grid(shape=(45, 45, 45), dtype=np.float64)

u = TimeFunction(name='u', grid=grid, time_order=1, space_order=8)
v = TimeFunction(name='v', grid=grid, time_order=1, space_order=8)

# Use a small coefficient to keep the stencil stable
eqns = [Eq(u.forward, u + 1e-4 * u.laplace)]

op0 = Operator(eqns, opt='noop')
op1 = Operator(eqns, opt=('advanced', {'blockinner': blockinner,
'blocklevels': 2}))

u.data[0, :] = np.linspace(-1, 1, 45**3).reshape((45, 45, 45))
v.data[0, :] = u.data[0, :]
op0(u=u, time_M=10)
op1(u=v, time_M=10)

assert np.allclose(u.data, v.data, rtol=1e-12)


@pytest.mark.parametrize("blockinner", [False, True])
def test_cache_blocking_imperfect_nest(blockinner):
"""
Expand Down
Loading