-
Notifications
You must be signed in to change notification settings - Fork 248
Fix blockinner+OpenMP and blocklevels=2 with high-order stencils #2856
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -647,14 +647,47 @@ | |
| loop_inc = f'{o.index} += {o.limits[2]}' | ||
|
|
||
| # Append unbounded indices, if any | ||
| if o.uindices: | ||
| uinit = [f'{i.name} = {self.ccode(i.symbolic_min)}' for i in o.uindices] | ||
| if o.uindices and o.pragmas: | ||
| # When pragmas are present (e.g., OpenMP parallel for), the | ||
| # for-loop header must be in canonical form: a single loop | ||
| # variable in init/cond/incr. Compute uindex values inside | ||
| # the loop body instead, where they are automatically | ||
| # thread-private | ||
| uinit_stmts = [] | ||
| for i in o.uindices: | ||
| if i.is_Modulo: | ||
| # Modulo dimensions: symbolic_incr computes the | ||
| # value from the parent dimension (the loop variable) | ||
| value = i.symbolic_incr | ||
| else: | ||
| # IncrDimension: derive value from loop variable | ||
| # position relative to loop start | ||
| if o.direction == Backward: | ||
| n_iters = _max - o.dim | ||
| else: | ||
| n_iters = o.dim - _min | ||
|
Check failure on line 668 in devito/ir/iet/visitors.py
|
||
| _step = o.limits[2] | ||
| if _step != 1: | ||
| n_iters = n_iters / _step | ||
| if i.symbolic_incr != 1: | ||
| value = i.symbolic_min + n_iters * i.symbolic_incr | ||
| else: | ||
| value = i.symbolic_min + n_iters | ||
| uinit_stmts.append( | ||
| c.Statement(f'int {i.name} = {self.ccode(value)}') | ||
| ) | ||
| body = list(uinit_stmts) + list(body) | ||
| elif o.uindices: | ||
| uinit = [f'{i.name} = {self.ccode(i.symbolic_min)}' | ||
| for i in o.uindices] | ||
| loop_init = c.Line(', '.join([loop_init] + uinit)) | ||
|
|
||
| ustep = [] | ||
| for i in o.uindices: | ||
| op = '=' if i.is_Modulo else '+=' | ||
| ustep.append(f'{i.name} {op} {self.ccode(i.symbolic_incr)}') | ||
| ustep.append( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why change the layout here? |
||
| f'{i.name} {op} {self.ccode(i.symbolic_incr)}' | ||
| ) | ||
| loop_inc = c.Line(', '.join([loop_inc] + ustep)) | ||
|
|
||
| # Create For header+body | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1344,7 +1344,11 @@ def _arg_values(self, interval, grid=None, args=None, **kwargs): | |
| elif isinstance(self.parent, BlockDimension): | ||
| # `self` is a BlockDimension within an outer BlockDimension, but | ||
| # no value supplied -> the sub-block will span the entire block | ||
| return {name: args[self.parent.step.name]} | ||
| try: | ||
| return {name: args[self.parent.step.name]} | ||
| except AttributeError: | ||
| # Parent's step is a literal (e.g. Integer), use it directly | ||
| return {name: int(self.parent.step)} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this change? |
||
| else: | ||
| # TODO": Check the args for space order and apply heuristics (e.g., | ||
| # `2*space_order`?) for even better block sizes | ||
|
|
@@ -1365,12 +1369,18 @@ def _arg_check(self, args, *_args): | |
| value = args[name] | ||
| if isinstance(self.parent, BlockDimension): | ||
| # sub-BlockDimensions must be perfect divisors of their parent | ||
| parent_value = args[self.parent.step.name] | ||
| try: | ||
| parent_step_name = self.parent.step.name | ||
| parent_value = args[parent_step_name] | ||
| except AttributeError: | ||
| # Parent's step is a literal (e.g. Integer) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why? I think AI got pretty badly confused It feels like it's completely missing the multi-layer structure of the compiler for some reasons... since the CGen visitor that it attempted to fix above is at the very end of compilation, while these changes are for very high level objects... |
||
| parent_step_name = str(self.parent.step) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| parent_value = int(self.parent.step) | ||
| if parent_value % value > 0: | ||
| raise InvalidArgument( | ||
| f'Illegal block size `{name}={value}`: sub-block sizes ' | ||
| 'must divide the parent block size evenly ' | ||
| f'(`{self.parent.step.name}={parent_value}`)' | ||
| f'(`{parent_step_name}={parent_value}`)' | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why this change? |
||
| ) | ||
| else: | ||
| if value < 0: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,6 +109,35 @@ def test_cache_blocking_structure(blockinner, openmp, expected): | |
| assert 'omp for' in trees[0][1].pragmas[0].ccode.value | ||
|
|
||
|
|
||
| @skipif('noomp') | ||
| @pytest.mark.parametrize("blockinner", [False, True]) | ||
| def test_cache_blocking_openmp_execution(blockinner): | ||
| """ | ||
| Verify that blockinner + OpenMP actually compiles and produces | ||
| correct results. This catches canonical-form violations in the | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not convinced it does. This test should really inspect the generated code/operator to verify this. |
||
| generated for-loop headers (e.g. uindices in the init clause). | ||
| """ | ||
| grid = Grid(shape=(12, 12, 12), dtype=np.float64) | ||
|
|
||
| u = TimeFunction(name='u', grid=grid, space_order=4) | ||
| v = TimeFunction(name='v', grid=grid, space_order=4) | ||
|
|
||
| eqns = [Eq(u.forward, u.laplace + 1.)] | ||
|
|
||
| op0 = Operator(eqns, opt='noop') | ||
| op1 = Operator(eqns, opt=('advanced', {'openmp': True, | ||
| 'blockinner': blockinner, | ||
| 'par-collapse-ncores': 1})) | ||
|
|
||
| u.data[:] = 0.1 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unnecessary, as is the |
||
| op0(time_M=5) | ||
|
|
||
| v.data[:] = 0.1 | ||
| op1(u=v, time_M=5) | ||
|
|
||
| assert np.allclose(u.data, v.data, rtol=1e-12) | ||
|
|
||
|
|
||
| def test_cache_blocking_structure_subdims(): | ||
| """ | ||
| Test that: | ||
|
|
@@ -514,6 +543,34 @@ def test_cache_blocking_hierarchical(blockshape0, blockshape1, exception): | |
| raise AssertionError('Assert False') from e | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("blockinner", [False, True]) | ||
| def test_cache_blocking_hierarchical_high_order(blockinner): | ||
| """ | ||
| Verify that blocklevels=2 works correctly with high space_order | ||
| stencils. High-order stencils produce literal Integer step sizes | ||
| in BlockDimension._arg_values, exercising a different code path | ||
| than low-order stencils. | ||
| """ | ||
| grid = Grid(shape=(45, 45, 45), dtype=np.float64) | ||
|
|
||
| u = TimeFunction(name='u', grid=grid, time_order=1, space_order=8) | ||
| v = TimeFunction(name='v', grid=grid, time_order=1, space_order=8) | ||
|
|
||
| # Use a small coefficient to keep the stencil stable | ||
| eqns = [Eq(u.forward, u + 1e-4 * u.laplace)] | ||
|
|
||
| op0 = Operator(eqns, opt='noop') | ||
| op1 = Operator(eqns, opt=('advanced', {'blockinner': blockinner, | ||
| 'blocklevels': 2})) | ||
|
|
||
| u.data[0, :] = np.linspace(-1, 1, 45**3).reshape((45, 45, 45)) | ||
| v.data[0, :] = u.data[0, :] | ||
| op0(u=u, time_M=10) | ||
| op1(u=v, time_M=10) | ||
|
|
||
| assert np.allclose(u.data, v.data, rtol=1e-12) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("blockinner", [False, True]) | ||
| def test_cache_blocking_imperfect_nest(blockinner): | ||
| """ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand where this is coming from, it's like it's trying to add new logic, which is not the point of this PR
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean this whole block of code...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seconded