Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 3 additions & 18 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,8 @@ jobs:
container: ghcr.io/gridtools/gridtools-base:${{ matrix.compiler }}
strategy:
matrix:
compiler: [gcc-8, gcc-9, gcc-10, gcc-11, gcc-12, gcc-13, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16, clang-14-cuda-11, gcc-10-cuda-11.8, gcc-11-cuda-12.0, gcc-12-cuda-12.3, gcc-12-cuda-12.4, base-rocm-6.2.2, gcc-10-hpx, nvhpc-23.3, nvhpc-23.9]
build_type: [debug, release]
exclude:
- compiler: gcc-8
build_type: debug
- compiler: gcc-9
build_type: debug
- compiler: gcc-10
build_type: debug
- compiler: gcc-11
build_type: debug
- compiler: clang-13
build_type: debug
- compiler: clang-14
build_type: debug
- compiler: gcc-10-hpx
build_type: debug
compiler: [gcc-13, clang-16, clang-14-cuda-11, gcc-12-cuda-12.4, base-rocm-6.2.2, gcc-10-hpx, nvhpc-23.9]
build_type: [release]
steps:
- uses: actions/checkout@v2
- name: setup environment
Expand All @@ -38,7 +23,7 @@ jobs:
echo "OMP_NUM_THREADS=$(nproc)" >> $GITHUB_ENV
- name: build
run: |
python3 pyutils/driver.py -vv build -b ${{ matrix.build_type }} -o $(pwd)/build -i $(pwd)/install -t perftests
python3 pyutils/driver.py -vv build -b ${{ matrix.build_type }} -o $(pwd)/build -i $(pwd)/install
- name: run tests
# no GPUs available -> no tests (for nvhpc we could run cpu, but we currently don't expose that option in pyutils/driver.py)
if: (!contains(matrix.compiler, 'cuda') && !contains(matrix.compiler, 'rocm') && !contains(matrix.compiler, 'nvhpc'))
Expand Down
15 changes: 7 additions & 8 deletions include/gridtools/common/hymap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ namespace gridtools {
template <class...>
struct values;

#if !defined(__NVCC__) && defined(__clang__) && __clang_major__ <= 17
#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 12)
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This deduction-guide enablement drops the existing NVCC exclusion, even though the comment right below says NVCC fails CTAD for nested templates. As written, CUDA builds (where __NVCC__ is defined and the host compiler defines __GNUC__) will enable the deduction guide and can reintroduce the NVCC compilation failure. Consider adding !defined(__NVCC__) (or a more specific CUDA compiler guard) to this #if.

Suggested change
#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 12)
#if !defined(__NVCC__) && (defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 12))

Copilot uses AI. Check for mistakes.
template <class... Vs>
values(Vs const &...) -> values<Vs...>;
#endif
Expand All @@ -213,16 +213,18 @@ namespace gridtools {
template <class... Keys>
template <class... Vals>
struct keys<Keys...>::values {
#if !defined(__clang__) || __clang_major__ < 18
static_assert(sizeof...(Vals) == sizeof...(Keys), "invalid hymap");
#endif

tuple<Vals...> m_vals;

template <class... Args,
std::enable_if_t<std::conjunction_v<std::is_constructible<Vals, Args>...>, int> = 0>
std::enable_if_t<sizeof...(Args) == sizeof...(Vals) &&
std::conjunction_v<std::is_constructible<Vals, Args>...>,
int> = 0>
constexpr GT_FUNCTION values(Args &&...args) noexcept : m_vals{std::forward<Args>(args)...} {}

constexpr GT_FUNCTION values(Vals const &...args) noexcept : m_vals(args...) {}

constexpr GT_FUNCTION values(tuple<Vals...> &&args) noexcept : m_vals(std::move(args)) {}
constexpr GT_FUNCTION values(tuple<Vals...> const &args) noexcept : m_vals(args) {}

Expand All @@ -234,10 +236,7 @@ namespace gridtools {

template <class Src>
constexpr GT_FUNCTION
std::enable_if_t<((!std::is_same_v<values, std::decay_t<Src>> && is_hymap<std::decay_t<Src>>::value) &&
... &&
std::is_assignable_v<Vals &,
std::add_lvalue_reference_t<element_at<Keys, std::remove_reference_t<Src>>>>),
std::enable_if_t<!std::is_same_v<values, std::decay_t<Src>> && is_hymap<std::decay_t<Src>>::value,
values &>
operator=(Src &&src) {
(...,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace gridtools {
struct pow_f {
template <class Arg>
GT_FUNCTION constexpr auto operator()(Arg const &arg) const {
return gt_pow<I>::template apply(arg);
return gt_pow<I>::apply(arg);
}
};

Expand Down
16 changes: 8 additions & 8 deletions tests/regression/icosahedral/curl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ namespace {
run(spec,
stencil_backend_t(),
TypeParam ::make_grid(),
TypeParam ::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal),
TypeParam ::icosahedral_make_storage(edges(), repo.dual_edge_length),
TypeParam ::icosahedral_make_storage(edges(), repo.u),
TypeParam ::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal()),
TypeParam ::icosahedral_make_storage(edges(), repo.dual_edge_length()),
TypeParam ::icosahedral_make_storage(edges(), repo.u()),
out);
TypeParam::verify(repo.curl_u, out, eq<TypeParam>);
TypeParam::verify(repo.curl_u(), out, eq<TypeParam>);
}

GT_REGRESSION_TEST(curl_flow_convention, icosahedral_test_environment<2>, stencil_backend_t) {
Expand All @@ -54,10 +54,10 @@ namespace {
run_single_stage(curl_functor_flow_convention(),
stencil_backend_t(),
TypeParam ::make_grid(),
TypeParam ::icosahedral_make_storage(edges(), repo.u),
TypeParam ::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal),
TypeParam ::icosahedral_make_storage(edges(), repo.dual_edge_length),
TypeParam ::icosahedral_make_storage(edges(), repo.u()),
TypeParam ::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal()),
TypeParam ::icosahedral_make_storage(edges(), repo.dual_edge_length()),
out);
TypeParam ::verify(repo.curl_u, out, eq<TypeParam>);
TypeParam ::verify(repo.curl_u(), out, eq<TypeParam>);
}
} // namespace
16 changes: 8 additions & 8 deletions tests/regression/icosahedral/div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ namespace {
run(spec,
stencil_backend_t(),
TypeParam ::make_grid(),
TypeParam ::icosahedral_make_storage(edges(), repo.u),
TypeParam ::icosahedral_make_storage(edges(), repo.edge_length),
TypeParam ::icosahedral_make_storage(cells(), repo.cell_area_reciprocal),
TypeParam ::icosahedral_make_storage(edges(), repo.u()),
TypeParam ::icosahedral_make_storage(edges(), repo.edge_length()),
TypeParam ::icosahedral_make_storage(cells(), repo.cell_area_reciprocal()),
out);
TypeParam ::verify(repo.div_u, out);
TypeParam ::verify(repo.div_u(), out);
}

GT_REGRESSION_TEST(div_flow_convention, icosahedral_test_environment<2>, stencil_backend_t) {
Expand All @@ -47,10 +47,10 @@ namespace {
run_single_stage(div_functor_flow_convention_connectivity(),
stencil_backend_t(),
TypeParam::make_grid(),
TypeParam ::icosahedral_make_storage(edges(), repo.u),
TypeParam ::icosahedral_make_storage(edges(), repo.edge_length),
TypeParam ::icosahedral_make_storage(cells(), repo.cell_area_reciprocal),
TypeParam ::icosahedral_make_storage(edges(), repo.u()),
TypeParam ::icosahedral_make_storage(edges(), repo.edge_length()),
TypeParam ::icosahedral_make_storage(cells(), repo.cell_area_reciprocal()),
out);
TypeParam ::verify(repo.div_u, out);
TypeParam ::verify(repo.div_u(), out);
}
} // namespace
32 changes: 16 additions & 16 deletions tests/regression/icosahedral/lap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,15 @@ namespace {
run(spec,
stencil_backend_t(),
TypeParam::make_grid(),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length),
TypeParam::icosahedral_make_storage(cells(), repo.cell_area_reciprocal),
TypeParam::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length),
TypeParam::icosahedral_make_storage(edges(), repo.u),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length_reciprocal),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length_reciprocal),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length()),
TypeParam::icosahedral_make_storage(cells(), repo.cell_area_reciprocal()),
TypeParam::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal()),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length()),
TypeParam::icosahedral_make_storage(edges(), repo.u()),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length_reciprocal()),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length_reciprocal()),
out);
TypeParam::verify(TypeParam::icosahedral_make_storage(edges(), repo.lap), out);
TypeParam::verify(TypeParam::icosahedral_make_storage(edges(), repo.lap()), out);
}

GT_REGRESSION_TEST(lap_flow_convention, icosahedral_test_environment<2>, stencil_backend_t) {
Expand Down Expand Up @@ -125,14 +125,14 @@ namespace {
run(spec,
stencil_backend_t(),
TypeParam::make_grid(),
TypeParam::icosahedral_make_storage(edges(), repo.u),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length),
TypeParam::icosahedral_make_storage(cells(), repo.cell_area_reciprocal),
TypeParam::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length_reciprocal),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length_reciprocal),
TypeParam::icosahedral_make_storage(edges(), repo.u()),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length()),
TypeParam::icosahedral_make_storage(cells(), repo.cell_area_reciprocal()),
TypeParam::icosahedral_make_storage(vertices(), repo.dual_area_reciprocal()),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length()),
TypeParam::icosahedral_make_storage(edges(), repo.dual_edge_length_reciprocal()),
TypeParam::icosahedral_make_storage(edges(), repo.edge_length_reciprocal()),
out);
TypeParam::verify(TypeParam::icosahedral_make_storage(edges(), repo.lap), out);
TypeParam::verify(TypeParam::icosahedral_make_storage(edges(), repo.lap()), out);
}
} // namespace
151 changes: 84 additions & 67 deletions tests/regression/icosahedral/operators_repository.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

#include <cassert>
#include <cmath>
#include <functional>

#include <gridtools/common/defs.hpp>
#include <gridtools/stencil/frontend/icosahedral/location_type.hpp>
Expand All @@ -24,8 +23,6 @@ class operators_repository {
using edges = gridtools::stencil::icosahedral::edges;
using vertices = gridtools::stencil::icosahedral::vertices;

using fun_t = std::function<double(int, int, int, int)>;

size_t m_d1, m_d2;

const double PI = std::atan(1) * 4;
Expand All @@ -38,70 +35,90 @@ class operators_repository {
double y(int j) const { return j * 1. / m_d2; }

public:
fun_t u = [this](int i, int j, int k, int c) {
auto t = PI * (x<edges>(i, c) + 1.5 * y(j));
return k + 2 * (2 + cos(t) + sin(2 * t));
};

fun_t edge_length = [this](int i, int j, int, int c) {
auto t = PI * (x<edges>(i, c) + 1.5 * y(j));
return 2.95 + (2 + cos(t) + sin(2 * t)) / 4;
};

fun_t edge_length_reciprocal = [this](int i, int j, int k, int c) { return 1 / edge_length(i, j, k, c); };

fun_t cell_area_reciprocal = [this](int i, int j, int, int c) {
auto xx = x<cells>(i, c);
auto yy = y(j);
return 1 / (2.53 + (2 + cos(PI * (1.5 * xx + 2.5 * yy)) + sin(2 * PI * (xx + 1.5 * yy))) / 4);
};

fun_t dual_area_reciprocal = [this](int i, int j, int, int c) {
auto xx = x<vertices>(i, c);
auto yy = y(j);
return 1 / (1.1 + (2 + cos(PI * (1.5 * xx + yy)) + sin(1.5 * PI * (xx + 1.5 * yy))) / 4);
};

fun_t dual_edge_length = [this](int i, int j, int, int c) {
auto xx = x<edges>(i, c);
auto yy = y(j);
return 2.2 + (2 + cos(PI * (xx + 2.5 * yy)) + sin(2 * PI * (xx + 3.5 * yy))) / 4;
};

fun_t dual_edge_length_reciprocal = [this](int i, int j, int k, int c) { return 1 / dual_edge_length(i, j, k, c); };

fun_t div_u = [this](int i, int j, int k, int c) {
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<cells, edges>(i, j, k, c)) {
res += (c == 0 ? 1 : -1) * neighbour.call(u) * neighbour.call(edge_length);
++e;
}
return res * cell_area_reciprocal(i, j, k, c);
};

fun_t curl_u = [this](int i, int j, int k, int c) {
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<vertices, edges>(i, j, k, c)) {
res += (e % 2 ? 1 : -1) * neighbour.call(u) * neighbour.call(dual_edge_length);
++e;
}
return res * dual_area_reciprocal(i, j, k, c);
};

fun_t lap = [this](int i, int j, int k, int c) {
auto neighbours_ec = gridtools::neighbours_of<edges, cells>(i, j, k, c);
assert(neighbours_ec.size() == 2);
auto grad_n =
(neighbours_ec[1].call(div_u) - neighbours_ec[0].call(div_u)) * dual_edge_length_reciprocal(i, j, k, c);

auto neighbours_vc = gridtools::neighbours_of<edges, vertices>(i, j, k, c);
assert(neighbours_vc.size() == 2);
auto grad_tau =
(neighbours_vc[1].call(curl_u) - neighbours_vc[0].call(curl_u)) * edge_length_reciprocal(i, j, k, c);
return grad_n - grad_tau;
};
auto u() const {
return [this](int i, int j, int k, int c) {
auto t = PI * (x<edges>(i, c) + 1.5 * y(j));
return k + 2 * (2 + cos(t) + sin(2 * t));
};
}

auto edge_length() const {
return [this](int i, int j, int, int c) {
auto t = PI * (x<edges>(i, c) + 1.5 * y(j));
return 2.95 + (2 + cos(t) + sin(2 * t)) / 4;
};
}

auto edge_length_reciprocal() const {
return [this](int i, int j, int k, int c) { return 1 / edge_length()(i, j, k, c); };
}

auto cell_area_reciprocal() const {
return [this](int i, int j, int, int c) {
auto xx = x<cells>(i, c);
auto yy = y(j);
return 1 / (2.53 + (2 + cos(PI * (1.5 * xx + 2.5 * yy)) + sin(2 * PI * (xx + 1.5 * yy))) / 4);
};
}

auto dual_area_reciprocal() const {
return [this](int i, int j, int, int c) {
auto xx = x<vertices>(i, c);
auto yy = y(j);
return 1 / (1.1 + (2 + cos(PI * (1.5 * xx + yy)) + sin(1.5 * PI * (xx + 1.5 * yy))) / 4);
};
}

auto dual_edge_length() const {
return [this](int i, int j, int, int c) {
auto xx = x<edges>(i, c);
auto yy = y(j);
return 2.2 + (2 + cos(PI * (xx + 2.5 * yy)) + sin(2 * PI * (xx + 3.5 * yy))) / 4;
};
}

auto dual_edge_length_reciprocal() const {
return [this](int i, int j, int k, int c) { return 1 / dual_edge_length()(i, j, k, c); };
}

auto div_u() const {
return [this](int i, int j, int k, int c) {
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<cells, edges>(i, j, k, c)) {
res += (c == 0 ? 1 : -1) * neighbour.call(u()) * neighbour.call(edge_length());
++e;
}
return res * cell_area_reciprocal()(i, j, k, c);
};
}

auto curl_u() const {
return [this](int i, int j, int k, int c) {
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<vertices, edges>(i, j, k, c)) {
res += (e % 2 ? 1 : -1) * neighbour.call(u()) * neighbour.call(dual_edge_length());
++e;
}
return res * dual_area_reciprocal()(i, j, k, c);
Comment on lines +86 to +104
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These lambdas call u(), edge_length(), div_u(), etc. repeatedly inside loops and expressions, constructing a new closure object on every call. Even if the compiler optimizes this away, it obscures the intent and can inhibit optimization in some toolchains. Consider creating each needed functor once at the top of the returned lambda's body (e.g., auto u_f = u();) and reusing it throughout the computation.

Suggested change
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<cells, edges>(i, j, k, c)) {
res += (c == 0 ? 1 : -1) * neighbour.call(u()) * neighbour.call(edge_length());
++e;
}
return res * cell_area_reciprocal()(i, j, k, c);
};
}
auto curl_u() const {
return [this](int i, int j, int k, int c) {
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<vertices, edges>(i, j, k, c)) {
res += (e % 2 ? 1 : -1) * neighbour.call(u()) * neighbour.call(dual_edge_length());
++e;
}
return res * dual_area_reciprocal()(i, j, k, c);
auto u_f = u();
auto edge_length_f = edge_length();
auto cell_area_reciprocal_f = cell_area_reciprocal();
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<cells, edges>(i, j, k, c)) {
res += (c == 0 ? 1 : -1) * neighbour.call(u_f) * neighbour.call(edge_length_f);
++e;
}
return res * cell_area_reciprocal_f(i, j, k, c);
};
}
auto curl_u() const {
return [this](int i, int j, int k, int c) {
auto u_f = u();
auto dual_edge_length_f = dual_edge_length();
auto dual_area_reciprocal_f = dual_area_reciprocal();
double res = 0;
int e = 0;
for (auto &&neighbour : gridtools::neighbours_of<vertices, edges>(i, j, k, c)) {
res += (e % 2 ? 1 : -1) * neighbour.call(u_f) * neighbour.call(dual_edge_length_f);
++e;
}
return res * dual_area_reciprocal_f(i, j, k, c);

Copilot uses AI. Check for mistakes.
};
}

auto lap() const {
return [this](int i, int j, int k, int c) {
auto neighbours_ec = gridtools::neighbours_of<edges, cells>(i, j, k, c);
assert(neighbours_ec.size() == 2);
auto grad_n = (neighbours_ec[1].call(div_u()) - neighbours_ec[0].call(div_u())) *
dual_edge_length_reciprocal()(i, j, k, c);

auto neighbours_vc = gridtools::neighbours_of<edges, vertices>(i, j, k, c);
assert(neighbours_vc.size() == 2);
auto grad_tau = (neighbours_vc[1].call(curl_u()) - neighbours_vc[0].call(curl_u())) *
edge_length_reciprocal()(i, j, k, c);
return grad_n - grad_tau;
};
}

operators_repository(size_t d1, size_t d2) : m_d1(d1), m_d2(d2) {}
};
2 changes: 1 addition & 1 deletion tests/unit_tests/common/test_hymap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ namespace gridtools {
EXPECT_EQ(7.3, at_key<b>(dst));
}

#if !defined(__NVCC__)
#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 12)
TEST(deduction, smoke) {
auto testee = hymap::keys<a, b>::values(42, 5.3);

Expand Down
Loading
Loading