//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #include #include #include #include #include //-------------------------------------------------------------------------- namespace Test { struct MyPair : Kokkos::pair {}; } // namespace Test template <> struct Kokkos::reduction_identity { KOKKOS_FUNCTION static Test::MyPair min() { return Test::MyPair{{INT_MAX, INT_MAX}}; } }; namespace Test { struct ReducerTag {}; template struct TestReducers { struct SumFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { value += values(i); } }; struct TeamSumFunctor { using member_type = typename Kokkos::TeamPolicy::member_type; KOKKOS_INLINE_FUNCTION void operator()(const member_type& m, Scalar& value) const { if (m.team_rank() == m.team_size() - 1) value += Scalar(1); } }; struct TeamSumNestedFunctor { using member_type = typename Kokkos::TeamPolicy::member_type; SumFunctor f; int M, N; Kokkos::View result; TeamSumNestedFunctor(SumFunctor& f_, const int M_, const int N_, Kokkos::View result_) : f(f_), M(M_), N(N_), result(result_) {} KOKKOS_INLINE_FUNCTION void operator()(const member_type& m) const { const int i = m.league_rank(); Scalar local_scalar; Kokkos::Sum reducer_scalar( local_scalar); Kokkos::parallel_reduce(Kokkos::TeamThreadRange(m, N), f, reducer_scalar); result(i) = local_scalar; } }; struct ProdFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { value *= values(i); } }; struct MinFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { if (values(i) < value) value = values(i); } }; struct MaxFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { if (values(i) > value) value = values(i); } }; struct MinLocFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const int& i, typename Kokkos::MinLoc::value_type& value) const { if (values(i) < value.val) { value.val = values(i); value.loc = i; } } }; struct MinLocFunctor2D { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const int& i, const int& j, typename Kokkos::MinLoc::value_type& value) const { if (values(i, j) < value.val) { value.val = values(i, j); value.loc = {{i, j}}; } } }; struct MaxLocFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const int& i, typename Kokkos::MaxLoc::value_type& value) const { if (values(i) > value.val) { value.val = values(i); value.loc = i; } } }; struct MaxLocFunctor2D { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const int& i, const int& j, typename Kokkos::MaxLoc::value_type& value) const { if (values(i, j) > value.val) { value.val = values(i, j); value.loc = {{i, j}}; } } }; struct MinMaxLocFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const int& i, typename Kokkos::MinMaxLoc::value_type& value) const { if (values(i) > value.max_val) { value.max_val = values(i); value.max_loc = i; } if (values(i) < value.min_val) { value.min_val = values(i); value.min_loc = i; } } }; struct MinMaxLocFunctor2D { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const int& i, const int& j, typename Kokkos::MinMaxLoc::value_type& value) const { if (values(i, j) > value.max_val) { value.max_val = values(i, j); value.max_loc = {{i, j}}; } if (values(i, j) < value.min_val) { value.min_val = values(i, j); value.min_loc = {{i, j}}; } } }; struct BAndFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { value = value & values(i); } }; struct BOrFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { value = value | values(i); } }; struct LAndFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { value = value && values(i); } }; struct LOrFunctor { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const int& i, Scalar& value) const { value = value || values(i); } }; struct SumFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { value += values(i); } }; struct ProdFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { value *= values(i); } }; struct MinFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { if (values(i) < value) value = values(i); } }; struct MaxFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { if (values(i) > value) value = values(i); } }; struct MinLocFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const ReducerTag, const int& i, typename Kokkos::MinLoc::value_type& value) const { if (values(i) < value.val) { value.val = values(i); value.loc = i; } } }; struct MaxLocFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const ReducerTag, const int& i, typename Kokkos::MaxLoc::value_type& value) const { if (values(i) > value.val) { value.val = values(i); value.loc = i; } } }; struct MinMaxLocFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()( const ReducerTag, const int& i, typename Kokkos::MinMaxLoc::value_type& value) const { if (values(i) > value.max_val) { value.max_val = values(i); value.max_loc = i; } if (values(i) < value.min_val) { value.min_val = values(i); value.min_loc = i; } } }; struct BAndFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { value = value & values(i); } }; struct BOrFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { value = value | values(i); } }; struct LAndFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { value = value && values(i); } }; struct LOrFunctorTag { Kokkos::View values; KOKKOS_INLINE_FUNCTION void operator()(const ReducerTag, const int& i, Scalar& value) const { value = value || values(i); } }; // get number of teams for TeamPolicy depending on the tested type constexpr static int get_num_teams() { if constexpr (sizeof(Scalar) == 1) { return 126; } else if constexpr (std::is_same_v) { return 256; } return 1024; } static void test_sum_team_policy(int N, SumFunctor f, Scalar reference_sum) { #ifdef KOKKOS_ENABLE_OPENACC if constexpr (std::is_same_v && (std::is_same_v || std::is_same_v)) { return; // FIXME_OPENACC } #endif Scalar sum_scalar; Kokkos::View sum_view("result"); Kokkos::deep_copy(sum_view, Scalar(1)); // Test team policy reduction { constexpr int num_teams = get_num_teams(); TeamSumFunctor tf; // FIXME_OPENMPTARGET temporary restriction for team size to be at least // 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET int team_size = std::is_same::value ? 32 : 1; #else int team_size = 1; #endif auto team_pol = Kokkos::TeamPolicy(num_teams, team_size); Kokkos::parallel_reduce(team_pol, tf, sum_view); Kokkos::deep_copy(sum_scalar, sum_view); ASSERT_EQ(sum_scalar, Scalar{num_teams}) << "num_teams: " << num_teams; } // Test TeamThreadRange level reduction with 0 work produces 0 result { const int league_size = 1; Kokkos::View result("result", league_size); TeamSumNestedFunctor tnf(f, league_size, 0, result); // FIXME_OPENMPTARGET temporary restriction for team size to be at least // 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET int team_size = std::is_same::value ? 32 : 1; #else int team_size = 1; #endif auto team_pol = Kokkos::TeamPolicy(1, team_size); Kokkos::parallel_for(team_pol, tnf); auto result_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), result); ASSERT_EQ(result_h(0), Scalar{0}) << "N: " << N; } // Same test as above, but with inner reduction over N, and league_size=10 { const int league_size = 10; Kokkos::View result("result", league_size); TeamSumNestedFunctor tnf(f, league_size, N, result); // FIXME_OPENMPTARGET temporary restriction for team size to be at least // 32 #ifdef KOKKOS_ENABLE_OPENMPTARGET int initial_team_size = std::is_same_v ? 32 : 1; #else int initial_team_size = 1; #endif auto team_size_max = Kokkos::TeamPolicy(league_size, initial_team_size) .team_size_max(tnf, Kokkos::ParallelForTag()); auto team_size = std::min(team_size_max, TEST_EXECSPACE().concurrency()); auto team_pol = Kokkos::TeamPolicy(league_size, team_size); Kokkos::parallel_for(team_pol, tnf); auto result_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), result); for (int i = 0; i < result_h.extent_int(0); ++i) { ASSERT_EQ(result_h(i), reference_sum) << "N: " << N; } } } static void test_sum(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_sum = 0; for (int i = 0; i < N; i++) { int denom = sizeof(Scalar) <= 2 ? 10 : 100; // clang-format off // For bhalf, we start overflowing integer values at 2^8. // after 2^8, we lose representation of odd numbers; // after 2^9, we lose representation of odd and even numbers in position 1. // after 2^10, we lose representation of odd and even numbers in position 1-3. // after 2^11, we lose representation of odd and even numbers in position 1-7. // ... // Generally, for IEEE 754 floating point numbers, we start this overflow pattern at: 2^(num_fraction_bits+1). // brain float has num_fraction_bits = 7. // This mask addresses #4719 for N <= 51. // The mask is not needed for N <= 25. // clang-format on int mask = std::is_same::value && N > 25 ? (int)0xfffffffe : (int)0xffffffff; h_values(i) = (Scalar)((rand() % denom) & mask); reference_sum += h_values(i); } Kokkos::deep_copy(values, h_values); SumFunctor f; f.values = values; SumFunctorTag f_tag; f_tag.values = values; Scalar init = 0; { Scalar sum_scalar = Scalar(1); Kokkos::Sum reducer_scalar(sum_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, 0), f, reducer_scalar); ASSERT_EQ(sum_scalar, init) << "N: " << N; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(sum_scalar, reference_sum) << "N: " << N; sum_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(sum_scalar, reference_sum) << "N: " << N; Scalar sum_scalar_view = reducer_scalar.reference(); ASSERT_EQ(sum_scalar_view, reference_sum) << "N: " << N; } test_sum_team_policy(N, f, reference_sum); { Kokkos::View sum_view("View"); sum_view() = Scalar(1); Kokkos::Sum reducer_view(sum_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, 0), f, reducer_view); Kokkos::fence(); Scalar sum_view_scalar = sum_view(); ASSERT_EQ(sum_view_scalar, init) << "N: " << N; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); sum_view_scalar = sum_view(); ASSERT_EQ(sum_view_scalar, reference_sum) << "N: " << N; Scalar sum_view_view = reducer_view.reference(); ASSERT_EQ(sum_view_view, reference_sum) << "N: " << N; } { Kokkos::View sum_view("View"); Kokkos::deep_copy(sum_view, Scalar(1)); Kokkos::Sum reducer_view( sum_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, 0), f, reducer_view); Kokkos::fence(); Scalar sum_view_scalar; Kokkos::deep_copy(sum_view_scalar, sum_view); ASSERT_EQ(sum_view_scalar, init) << "N: " << N; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Kokkos::deep_copy(sum_view_scalar, sum_view); ASSERT_EQ(sum_view_scalar, reference_sum) << "N: " << N; } } static void test_prod(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_prod = 1; for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 4 + 1); reference_prod *= h_values(i); } Kokkos::deep_copy(values, h_values); ProdFunctor f; f.values = values; ProdFunctorTag f_tag; f_tag.values = values; Scalar init = 1; { Scalar prod_scalar = Scalar(0); Kokkos::Prod reducer_scalar(prod_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, 0), f, reducer_scalar); ASSERT_EQ(prod_scalar, init); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(prod_scalar, reference_prod); prod_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(prod_scalar, reference_prod); Scalar prod_scalar_view = reducer_scalar.reference(); ASSERT_EQ(prod_scalar_view, reference_prod); } { Kokkos::View prod_view("View"); prod_view() = Scalar(0); Kokkos::Prod reducer_view(prod_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, 0), f, reducer_view); Kokkos::fence(); Scalar prod_view_scalar = prod_view(); ASSERT_EQ(prod_view_scalar, init); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); prod_view_scalar = prod_view(); ASSERT_EQ(prod_view_scalar, reference_prod); Scalar prod_view_view = reducer_view.reference(); ASSERT_EQ(prod_view_view, reference_prod); } { Kokkos::View prod_view("View"); Kokkos::deep_copy(prod_view, Scalar(0)); Kokkos::Prod reducer_view( prod_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, 0), f, reducer_view); Kokkos::fence(); Scalar prod_view_scalar; Kokkos::deep_copy(prod_view_scalar, prod_view); ASSERT_EQ(prod_view_scalar, init); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Kokkos::deep_copy(prod_view_scalar, prod_view); ASSERT_EQ(prod_view_scalar, reference_prod); } } static void test_min(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_min = std::numeric_limits::max(); for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 100000); if (h_values(i) < reference_min) reference_min = h_values(i); } Kokkos::deep_copy(values, h_values); MinFunctor f; f.values = values; MinFunctorTag f_tag; f_tag.values = values; Scalar init = std::numeric_limits::max(); { Scalar min_scalar = init; Kokkos::Min reducer_scalar(min_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(min_scalar, reference_min); min_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(min_scalar, reference_min); Scalar min_scalar_view = reducer_scalar.reference(); ASSERT_EQ(min_scalar_view, reference_min); } { Kokkos::View min_view("View"); min_view() = init; Kokkos::Min reducer_view(min_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Scalar min_view_scalar = min_view(); ASSERT_EQ(min_view_scalar, reference_min); Scalar min_view_view = reducer_view.reference(); ASSERT_EQ(min_view_view, reference_min); } } static void test_max(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_max = std::numeric_limits::min(); for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 100000 + 1); if (h_values(i) > reference_max) reference_max = h_values(i); } Kokkos::deep_copy(values, h_values); MaxFunctor f; f.values = values; MaxFunctorTag f_tag; f_tag.values = values; Scalar init = std::numeric_limits::min(); { Scalar max_scalar = init; Kokkos::Max reducer_scalar(max_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(max_scalar, reference_max); max_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(max_scalar, reference_max); Scalar max_scalar_view = reducer_scalar.reference(); ASSERT_EQ(max_scalar_view, reference_max); } { Kokkos::View max_view("View"); max_view() = init; Kokkos::Max reducer_view(max_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Scalar max_view_scalar = max_view(); ASSERT_EQ(max_view_scalar, reference_max); Scalar max_view_view = reducer_view.reference(); ASSERT_EQ(max_view_view, reference_max); } } static void test_minloc(int N) { using value_type = typename Kokkos::MinLoc::value_type; Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_min = std::numeric_limits::max(); int reference_loc = -1; for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 100000 + 2); if (h_values(i) < reference_min) { reference_min = h_values(i); reference_loc = i; } else if (h_values(i) == reference_min) { // Make min unique. h_values(i) += Scalar(1); } } Kokkos::deep_copy(values, h_values); MinLocFunctor f; f.values = values; MinLocFunctorTag f_tag; f_tag.values = values; { value_type min_scalar; Kokkos::MinLoc reducer_scalar(min_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(min_scalar.val, reference_min); ASSERT_EQ(min_scalar.loc, reference_loc); min_scalar = value_type(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(min_scalar.val, reference_min); ASSERT_EQ(min_scalar.loc, reference_loc); value_type min_scalar_view = reducer_scalar.reference(); ASSERT_EQ(min_scalar_view.val, reference_min); ASSERT_EQ(min_scalar_view.loc, reference_loc); } { Kokkos::View min_view("View"); Kokkos::MinLoc reducer_view(min_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); value_type min_view_scalar = min_view(); ASSERT_EQ(min_view_scalar.val, reference_min); ASSERT_EQ(min_view_scalar.loc, reference_loc); value_type min_view_view = reducer_view.reference(); ASSERT_EQ(min_view_view.val, reference_min); ASSERT_EQ(min_view_view.loc, reference_loc); } } static void test_minloc_2d(int N) { using reducer_type = Kokkos::MinLoc; using value_type = typename reducer_type::value_type; Kokkos::View values("Values", N, N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_min = std::numeric_limits::max(); MyPair reference_loc = {{-1, -1}}; for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) { h_values(i, j) = (Scalar)(rand() % 100000 + 2); if (h_values(i, j) < reference_min) { reference_min = h_values(i, j); reference_loc = {{i, j}}; } else if (h_values(i, j) == reference_min) { // Make min unique. h_values(i, j) += Scalar(1); } } Kokkos::deep_copy(values, h_values); MinLocFunctor2D f; f.values = values; { value_type min_scalar; reducer_type reducer_scalar(min_scalar); Kokkos::parallel_reduce( Kokkos::MDRangePolicy, ExecSpace>({0, 0}, {N, N}), f, reducer_scalar); ASSERT_EQ(min_scalar.val, reference_min); ASSERT_EQ(min_scalar.loc, reference_loc); } } static void test_maxloc(int N) { using value_type = typename Kokkos::MaxLoc::value_type; Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_max = std::numeric_limits::min(); int reference_loc = -1; for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 100000 + 2); if (h_values(i) > reference_max) { reference_max = h_values(i); reference_loc = i; } else if (h_values(i) == reference_max) { // Make max unique. h_values(i) -= Scalar(1); } } Kokkos::deep_copy(values, h_values); MaxLocFunctor f; f.values = values; MaxLocFunctorTag f_tag; f_tag.values = values; { value_type max_scalar; Kokkos::MaxLoc reducer_scalar(max_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(max_scalar.val, reference_max); ASSERT_EQ(max_scalar.loc, reference_loc); max_scalar = value_type(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(max_scalar.val, reference_max); ASSERT_EQ(max_scalar.loc, reference_loc); value_type max_scalar_view = reducer_scalar.reference(); ASSERT_EQ(max_scalar_view.val, reference_max); ASSERT_EQ(max_scalar_view.loc, reference_loc); } { Kokkos::View max_view("View"); Kokkos::MaxLoc reducer_view(max_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); value_type max_view_scalar = max_view(); ASSERT_EQ(max_view_scalar.val, reference_max); ASSERT_EQ(max_view_scalar.loc, reference_loc); value_type max_view_view = reducer_view.reference(); ASSERT_EQ(max_view_view.val, reference_max); ASSERT_EQ(max_view_view.loc, reference_loc); } } static void test_maxloc_2d(int N) { using reducer_type = Kokkos::MaxLoc; using value_type = typename reducer_type::value_type; Kokkos::View values("Values", N, N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_max = std::numeric_limits::min(); MyPair reference_loc = {{-1, -1}}; for (int i = 0; i < N; ++i) for (int j = 0; j < N; ++j) { h_values(i, j) = (Scalar)(rand() % 100000 + 2); if (h_values(i, j) > reference_max) { reference_max = h_values(i, j); reference_loc = {{i, j}}; } else if (h_values(i, j) == reference_max) { // Make max unique. h_values(i, j) -= Scalar(1); } } Kokkos::deep_copy(values, h_values); MaxLocFunctor2D f; f.values = values; { value_type max_scalar; reducer_type reducer_scalar(max_scalar); Kokkos::parallel_reduce( Kokkos::MDRangePolicy, ExecSpace>({0, 0}, {N, N}), f, reducer_scalar); ASSERT_EQ(max_scalar.val, reference_max); ASSERT_EQ(max_scalar.loc, reference_loc); } } static void test_minmaxloc(int N) { using value_type = typename Kokkos::MinMaxLoc::value_type; Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_max = std::numeric_limits::min(); Scalar reference_min = std::numeric_limits::max(); int reference_minloc = -1; int reference_maxloc = -1; for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 100000 + 2); } for (int i = 0; i < N; i++) { if (h_values(i) > reference_max) { reference_max = h_values(i); reference_maxloc = i; } else if (h_values(i) == reference_max) { // Make max unique. h_values(i) -= Scalar(1); } } for (int i = 0; i < N; i++) { if (h_values(i) < reference_min) { reference_min = h_values(i); reference_minloc = i; } else if (h_values(i) == reference_min) { // Make min unique. h_values(i) += Scalar(1); } } Kokkos::deep_copy(values, h_values); MinMaxLocFunctor f; f.values = values; MinMaxLocFunctorTag f_tag; f_tag.values = values; { value_type minmax_scalar; Kokkos::MinMaxLoc reducer_scalar(minmax_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(minmax_scalar.min_val, reference_min); for (int i = 0; i < N; i++) { if ((i == minmax_scalar.min_loc) && (h_values(i) == reference_min)) { reference_minloc = i; } } ASSERT_EQ(minmax_scalar.min_loc, reference_minloc); ASSERT_EQ(minmax_scalar.max_val, reference_max); for (int i = 0; i < N; i++) { if ((i == minmax_scalar.max_loc) && (h_values(i) == reference_max)) { reference_maxloc = i; } } ASSERT_EQ(minmax_scalar.max_loc, reference_maxloc); minmax_scalar = value_type(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(minmax_scalar.min_val, reference_min); for (int i = 0; i < N; i++) { if ((i == minmax_scalar.min_loc) && (h_values(i) == reference_min)) { reference_minloc = i; } } ASSERT_EQ(minmax_scalar.min_loc, reference_minloc); ASSERT_EQ(minmax_scalar.max_val, reference_max); for (int i = 0; i < N; i++) { if ((i == minmax_scalar.max_loc) && (h_values(i) == reference_max)) { reference_maxloc = i; } } ASSERT_EQ(minmax_scalar.max_loc, reference_maxloc); value_type minmax_scalar_view = reducer_scalar.reference(); ASSERT_EQ(minmax_scalar_view.min_val, reference_min); ASSERT_EQ(minmax_scalar_view.min_loc, reference_minloc); ASSERT_EQ(minmax_scalar_view.max_val, reference_max); ASSERT_EQ(minmax_scalar_view.max_loc, reference_maxloc); } { Kokkos::View minmax_view("View"); Kokkos::MinMaxLoc reducer_view(minmax_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); value_type minmax_view_scalar = minmax_view(); ASSERT_EQ(minmax_view_scalar.min_val, reference_min); ASSERT_EQ(minmax_view_scalar.min_loc, reference_minloc); ASSERT_EQ(minmax_view_scalar.max_val, reference_max); ASSERT_EQ(minmax_view_scalar.max_loc, reference_maxloc); value_type minmax_view_view = reducer_view.reference(); ASSERT_EQ(minmax_view_view.min_val, reference_min); ASSERT_EQ(minmax_view_view.min_loc, reference_minloc); ASSERT_EQ(minmax_view_view.max_val, reference_max); ASSERT_EQ(minmax_view_view.max_loc, reference_maxloc); } } static void test_minmaxloc_2d(int N) { using reducer_type = Kokkos::MinMaxLoc; using value_type = typename reducer_type::value_type; Kokkos::View values("Values", N, N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_max = std::numeric_limits::min(); Scalar reference_min = std::numeric_limits::max(); MyPair reference_minloc = {{-1, -1}}; MyPair reference_maxloc = {{-1, -1}}; for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) { h_values(i, j) = (Scalar)(rand() % 100000 + 2); } for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) { if (h_values(i, j) > reference_max) { reference_max = h_values(i, j); reference_maxloc = {{i, j}}; } else if (h_values(i, j) == reference_max) { // Make max unique. h_values(i, j) -= Scalar(1); } } for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) { if (h_values(i, j) < reference_min) { reference_min = h_values(i, j); reference_minloc = {{i, j}}; } else if (h_values(i, j) == reference_min) { // Make min unique. h_values(i, j) += Scalar(1); } } Kokkos::deep_copy(values, h_values); MinMaxLocFunctor2D f; f.values = values; { value_type minmax_scalar; reducer_type reducer_scalar(minmax_scalar); Kokkos::parallel_reduce( Kokkos::MDRangePolicy, ExecSpace>({0, 0}, {N, N}), f, reducer_scalar); ASSERT_EQ(minmax_scalar.min_val, reference_min); for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) { if ((minmax_scalar.min_loc == MyPair{{i, j}}) && (h_values(i, j) == reference_min)) { reference_minloc = {{i, j}}; } } ASSERT_EQ(minmax_scalar.min_loc, reference_minloc); ASSERT_EQ(minmax_scalar.max_val, reference_max); for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) { if ((minmax_scalar.max_loc == MyPair{{i, j}}) && (h_values(i, j) == reference_max)) { reference_maxloc = {{i, j}}; } } ASSERT_EQ(minmax_scalar.max_loc, reference_maxloc); } } static void test_BAnd(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_band = Scalar() | (~Scalar()); for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 100000 + 1); reference_band = reference_band & h_values(i); } Kokkos::deep_copy(values, h_values); BAndFunctor f; f.values = values; BAndFunctorTag f_tag; f_tag.values = values; Scalar init = Scalar() | (~Scalar()); { Scalar band_scalar = init; Kokkos::BAnd reducer_scalar(band_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(band_scalar, reference_band); band_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(band_scalar, reference_band); Scalar band_scalar_view = reducer_scalar.reference(); ASSERT_EQ(band_scalar_view, reference_band); } { Kokkos::View band_view("View"); band_view() = init; Kokkos::BAnd reducer_view(band_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Scalar band_view_scalar = band_view(); ASSERT_EQ(band_view_scalar, reference_band); Scalar band_view_view = reducer_view.reference(); ASSERT_EQ(band_view_view, reference_band); } } static void test_BOr(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_bor = Scalar() & (~Scalar()); for (int i = 0; i < N; i++) { h_values(i) = (Scalar)((rand() % 100000 + 1) * 2); reference_bor = reference_bor | h_values(i); } Kokkos::deep_copy(values, h_values); BOrFunctor f; f.values = values; BOrFunctorTag f_tag; f_tag.values = values; Scalar init = Scalar() & (~Scalar()); { Scalar bor_scalar = init; Kokkos::BOr reducer_scalar(bor_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(bor_scalar, reference_bor); bor_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(bor_scalar, reference_bor); Scalar bor_scalar_view = reducer_scalar.reference(); ASSERT_EQ(bor_scalar_view, reference_bor); } { Kokkos::View bor_view("View"); bor_view() = init; Kokkos::BOr reducer_view(bor_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Scalar bor_view_scalar = bor_view(); ASSERT_EQ(bor_view_scalar, reference_bor); Scalar bor_view_view = reducer_view.reference(); ASSERT_EQ(bor_view_view, reference_bor); } } static void test_LAnd(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_land = 1; for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 2); reference_land = reference_land && h_values(i); } Kokkos::deep_copy(values, h_values); LAndFunctor f; f.values = values; LAndFunctorTag f_tag; f_tag.values = values; Scalar init = 1; { Scalar land_scalar = init; Kokkos::LAnd reducer_scalar(land_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(land_scalar, reference_land); land_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(land_scalar, reference_land); Scalar land_scalar_view = reducer_scalar.reference(); ASSERT_EQ(land_scalar_view, reference_land); } { Kokkos::View land_view("View"); land_view() = init; Kokkos::LAnd reducer_view(land_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Scalar land_view_scalar = land_view(); ASSERT_EQ(land_view_scalar, reference_land); Scalar land_view_view = reducer_view.reference(); ASSERT_EQ(land_view_view, reference_land); } } static void test_LOr(int N) { Kokkos::View values("Values", N); auto h_values = Kokkos::create_mirror_view(values); Scalar reference_lor = 0; for (int i = 0; i < N; i++) { h_values(i) = (Scalar)(rand() % 2); reference_lor = reference_lor || h_values(i); } Kokkos::deep_copy(values, h_values); LOrFunctor f; f.values = values; LOrFunctorTag f_tag; f_tag.values = values; Scalar init = 0; { Scalar lor_scalar = init; Kokkos::LOr reducer_scalar(lor_scalar); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_scalar); ASSERT_EQ(lor_scalar, reference_lor); lor_scalar = init; Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f_tag, reducer_scalar); ASSERT_EQ(lor_scalar, reference_lor); Scalar lor_scalar_view = reducer_scalar.reference(); ASSERT_EQ(lor_scalar_view, reference_lor); } { Kokkos::View lor_view("View"); lor_view() = init; Kokkos::LOr reducer_view(lor_view); Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), f, reducer_view); Kokkos::fence(); Scalar lor_view_scalar = lor_view(); ASSERT_EQ(lor_view_scalar, reference_lor); Scalar lor_view_view = reducer_view.reference(); ASSERT_EQ(lor_view_view, reference_lor); } } static void execute_float() { test_sum(10001); test_prod(35); test_min(10003); #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_minloc(10003); // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_minloc_2d(100); #endif #endif test_max(10007); #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_maxloc(10007); // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_maxloc_2d(100); #endif #endif // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENMPTARGET - The minmaxloc test fails llvm < 13 version, // test_minmaxloc_2d requires custom reductions #if defined(KOKKOS_ENABLE_OPENMPTARGET) #if defined(KOKKOS_COMPILER_CLANG) && (KOKKOS_COMPILER_CLANG >= 1300) && \ (KOKKOS_COMPILER_CLANG <= 1700) test_minmaxloc(10007); #else if (!std::is_same_v) test_minmaxloc(10007); #endif #else test_minmaxloc(10007); test_minmaxloc_2d(100); #endif #endif } // NOTE test_prod generates N random numbers between 1 and 4. // Although unlikely, the test below could still in principle overflow. // For reference log(numeric_limits)/log(4) is 15.5 static void execute_integer() { test_sum(10001); test_prod(sizeof(Scalar) > 4 ? 35 : 19); // avoid int overflow (see above) test_min(10003); #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_minloc(10003); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_minloc_2d(100); #endif #endif test_max(10007); #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. test_maxloc(10007); #if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same_v) #endif // FIXME_OPENMPTARGET requires custom reductions. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) test_maxloc_2d(100); #endif #endif // FIXME_OPENACC - OpenACC (V3.3) does not support custom reductions. #if !defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENMPTARGET - The minmaxloc test fails llvm < 13 version, // the minmaxloc_2d test requires custom reductions. #if defined(KOKKOS_ENABLE_OPENMPTARGET) #if defined(KOKKOS_COMPILER_CLANG) && (KOKKOS_COMPILER_CLANG >= 1300) test_minmaxloc(10007); #else if (!std::is_same_v) test_minmaxloc(10007); #endif #else test_minmaxloc(10007); test_minmaxloc_2d(100); #endif #endif test_BAnd(35); test_BOr(35); test_LAnd(35); test_LOr(35); } static void execute_basic() { test_sum(10001); test_prod(35); } static void execute_bool() { test_LAnd(10001); test_LOr(35); } }; } // namespace Test