//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #include #include namespace { // Extended lambdas in parallel_for and parallel_reduce will not compile if // KOKKOS_ENABLE_CUDA_LAMBDA is off #if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) struct TeamTeamCombinedReducer { public: void test_team_thread_range_only_scalars(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamThreadRange = Kokkos::TeamThreadRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_reduce( teamThreadRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += 1; localVal1 += i + 1; localVal2 += (i + 1) * n; localVal3 += n; }, teamResult0, teamResult1, teamResult2, teamResult3); Kokkos::single(Kokkos::PerTeam(team), [=]() { teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(3)); } else { EXPECT_EQ(n, hostView(0)); EXPECT_EQ((n * (n + 1) / 2), hostView(1)); EXPECT_EQ(n * n * (n + 1) / 2, hostView(2)); EXPECT_EQ(n * n, hostView(3)); } } void test_team_thread_range_only_builtin(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamThreadRange = Kokkos::TeamThreadRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_reduce( teamThreadRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += i + 1; localVal1 *= n; localVal2 = (localVal2 > (i + 1)) ? (i + 1) : localVal2; localVal3 = (localVal3 < (i + 1)) ? (i + 1) : localVal3; }, Kokkos::Sum(teamResult0), Kokkos::Prod(teamResult1), Kokkos::Min(teamResult2), Kokkos::Max(teamResult3)); Kokkos::single(Kokkos::PerTeam(team), [=]() { teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::prod(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::min(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::max(), hostView(3)); } else { EXPECT_EQ((n * (n + 1) / 2), hostView(0)); EXPECT_EQ(std::pow(n, n), hostView(1)); EXPECT_EQ(1, hostView(2)); EXPECT_EQ(n, hostView(3)); } } void test_team_thread_range_combined_reducers(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view", 4); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamThreadRange = Kokkos::TeamThreadRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_reduce( teamThreadRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += i + 1; localVal1 += i + 1; localVal2 = (localVal2 < (i + 1)) ? (i + 1) : localVal2; localVal3 += n; }, teamResult0, Kokkos::Sum(teamResult1), Kokkos::Max(teamResult2), teamResult3); Kokkos::single(Kokkos::PerTeam(team), [=]() { teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::max(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(3)); } else { EXPECT_EQ((n * (n + 1) / 2), hostView(0)); EXPECT_EQ((n * (n + 1) / 2), hostView(1)); EXPECT_EQ(n, hostView(2)); EXPECT_EQ(n * n, hostView(3)); } } void test_thread_vector_range_only_scalars(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamThreadRange = Kokkos::TeamThreadRange(team, 1); auto threadVectorRange = Kokkos::ThreadVectorRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_for(teamThreadRange, [&](int const&) { Kokkos::parallel_reduce( threadVectorRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += 1; localVal1 += i + 1; localVal2 += (i + 1) * n; localVal3 += n; }, teamResult0, teamResult1, teamResult2, teamResult3); teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(3)); } else { EXPECT_EQ(n, hostView(0)); EXPECT_EQ((n * (n + 1) / 2), hostView(1)); EXPECT_EQ(n * n * (n + 1) / 2, hostView(2)); EXPECT_EQ(n * n, hostView(3)); } } void test_thread_vector_range_only_builtin(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamThreadRange = Kokkos::TeamThreadRange(team, 1); auto threadVectorRange = Kokkos::ThreadVectorRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_for(teamThreadRange, [&](int const&) { Kokkos::parallel_reduce( threadVectorRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += i + 1; localVal1 *= n; localVal2 = (localVal2 > (i + 1)) ? (i + 1) : localVal2; localVal3 = (localVal3 < (i + 1)) ? (i + 1) : localVal3; }, Kokkos::Sum(teamResult0), Kokkos::Prod(teamResult1), Kokkos::Min(teamResult2), Kokkos::Max(teamResult3)); teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::prod(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::min(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::max(), hostView(3)); } else { EXPECT_EQ((n * (n + 1) / 2), hostView(0)); EXPECT_EQ(std::pow(n, n), hostView(1)); EXPECT_EQ(1, hostView(2)); EXPECT_EQ(n, hostView(3)); } } void test_thread_vector_range_combined_reducers(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamThreadRange = Kokkos::TeamThreadRange(team, 1); auto threadVectorRange = Kokkos::ThreadVectorRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_for(teamThreadRange, [&](int const&) { Kokkos::parallel_reduce( threadVectorRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 *= n; localVal1 += i + 1; localVal2 = (localVal2 > (i + 1)) ? (i + 1) : localVal2; localVal3 += n; }, Kokkos::Prod(teamResult0), teamResult1, Kokkos::Min(teamResult2), teamResult3); teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::prod(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::min(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(3)); } else { EXPECT_EQ(std::pow(n, n), hostView(0)); EXPECT_EQ((n * (n + 1) / 2), hostView(1)); EXPECT_EQ(1, hostView(2)); EXPECT_EQ(n * n, hostView(3)); } } void test_team_vector_range_only_scalars(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamVectorRange = Kokkos::TeamVectorRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_reduce( teamVectorRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += 1; localVal1 += i + 1; localVal2 += (i + 1) * n; localVal3 += n; }, teamResult0, teamResult1, teamResult2, teamResult3); Kokkos::single(Kokkos::PerTeam(team), [=]() { teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(3)); } else { EXPECT_EQ(n, hostView(0)); EXPECT_EQ((n * (n + 1) / 2), hostView(1)); EXPECT_EQ(n * n * (n + 1) / 2, hostView(2)); EXPECT_EQ(n * n, hostView(3)); } } void test_team_vector_range_only_builtin(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamVectorRange = Kokkos::TeamVectorRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_reduce( teamVectorRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += i + 1; localVal1 *= n; localVal2 = (localVal2 > (i + 1)) ? (i + 1) : localVal2; localVal3 = (localVal3 < (i + 1)) ? (i + 1) : localVal3; }, Kokkos::Sum(teamResult0), Kokkos::Prod(teamResult1), Kokkos::Min(teamResult2), Kokkos::Max(teamResult3)); Kokkos::single(Kokkos::PerTeam(team), [=]() { teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::prod(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::min(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::max(), hostView(3)); } else { EXPECT_EQ((n * (n + 1) / 2), hostView(0)); EXPECT_EQ(std::pow(n, n), hostView(1)); EXPECT_EQ(1, hostView(2)); EXPECT_EQ(n, hostView(3)); } } void test_team_vector_range_combined_reducers(const int n) { auto policy = Kokkos::TeamPolicy(1, Kokkos::AUTO); using team_member_type = decltype(policy)::member_type; auto teamView = Kokkos::View("view"); Kokkos::parallel_for( policy, KOKKOS_LAMBDA(team_member_type const& team) { auto teamVectorRange = Kokkos::TeamVectorRange(team, n); int teamResult0, teamResult1, teamResult2, teamResult3; Kokkos::parallel_reduce( teamVectorRange, [=](int const& i, int& localVal0, int& localVal1, int& localVal2, int& localVal3) { localVal0 += i + 1; localVal1 += i + 1; localVal2 = (localVal2 < (i + 1)) ? (i + 1) : localVal2; localVal3 += n; }, teamResult0, Kokkos::Sum(teamResult1), Kokkos::Max(teamResult2), teamResult3); Kokkos::single(Kokkos::PerTeam(team), [=]() { teamView(0) = teamResult0; teamView(1) = teamResult1; teamView(2) = teamResult2; teamView(3) = teamResult3; }); }); auto hostView = Kokkos::create_mirror_view_and_copy( Kokkos::DefaultHostExecutionSpace(), teamView); if (n == 0) { EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(0)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(1)); EXPECT_EQ(Kokkos::reduction_identity::max(), hostView(2)); EXPECT_EQ(Kokkos::reduction_identity::sum(), hostView(3)); } else { EXPECT_EQ((n * (n + 1) / 2), hostView(0)); EXPECT_EQ((n * (n + 1) / 2), hostView(1)); EXPECT_EQ(n, hostView(2)); EXPECT_EQ(n * n, hostView(3)); } } }; TEST(TEST_CATEGORY, team_thread_range_combined_reducers) { #if defined(KOKKOS_ENABLE_OPENMPTARGET) if constexpr (std::is_same_v) GTEST_SKIP() << "team_reduce with a generic reducer is not implemented for " << TEST_EXECSPACE::name(); #elif defined(KOKKOS_ENABLE_OPENACC) if constexpr (std::is_same_v) GTEST_SKIP() << "team_reduce with a generic reducer is not implemented for " << TEST_EXECSPACE::name(); #endif TeamTeamCombinedReducer tester; tester.test_team_thread_range_only_scalars(5); tester.test_team_thread_range_only_builtin(7); tester.test_team_thread_range_combined_reducers(0); tester.test_team_thread_range_combined_reducers(9); } TEST(TEST_CATEGORY, thread_vector_range_combined_reducers) { #if defined(KOKKOS_ENABLE_OPENMPTARGET) if constexpr (std::is_same_v) GTEST_SKIP() << "team_reduce with a generic reducer is not implemented for " << TEST_EXECSPACE::name(); #elif defined(KOKKOS_ENABLE_OPENACC) if constexpr (std::is_same_v) GTEST_SKIP() << "team_reduce with a generic reducer is not implemented for " << TEST_EXECSPACE::name(); #endif TeamTeamCombinedReducer tester; tester.test_thread_vector_range_only_scalars(5); tester.test_thread_vector_range_only_builtin(7); tester.test_thread_vector_range_combined_reducers(0); tester.test_thread_vector_range_combined_reducers(9); } TEST(TEST_CATEGORY, team_vector_range_combined_reducers) { #ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET if constexpr (std::is_same_v) GTEST_SKIP() << "team_reduce with a generic reducer is not implemented for " << TEST_EXECSPACE::name(); #endif #ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC if constexpr (std::is_same_v) GTEST_SKIP() << "team_reduce with a generic reducer is not implemented for " << TEST_EXECSPACE::name(); #endif TeamTeamCombinedReducer tester; tester.test_team_vector_range_only_scalars(5); tester.test_team_vector_range_only_builtin(7); tester.test_team_vector_range_combined_reducers(0); tester.test_team_vector_range_combined_reducers(9); } #endif } // namespace