//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #include namespace Test { namespace TeamMDRange { struct FillFlattenedIndex { explicit FillFlattenedIndex(int n0, int n1, int n2, int n3 = 1, int n4 = 1, int n5 = 1, int n6 = 1, int n7 = 1) : initValue{n0, n1, n2, n3, n4, n5, n6, n7} {} KOKKOS_INLINE_FUNCTION int operator()(int n0, int n1, int n2, int n3 = 0, int n4 = 0, int n5 = 0, int n6 = 0, int n7 = 0) const { return ((((((n7 * initValue[7] + n6) * initValue[6] + n5) * initValue[5] + n4) * initValue[4] + n3) * initValue[3] + n2) * initValue[2] + n1) * initValue[1] + n0; } int initValue[8]; }; struct TestTeamMDParallelFor { using DataType = int64_t; using DimsType = int[8]; template static void check_result_3D(HostViewType h_view, FillFunctor const& fillFunctor) { for (size_t i = 0; i < h_view.extent(0); ++i) { for (size_t j = 0; j < h_view.extent(1); ++j) { for (size_t k = 0; k < h_view.extent(2); ++k) { EXPECT_EQ(h_view(i, j, k), fillFunctor(i, j, k)); } } } } template static void check_result_4D(HostViewType h_view, FillFunctor& fillFunctor) { for (size_t i = 0; i < h_view.extent(0); ++i) { for (size_t j = 0; j < h_view.extent(1); ++j) { for (size_t k = 0; k < h_view.extent(2); ++k) { for (size_t l = 0; l < h_view.extent(3); ++l) { EXPECT_EQ(h_view(i, j, k, l), fillFunctor(i, j, k, l)); } } } } } template static void check_result_5D(HostViewType h_view, FillFunctor& fillFunctor) { for (size_t i = 0; i < h_view.extent(0); ++i) { for (size_t j = 0; j < h_view.extent(1); ++j) { for (size_t k = 0; k < h_view.extent(2); ++k) { for (size_t l = 0; l < h_view.extent(3); ++l) { for (size_t m = 0; m < h_view.extent(4); ++m) { EXPECT_EQ(h_view(i, j, k, l, m), fillFunctor(i, j, k, l, m)); } } } } } } template static void check_result_6D(HostViewType h_view, FillFunctor& fillFunctor) { for (size_t i = 0; i < h_view.extent(0); ++i) { for (size_t j = 0; j < h_view.extent(1); ++j) { for (size_t k = 0; k < h_view.extent(2); ++k) { for (size_t l = 0; l < h_view.extent(3); ++l) { for (size_t m = 0; m < h_view.extent(4); ++m) { for (size_t n = 0; n < h_view.extent(5); ++n) { EXPECT_EQ(h_view(i, j, k, l, m, n), fillFunctor(i, j, k, l, m, n)); } } } } } } } template static void check_result_7D(HostViewType h_view, FillFunctor& fillFunctor) { for (size_t i = 0; i < h_view.extent(0); ++i) { for (size_t j = 0; j < h_view.extent(1); ++j) { for (size_t k = 0; k < h_view.extent(2); ++k) { for (size_t l = 0; l < h_view.extent(3); ++l) { for (size_t m = 0; m < h_view.extent(4); ++m) { for (size_t n = 0; n < h_view.extent(5); ++n) { for (size_t o = 0; o < h_view.extent(6); ++o) { EXPECT_EQ(h_view(i, j, k, l, m, n, o), fillFunctor(i, j, k, l, m, n, o)); } } } } } } } } template static void check_result_8D(HostViewType h_view, FillFunctor& fillFunctor) { for (size_t i = 0; i < h_view.extent(0); ++i) { for (size_t j = 0; j < h_view.extent(1); ++j) { for (size_t k = 0; k < h_view.extent(2); ++k) { for (size_t l = 0; l < h_view.extent(3); ++l) { for (size_t m = 0; m < h_view.extent(4); ++m) { for (size_t n = 0; n < h_view.extent(5); ++n) { for (size_t o = 0; o < h_view.extent(6); ++o) { for (size_t p = 0; p < h_view.extent(7); ++p) { EXPECT_EQ(h_view(i, j, k, l, m, n, o, p), fillFunctor(i, j, k, l, m, n, o, p)); } } } } } } } } } }; // If KOKKOS_ENABLE_CUDA_LAMBDA is off, extended lambdas used in parallel_for // and parallel_reduce in these tests will not compile correctly #if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) template struct TestTeamThreadMDRangeParallelFor : public TestTeamMDParallelFor { using TeamType = typename Kokkos::TeamPolicy::member_type; template static void test_parallel_for_3D_TeamThreadMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; ViewType v("v", leagueSize, n0, n1); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1); Kokkos::parallel_for(teamRange, [=](int i, int j) { v(leagueRank, i, j) += fillFlattenedIndex(leagueRank, i, j); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_3D(h_view, fillFlattenedIndex); } template static void test_parallel_for_4D_TeamThreadMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; ViewType v("v", leagueSize, n0, n1, n2); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2); Kokkos::parallel_for(teamRange, [=](int i, int j, int k) { v(leagueRank, i, j, k) += fillFlattenedIndex(leagueRank, i, j, k); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_4D(h_view, fillFlattenedIndex); } template static void test_parallel_for_5D_TeamThreadMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; ViewType v("v", leagueSize, n0, n1, n2, n3); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3); Kokkos::parallel_for(teamRange, [=](int i, int j, int k, int l) { v(leagueRank, i, j, k, l) += fillFlattenedIndex(leagueRank, i, j, k, l); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_5D(h_view, fillFlattenedIndex); } template static void test_parallel_for_6D_TeamThreadMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3, n4); Kokkos::parallel_for( teamRange, [=](int i, int j, int k, int l, int m) { v(leagueRank, i, j, k, l, m) += fillFlattenedIndex(leagueRank, i, j, k, l, m); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_6D(h_view, fillFlattenedIndex); } template static void test_parallel_for_7D_TeamThreadMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5); Kokkos::parallel_for( teamRange, [=](int i, int j, int k, int l, int m, int n) { v(leagueRank, i, j, k, l, m, n) += fillFlattenedIndex(leagueRank, i, j, k, l, m, n); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_7D(h_view, fillFlattenedIndex); } template static void test_parallel_for_8D_TeamThreadMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; int n6 = dims[7]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5, n6); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5, n6); Kokkos::parallel_for( teamRange, [=](int i, int j, int k, int l, int m, int n, int o) { v(leagueRank, i, j, k, l, m, n, o) += fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_8D(h_view, fillFlattenedIndex); } template static void test_parallel_single_direction_test(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int n0 = dims[0]; int n1 = dims[1]; int n2 = dims[2]; ViewType v("v", n0, n1, n2); FillFlattenedIndex fillFlattenedIndex(n0, n1, n2); Kokkos::parallel_for( Kokkos::TeamPolicy(1, Kokkos::AUTO), KOKKOS_LAMBDA(const TeamType& team) { auto teamRange = Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2); Kokkos::parallel_for(teamRange, [=](int i, int j, int k) { v(i, j, k) += fillFlattenedIndex(i, j, k); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_3D(h_view, fillFlattenedIndex); } }; template struct TestThreadVectorMDRangeParallelFor : public TestTeamMDParallelFor { using TeamType = typename Kokkos::TeamPolicy::member_type; template static void test_parallel_for_4D_ThreadVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; ViewType v("v", leagueSize, n0, n1, n2); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto teamRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2); Kokkos::parallel_for(teamThreadRange, [=](int i) { Kokkos::parallel_for(teamRange, [=](int j, int k) { v(leagueRank, i, j, k) += fillFlattenedIndex(leagueRank, i, j, k); }); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_4D(h_view, fillFlattenedIndex); } template static void test_parallel_for_5D_ThreadVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; ViewType v("v", leagueSize, n0, n1, n2, n3); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto teamRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3); Kokkos::parallel_for(teamThreadRange, [=](int i) { Kokkos::parallel_for(teamRange, [=](int j, int k, int l) { v(leagueRank, i, j, k, l) += fillFlattenedIndex(leagueRank, i, j, k, l); }); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_5D(h_view, fillFlattenedIndex); } template static void test_parallel_for_6D_ThreadVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto teamRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3, n4); Kokkos::parallel_for(teamThreadRange, [=](int i) { Kokkos::parallel_for(teamRange, [=](int j, int k, int l, int m) { v(leagueRank, i, j, k, l, m) += fillFlattenedIndex(leagueRank, i, j, k, l, m); }); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_6D(h_view, fillFlattenedIndex); } template static void test_parallel_for_7D_ThreadVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto teamRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3, n4, n5); Kokkos::parallel_for(teamThreadRange, [=](int i) { Kokkos::parallel_for( teamRange, [=](int j, int k, int l, int m, int n) { v(leagueRank, i, j, k, l, m, n) += fillFlattenedIndex(leagueRank, i, j, k, l, m, n); }); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_7D(h_view, fillFlattenedIndex); } template static void test_parallel_for_8D_ThreadVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; int n6 = dims[7]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5, n6); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto teamRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3, n4, n5, n6); Kokkos::parallel_for(teamThreadRange, [=](int i) { Kokkos::parallel_for( teamRange, [=](int j, int k, int l, int m, int n, int o) { v(leagueRank, i, j, k, l, m, n, o) += fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o); }); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_8D(h_view, fillFlattenedIndex); } }; template struct TestTeamVectorMDRangeParallelFor : public TestTeamMDParallelFor { using TeamType = typename Kokkos::TeamPolicy::member_type; template static void test_parallel_for_3D_TeamVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; ViewType v("v", leagueSize, n0, n1); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1); Kokkos::parallel_for(teamRange, [=](int i, int j) { v(leagueRank, i, j) += fillFlattenedIndex(leagueRank, i, j); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_3D(h_view, fillFlattenedIndex); } template static void test_parallel_for_4D_TeamVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; ViewType v("v", leagueSize, n0, n1, n2); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2); Kokkos::parallel_for(teamRange, [=](int i, int j, int k) { v(leagueRank, i, j, k) += fillFlattenedIndex(leagueRank, i, j, k); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_4D(h_view, fillFlattenedIndex); } template static void test_parallel_for_5D_TeamVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; ViewType v("v", leagueSize, n0, n1, n2, n3); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3); Kokkos::parallel_for(teamRange, [=](int i, int j, int k, int l) { v(leagueRank, i, j, k, l) += fillFlattenedIndex(leagueRank, i, j, k, l); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_5D(h_view, fillFlattenedIndex); } template static void test_parallel_for_6D_TeamVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3, n4); Kokkos::parallel_for( teamRange, [=](int i, int j, int k, int l, int m) { v(leagueRank, i, j, k, l, m) += fillFlattenedIndex(leagueRank, i, j, k, l, m); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_6D(h_view, fillFlattenedIndex); } template static void test_parallel_for_7D_TeamVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5); Kokkos::parallel_for( teamRange, [=](int i, int j, int k, int l, int m, int n) { v(leagueRank, i, j, k, l, m, n) += fillFlattenedIndex(leagueRank, i, j, k, l, m, n); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_7D(h_view, fillFlattenedIndex); } template static void test_parallel_for_8D_TeamVectorMDRange(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; int n6 = dims[7]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5, n6); Kokkos::parallel_for( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(const TeamType& team) { int leagueRank = team.league_rank(); auto teamRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5, n6); Kokkos::parallel_for( teamRange, [=](int i, int j, int k, int l, int m, int n, int o) { v(leagueRank, i, j, k, l, m, n, o) += fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_8D(h_view, fillFlattenedIndex); } template static void test_parallel_double_direction_test(DimsType const& dims) { using ViewType = typename Kokkos::View; using HostViewType = typename ViewType::HostMirror; int n0 = dims[0]; int n1 = dims[1]; int n2 = dims[2]; int n3 = dims[3]; ViewType v("v", n0, n1, n2, n3); FillFlattenedIndex fillFlattenedIndex(n0, n1, n2, n3); Kokkos::parallel_for( Kokkos::TeamPolicy(1, Kokkos::AUTO), KOKKOS_LAMBDA(const TeamType& team) { auto teamRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3); Kokkos::parallel_for(teamRange, [=](int i, int j, int k, int l) { v(i, j, k, l) += fillFlattenedIndex(i, j, k, l); }); }); HostViewType h_view = Kokkos::create_mirror_view_and_copy( typename HostViewType::traits::memory_space(), v); check_result_4D(h_view, fillFlattenedIndex); } }; struct TestTeamMDParallelReduce { using DataType = int64_t; using DimsType = int[8]; template constexpr static DataType get_expected_partial_sum(DimsType const& dims, size_t maxRank, F const& f, DimsType& indices, size_t rank) { if (rank == maxRank) { return f(indices[0], indices[1], indices[2], indices[3], indices[4], indices[5], indices[6], indices[7]); } auto& index = indices[rank]; DataType accValue = 0; for (index = 0; index < dims[rank]; ++index) { accValue += get_expected_partial_sum(dims, maxRank, f, indices, rank + 1); } return accValue; } template static DataType get_expected_sum(DimsType const& dims, size_t maxRank, F const& f) { DimsType indices = {}; return get_expected_partial_sum(dims, maxRank, f, indices, 0); } }; template struct TestTeamThreadMDRangeParallelReduce : public TestTeamMDParallelReduce { using TeamType = typename Kokkos::TeamPolicy::member_type; template static void test_parallel_reduce_for_3D_TeamThreadMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; ViewType v("v", leagueSize, n0, n1); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1); Kokkos::parallel_for( Kokkos::MDRangePolicy>({0, 0, 0}, {leagueSize, n0, n1}), KOKKOS_LAMBDA(const int i, const int j, const int k) { v(i, j, k) = fillFlattenedIndex(i, j, k); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; Kokkos::parallel_reduce( Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1), [=](const int& i, const int& j, DataType& threadSum) { threadSum += v(leagueRank, i, j); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 3, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_4D_TeamThreadMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; ViewType v("v", leagueSize, n0, n1, n2); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0}, {leagueSize, n0, n1, n2}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { v(i, j, k, l) = fillFlattenedIndex(i, j, k, l); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; Kokkos::parallel_reduce( Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2), [=](const int& i, const int& j, const int& k, DataType& threadSum) { threadSum += v(leagueRank, i, j, k); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 4, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_5D_TeamThreadMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; ViewType v("v", leagueSize, n0, n1, n2, n3); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, const int m) { v(i, j, k, l, m) = fillFlattenedIndex(i, j, k, l, m); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; Kokkos::parallel_reduce( Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3), [=](const int& i, const int& j, const int& k, const int& l, DataType& threadSum) { threadSum += v(leagueRank, i, j, k, l); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 5, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_6D_TeamThreadMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, const int m, const int n) { v(i, j, k, l, m, n) = fillFlattenedIndex(i, j, k, l, m, n); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; Kokkos::parallel_reduce( Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3, n4), [=](const int& i, const int& j, const int& k, const int& l, const int& m, DataType& threadSum) { threadSum += v(leagueRank, i, j, k, l, m); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 6, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } // MDRangePolicy only allows up to rank of 6. Because of this, expectedSum // array had to be constructed from a nested parallel_for loop. template static void test_parallel_reduce_for_7D_TeamThreadMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5); auto mdRangePolicy = Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}); Kokkos::parallel_for( mdRangePolicy, KOKKOS_LAMBDA(const int leagueRank, const int i, const int j, const int k, const int l, const int m) { for (int n = 0; n < n5; ++n) { v(leagueRank, i, j, k, l, m, n) = fillFlattenedIndex(leagueRank, i, j, k, l, m, n); } }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; Kokkos::parallel_reduce( Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5), [=](const int& i, const int& j, const int& k, const int& l, const int& m, const int& n, DataType& threadSum) { threadSum += v(leagueRank, i, j, k, l, m, n); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 7, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_8D_TeamThreadMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; int n6 = dims[7]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5, n6); auto mdRangePolicy = Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}); Kokkos::parallel_for( mdRangePolicy, KOKKOS_LAMBDA(const int leagueRank, const int i, const int j, const int k, const int l, const int m) { for (int n = 0; n < n5; ++n) { for (int o = 0; o < n6; ++o) { v(leagueRank, i, j, k, l, m, n, o) = fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o); } } }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; Kokkos::parallel_reduce( Kokkos::TeamThreadMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5, n6), [=](const int& i, const int& j, const int& k, const int& l, const int& m, const int& n, const int& o, DataType& threadSum) { threadSum += v(leagueRank, i, j, k, l, m, n, o); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 8, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } }; template struct TestThreadVectorMDRangeParallelReduce : public TestTeamMDParallelReduce { using TeamType = typename Kokkos::TeamPolicy::member_type; template static void test_parallel_reduce_for_4D_ThreadVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; ViewType v("v", leagueSize, n0, n1, n2); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0}, {leagueSize, n0, n1, n2}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { v(i, j, k, l) = fillFlattenedIndex(i, j, k, l); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto threadVectorRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2); Kokkos::parallel_for(teamThreadRange, [=, &leagueSum](const int& i) { DataType threadSum = 0; Kokkos::parallel_reduce( threadVectorRange, [=](const int& j, const int& k, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k); }, threadSum); Kokkos::single(Kokkos::PerThread(team), [&]() { leagueSum += threadSum; }); }); }, finalSum); DataType expectedSum = get_expected_sum(dims, 4, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_5D_ThreadVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; ViewType v("v", leagueSize, n0, n1, n2, n3); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, const int m) { v(i, j, k, l, m) = fillFlattenedIndex(i, j, k, l, m); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto threadVectorRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3); Kokkos::parallel_for(teamThreadRange, [=, &leagueSum](const int& i) { DataType threadSum = 0; Kokkos::parallel_reduce( threadVectorRange, [=](const int& j, const int& k, const int& l, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l); }, threadSum); Kokkos::single(Kokkos::PerThread(team), [&]() { leagueSum += threadSum; }); }); }, finalSum); DataType expectedSum = get_expected_sum(dims, 5, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_6D_ThreadVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, const int m, const int n) { v(i, j, k, l, m, n) = fillFlattenedIndex(i, j, k, l, m, n); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto threadVectorRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3, n4); Kokkos::parallel_for(teamThreadRange, [=, &leagueSum](const int& i) { DataType threadSum = 0; Kokkos::parallel_reduce( threadVectorRange, [=](const int& j, const int& k, const int& l, const int& m, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l, m); }, threadSum); Kokkos::single(Kokkos::PerThread(team), [&]() { leagueSum += threadSum; }); }); }, finalSum); DataType expectedSum = get_expected_sum(dims, 6, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_7D_ThreadVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5); auto mdRangePolicy = Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}); Kokkos::parallel_for( mdRangePolicy, KOKKOS_LAMBDA(const int leagueRank, const int i, const int j, const int k, const int l, const int m) { for (int n = 0; n < n5; ++n) { v(leagueRank, i, j, k, l, m, n) = fillFlattenedIndex(leagueRank, i, j, k, l, m, n); } }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto threadVectorRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3, n4, n5); Kokkos::parallel_for(teamThreadRange, [=, &leagueSum](const int& i) { DataType threadSum = 0; Kokkos::parallel_reduce( threadVectorRange, [=](const int& j, const int& k, const int& l, const int& m, const int& n, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l, m, n); }, threadSum); Kokkos::single(Kokkos::PerThread(team), [&]() { leagueSum += threadSum; }); }); }, finalSum); DataType expectedSum = get_expected_sum(dims, 7, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_8D_ThreadVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; int n6 = dims[7]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5, n6); auto mdRangePolicy = Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}); Kokkos::parallel_for( mdRangePolicy, KOKKOS_LAMBDA(const int leagueRank, const int i, const int j, const int k, const int l, const int m) { for (int n = 0; n < n5; ++n) { for (int o = 0; o < n6; ++o) { v(leagueRank, i, j, k, l, m, n, o) = fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o); } } }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); auto teamThreadRange = Kokkos::TeamThreadRange(team, n0); auto threadVectorRange = Kokkos::ThreadVectorMDRange, TeamType>( team, n1, n2, n3, n4, n5, n6); Kokkos::parallel_for(teamThreadRange, [=, &leagueSum](const int& i) { DataType threadSum = 0; Kokkos::parallel_reduce( threadVectorRange, [=](const int& j, const int& k, const int& l, const int& m, const int& n, const int& o, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l, m, n, o); }, threadSum); Kokkos::single(Kokkos::PerThread(team), [&]() { leagueSum += threadSum; }); }); }, finalSum); DataType expectedSum = get_expected_sum(dims, 8, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } }; template struct TestTeamVectorMDRangeParallelReduce : public TestTeamMDParallelReduce { using TeamType = typename Kokkos::TeamPolicy::member_type; template static void test_parallel_reduce_for_4D_TeamVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; ViewType v("v", leagueSize, n0, n1, n2); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0}, {leagueSize, n0, n1, n2}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { v(i, j, k, l) = fillFlattenedIndex(i, j, k, l); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; auto teamVectorRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2); Kokkos::parallel_reduce( teamVectorRange, [=](const int& i, const int& j, const int& k, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 4, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_5D_TeamVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; ViewType v("v", leagueSize, n0, n1, n2, n3); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, const int m) { v(i, j, k, l, m) = fillFlattenedIndex(i, j, k, l, m); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; auto teamVectorRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3); Kokkos::parallel_reduce( teamVectorRange, [=](const int& i, const int& j, const int& k, const int& l, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 5, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_6D_TeamVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4); Kokkos::parallel_for( Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}), KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, const int m, const int n) { v(i, j, k, l, m, n) = fillFlattenedIndex(i, j, k, l, m, n); }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; auto teamVectorRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3, n4); Kokkos::parallel_reduce( teamVectorRange, [=](const int& i, const int& j, const int& k, const int& l, const int& m, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l, m); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 6, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_7D_TeamVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5); auto mdRangePolicy = Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}); Kokkos::parallel_for( mdRangePolicy, KOKKOS_LAMBDA(const int leagueRank, const int i, const int j, const int k, const int l, const int m) { for (int n = 0; n < n5; ++n) { v(leagueRank, i, j, k, l, m, n) = fillFlattenedIndex(leagueRank, i, j, k, l, m, n); } }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; auto teamVectorRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5); Kokkos::parallel_reduce( teamVectorRange, [=](const int& i, const int& j, const int& k, const int& l, const int& m, const int& n, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l, m, n); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 7, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } template static void test_parallel_reduce_for_8D_TeamVectorMDRange( DimsType const& dims) { using ViewType = typename Kokkos::View; int leagueSize = dims[0]; int n0 = dims[1]; int n1 = dims[2]; int n2 = dims[3]; int n3 = dims[4]; int n4 = dims[5]; int n5 = dims[6]; int n6 = dims[7]; ViewType v("v", leagueSize, n0, n1, n2, n3, n4, n5, n6); FillFlattenedIndex fillFlattenedIndex(leagueSize, n0, n1, n2, n3, n4, n5, n6); auto mdRangePolicy = Kokkos::MDRangePolicy>( {0, 0, 0, 0, 0, 0}, {leagueSize, n0, n1, n2, n3, n4}); Kokkos::parallel_for( mdRangePolicy, KOKKOS_LAMBDA(const int leagueRank, const int i, const int j, const int k, const int l, const int m) { for (int n = 0; n < n5; ++n) { for (int o = 0; o < n6; ++o) { v(leagueRank, i, j, k, l, m, n, o) = fillFlattenedIndex(leagueRank, i, j, k, l, m, n, o); } } }); DataType finalSum; Kokkos::parallel_reduce( Kokkos::TeamPolicy(leagueSize, Kokkos::AUTO, #ifdef KOKKOS_ENABLE_OPENMPTARGET 2 #else Kokkos::TeamPolicy< ExecSpace>::vector_length_max() #endif ), KOKKOS_LAMBDA(TeamType const& team, DataType& leagueSum) { auto leagueRank = team.league_rank(); DataType teamSum; auto teamVectorRange = Kokkos::TeamVectorMDRange, TeamType>( team, n0, n1, n2, n3, n4, n5, n6); Kokkos::parallel_reduce( teamVectorRange, [=](const int& i, const int& j, const int& k, const int& l, const int& m, const int& n, const int& o, DataType& vectorSum) { vectorSum += v(leagueRank, i, j, k, l, m, n, o); }, teamSum); // FIXME_OPENMPTARGET #ifdef KOKKOS_ENABLE_OPENMPTARGET if (team.team_rank() == 0) leagueSum += teamSum; #else Kokkos::single(Kokkos::PerTeam(team), [&]() { leagueSum += teamSum; }); #endif }, finalSum); DataType expectedSum = get_expected_sum(dims, 8, fillFlattenedIndex); EXPECT_EQ(finalSum, expectedSum); } }; /*--------------------------------------------------------------------------*/ constexpr auto Left = Kokkos::Iterate::Left; constexpr auto Right = Kokkos::Iterate::Right; // Using prime numbers makes debugging easier // small dimensions were needed for larger dimensions to reduce test run time int dims[] = {3, 5, 7, 11, 13, 17, 19, 23}; int smallDims[] = {2, 3, 2, 3, 5, 2, 3, 5}; TEST(TEST_CATEGORY, TeamThreadMDRangeParallelFor) { TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_3D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_3D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_4D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_4D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_5D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_5D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_6D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_6D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_7D_TeamThreadMDRange(smallDims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_7D_TeamThreadMDRange(smallDims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_8D_TeamThreadMDRange(smallDims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_8D_TeamThreadMDRange(smallDims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_single_direction_test(dims); TestTeamThreadMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_single_direction_test(dims); } TEST(TEST_CATEGORY, ThreadVectorMDRangeParallelFor) { TestThreadVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_4D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_4D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_5D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_5D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_6D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_6D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelFor:: test_parallel_for_7D_ThreadVectorMDRange(smallDims); TestThreadVectorMDRangeParallelFor:: test_parallel_for_7D_ThreadVectorMDRange(smallDims); TestThreadVectorMDRangeParallelFor:: test_parallel_for_8D_ThreadVectorMDRange(smallDims); TestThreadVectorMDRangeParallelFor:: test_parallel_for_8D_ThreadVectorMDRange(smallDims); } TEST(TEST_CATEGORY, TeamVectorMDRangeParallelFor) { TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_3D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_3D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_4D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_4D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_5D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_5D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_6D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_6D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_7D_TeamVectorMDRange(smallDims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_7D_TeamVectorMDRange(smallDims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_8D_TeamVectorMDRange(smallDims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_for_8D_TeamVectorMDRange(smallDims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_double_direction_test(dims); TestTeamVectorMDRangeParallelFor< TEST_EXECSPACE>::test_parallel_double_direction_test(dims); } TEST(TEST_CATEGORY, TeamThreadMDRangeParallelReduce) { TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_3D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_3D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_4D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_4D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_5D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_5D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_6D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_6D_TeamThreadMDRange(dims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_7D_TeamThreadMDRange(smallDims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_7D_TeamThreadMDRange(smallDims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_8D_TeamThreadMDRange(smallDims); TestTeamThreadMDRangeParallelReduce:: test_parallel_reduce_for_8D_TeamThreadMDRange(smallDims); } TEST(TEST_CATEGORY, ThreadVectorMDRangeParallelReduce) { // FIXME_SYCL sycl::group_barrier doesn't work correctly for non-Intel GPUs #if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GPU) if (std::is_same_v) GTEST_SKIP() << "skipping because of bug in group_barrier implementation"; #endif TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_4D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_4D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_5D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_5D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_6D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_6D_ThreadVectorMDRange(dims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_7D_ThreadVectorMDRange(smallDims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_7D_ThreadVectorMDRange(smallDims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_8D_ThreadVectorMDRange(smallDims); TestThreadVectorMDRangeParallelReduce:: test_parallel_reduce_for_8D_ThreadVectorMDRange(smallDims); } TEST(TEST_CATEGORY, TeamVectorMDRangeParallelReduce) { // FIXME_SYCL sycl::group_barrier doesn't work correctly for non-Intel GPUs #if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GPU) if (std::is_same_v) GTEST_SKIP() << "skipping because of bug in group_barrier implementation"; #endif TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_4D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_4D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_5D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_5D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_6D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_6D_TeamVectorMDRange(dims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_7D_TeamVectorMDRange(smallDims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_7D_TeamVectorMDRange(smallDims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_8D_TeamVectorMDRange(smallDims); TestTeamVectorMDRangeParallelReduce:: test_parallel_reduce_for_8D_TeamVectorMDRange(smallDims); } #endif } // namespace TeamMDRange } // namespace Test