//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #include #include namespace Test { namespace { template struct TestRange { using value_type = int; ///< alias required for the parallel_reduce using view_type = Kokkos::View; view_type m_flags; view_type result_view; struct VerifyInitTag {}; struct ResetTag {}; struct VerifyResetTag {}; struct OffsetTag {}; struct VerifyOffsetTag {}; int N; #ifndef KOKKOS_WORKAROUND_OPENMPTARGET_GCC static const int offset = 13; #else int offset; #endif TestRange(const size_t N_) : m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), N_), result_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, "results"), N_), N(N_) { #ifdef KOKKOS_WORKAROUND_OPENMPTARGET_GCC offset = 13; #endif } void test_for() { typename view_type::HostMirror host_flags = Kokkos::create_mirror_view(m_flags); Kokkos::parallel_for(Kokkos::RangePolicy(0, N), *this); { using ThisType = TestRange; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName pcn(label); ASSERT_EQ(pcn.get(), label); std::string empty_label(""); Kokkos::Impl::ParallelConstructName empty_pcn( empty_label); ASSERT_EQ(empty_pcn.get(), typeid(ThisType).name()); } Kokkos::parallel_for( Kokkos::RangePolicy(0, N), *this); { using ThisType = TestRange; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName pcn(label); ASSERT_EQ(pcn.get(), label); std::string empty_label(""); Kokkos::Impl::ParallelConstructName empty_pcn( empty_label); ASSERT_EQ(empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" + typeid(VerifyInitTag).name()); } Kokkos::deep_copy(host_flags, m_flags); int error_count = 0; for (int i = 0; i < N; ++i) { if (int(i) != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), *this); Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::RangePolicy(0, N), *this); Kokkos::deep_copy(host_flags, m_flags); error_count = 0; for (int i = 0; i < N; ++i) { if (int(2 * i) != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); Kokkos::parallel_for( Kokkos::RangePolicy(offset, N + offset), *this); Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::RangePolicy(0, N), *this); Kokkos::deep_copy(host_flags, m_flags); error_count = 0; for (int i = 0; i < N; ++i) { if (i + offset != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); } KOKKOS_INLINE_FUNCTION void operator()(const int i) const { m_flags(i) = i; } KOKKOS_INLINE_FUNCTION void operator()(const VerifyInitTag &, const int i) const { if (i != m_flags(i)) { Kokkos::printf("TestRange::test_for_error at %d != %d\n", i, m_flags(i)); } } KOKKOS_INLINE_FUNCTION void operator()(const ResetTag &, const int i) const { m_flags(i) = 2 * m_flags(i); } KOKKOS_INLINE_FUNCTION void operator()(const VerifyResetTag &, const int i) const { if (2 * i != m_flags(i)) { Kokkos::printf("TestRange::test_for_error at %d != %d\n", i, m_flags(i)); } } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i) const { m_flags(i - offset) = i; } KOKKOS_INLINE_FUNCTION void operator()(const VerifyOffsetTag &, const int i) const { if (i + offset != m_flags(i)) { Kokkos::printf("TestRange::test_for_error at %d != %d\n", i + offset, m_flags(i)); } } //---------------------------------------- void test_reduce() { value_type total = 0; Kokkos::parallel_for(Kokkos::RangePolicy(0, N), *this); Kokkos::parallel_reduce("TestKernelReduce", Kokkos::RangePolicy(0, N), *this, total); // sum( 0 .. N-1 ) ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); Kokkos::parallel_reduce( "TestKernelReduce_long", Kokkos::RangePolicy(0, N), *this, total); // sum( 0 .. N-1 ) ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); Kokkos::parallel_reduce( Kokkos::RangePolicy(offset, N + offset), *this, total); // sum( 1 .. N ) ASSERT_EQ(size_t((N) * (N + 1) / 2), size_t(total)); } KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &update) const { update += m_flags(i); } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i, value_type &update) const { update += 1 + m_flags(i - offset); } void test_dynamic_policy() { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) auto const N_no_implicit_capture = N; using policy_t = Kokkos::RangePolicy >; int const concurrency = ExecSpace().concurrency(); { Kokkos::View > count("Count", concurrency); Kokkos::View a("A", N); Kokkos::parallel_for( policy_t(0, N), KOKKOS_LAMBDA(const int &i) { for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); k++) { a(i)++; } count(ExecSpace::impl_hardware_thread_id())++; }); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int &i, value_type &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); ASSERT_EQ(error, 0); if ((concurrency > 1) && (N > 4 * concurrency)) { size_t min = N; size_t max = 0; for (int t = 0; t < concurrency; t++) { if (count(t) < min) min = count(t); if (count(t) > max) max = count(t); } ASSERT_LT(min, max); // if ( concurrency > 2 ) { // ASSERT_LT( 2 * min, max ); //} } } { Kokkos::View > count("Count", concurrency); Kokkos::View a("A", N); value_type sum = 0; Kokkos::parallel_reduce( policy_t(0, N), KOKKOS_LAMBDA(const int &i, value_type &lsum) { for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); k++) { a(i)++; } count(ExecSpace::impl_hardware_thread_id())++; lsum++; }, sum); ASSERT_EQ(sum, N); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int &i, value_type &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); ASSERT_EQ(error, 0); if ((concurrency > 1) && (N > 4 * concurrency)) { size_t min = N; size_t max = 0; for (int t = 0; t < concurrency; t++) { if (count(t) < min) min = count(t); if (count(t) > max) max = count(t); } ASSERT_LT(min, max); // if ( concurrency > 2 ) { // ASSERT_LT( 2 * min, max ); //} } } #endif } }; } // namespace TEST(TEST_CATEGORY, range_for) { { TestRange > f(0); f.test_for(); } { TestRange > f(0); f.test_for(); } { TestRange > f(2); f.test_for(); } { TestRange > f(3); f.test_for(); } { TestRange > f(1000); f.test_for(); } { TestRange > f(1001); f.test_for(); } } TEST(TEST_CATEGORY, range_reduce) { { TestRange > f(0); f.test_reduce(); } { TestRange > f(0); f.test_reduce(); } { TestRange > f(2); f.test_reduce(); } { TestRange > f(3); f.test_reduce(); } { TestRange > f(1000); f.test_reduce(); } { TestRange > f(1001); f.test_reduce(); } } #ifndef KOKKOS_ENABLE_OPENMPTARGET TEST(TEST_CATEGORY, range_dynamic_policy) { #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \ !defined(KOKKOS_ENABLE_SYCL) { TestRange > f(0); f.test_dynamic_policy(); } { TestRange > f(3); f.test_dynamic_policy(); } { TestRange > f(1001); f.test_dynamic_policy(); } #endif } #endif } // namespace Test