//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #include #include // This file is largely duplicating TestRange.hpp but it applies // Kokkos::Experimental require at every place where a parallel // operation is executed. namespace Test { namespace { template struct TestRangeRequire { using value_type = int; ///< alias required for the parallel_reduce using view_type = Kokkos::View; view_type m_flags; struct VerifyInitTag {}; struct ResetTag {}; struct VerifyResetTag {}; struct OffsetTag {}; struct VerifyOffsetTag {}; int N; static const int offset = 13; TestRangeRequire(const size_t N_) : m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), N_), N(N_) {} void test_for() { typename view_type::HostMirror host_flags = Kokkos::create_mirror_view(m_flags); Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); { using ThisType = TestRangeRequire; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName pcn(label); ASSERT_EQ(pcn.get(), label); std::string empty_label(""); Kokkos::Impl::ParallelConstructName empty_pcn( empty_label); ASSERT_EQ(empty_pcn.get(), typeid(ThisType).name()); } Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); { using ThisType = TestRangeRequire; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName pcn(label); ASSERT_EQ(pcn.get(), label); std::string empty_label(""); Kokkos::Impl::ParallelConstructName empty_pcn( empty_label); ASSERT_EQ(empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" + typeid(VerifyInitTag).name()); } Kokkos::deep_copy(host_flags, m_flags); int error_count = 0; for (int i = 0; i < N; ++i) { if (int(i) != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); Kokkos::deep_copy(host_flags, m_flags); error_count = 0; for (int i = 0; i < N; ++i) { if (int(2 * i) != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(offset, N + offset), Property()), *this); Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::Experimental::require( Kokkos::RangePolicy, VerifyOffsetTag>(0, N), Property()), *this); Kokkos::deep_copy(host_flags, m_flags); error_count = 0; for (int i = 0; i < N; ++i) { if (i + offset != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); } KOKKOS_INLINE_FUNCTION void operator()(const int i) const { m_flags(i) = i; } KOKKOS_INLINE_FUNCTION void operator()(const VerifyInitTag &, const int i) const { if (i != m_flags(i)) { Kokkos::printf("TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); } } KOKKOS_INLINE_FUNCTION void operator()(const ResetTag &, const int i) const { m_flags(i) = 2 * m_flags(i); } KOKKOS_INLINE_FUNCTION void operator()(const VerifyResetTag &, const int i) const { if (2 * i != m_flags(i)) { Kokkos::printf("TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); } } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i) const { m_flags(i - offset) = i; } KOKKOS_INLINE_FUNCTION void operator()(const VerifyOffsetTag &, const int i) const { if (i + offset != m_flags(i)) { Kokkos::printf("TestRangeRequire::test_for error at %d != %d\n", i + offset, m_flags(i)); } } //---------------------------------------- void test_reduce() { int total = 0; Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); Kokkos::parallel_reduce( "TestKernelReduce", Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this, total); // sum( 0 .. N-1 ) ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); Kokkos::parallel_reduce( Kokkos::Experimental::require( Kokkos::RangePolicy(offset, N + offset), Property()), *this, total); // sum( 1 .. N ) ASSERT_EQ(size_t((N) * (N + 1) / 2), size_t(total)); } KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &update) const { update += m_flags(i); } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i, value_type &update) const { update += 1 + m_flags(i - offset); } //---------------------------------------- void test_dynamic_policy() { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) auto const N_no_implicit_capture = N; using policy_t = Kokkos::RangePolicy >; int const concurrency = ExecSpace().concurrency(); { Kokkos::View > count("Count", concurrency); Kokkos::View a("A", N); Kokkos::parallel_for( policy_t(0, N), KOKKOS_LAMBDA(const int &i) { for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); k++) { a(i)++; } count(ExecSpace::impl_hardware_thread_id())++; }); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); ASSERT_EQ(error, 0); if ((concurrency > 1) && (N > 4 * concurrency)) { size_t min = N; size_t max = 0; for (int t = 0; t < concurrency; t++) { if (count(t) < min) min = count(t); if (count(t) > max) max = count(t); } ASSERT_LT(min, max); // if ( concurrency > 2 ) { // ASSERT_LT( 2 * min, max ); //} } } { Kokkos::View > count("Count", concurrency); Kokkos::View a("A", N); int sum = 0; Kokkos::parallel_reduce( policy_t(0, N), KOKKOS_LAMBDA(const int &i, int &lsum) { for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); k++) { a(i)++; } count(ExecSpace::impl_hardware_thread_id())++; lsum++; }, sum); ASSERT_EQ(sum, N); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); ASSERT_EQ(error, 0); if ((concurrency > 1) && (N > 4 * concurrency)) { size_t min = N; size_t max = 0; for (int t = 0; t < concurrency; t++) { if (count(t) < min) min = count(t); if (count(t) > max) max = count(t); } ASSERT_LT(min, max); // if ( concurrency > 2 ) { // ASSERT_LT( 2 * min, max ); //} } } #endif } }; } // namespace TEST(TEST_CATEGORY, range_for_require) { using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; { TestRangeRequire, Property> f(0); f.test_for(); } { TestRangeRequire, Property> f(0); f.test_for(); } { TestRangeRequire, Property> f(2); f.test_for(); } { TestRangeRequire, Property> f(3); f.test_for(); } { TestRangeRequire, Property> f(1000); f.test_for(); } { TestRangeRequire, Property> f(1001); f.test_for(); } } TEST(TEST_CATEGORY, range_reduce_require) { using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; { TestRangeRequire, Property> f(0); f.test_reduce(); } { TestRangeRequire, Property> f(0); f.test_reduce(); } { TestRangeRequire, Property> f(2); f.test_reduce(); } { TestRangeRequire, Property> f(3); f.test_reduce(); } { TestRangeRequire, Property> f(1000); f.test_reduce(); } { TestRangeRequire, Property> f(1001); f.test_reduce(); } } #ifndef KOKKOS_ENABLE_OPENMPTARGET TEST(TEST_CATEGORY, range_dynamic_policy_require) { #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \ !defined(KOKKOS_ENABLE_SYCL) using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; { TestRangeRequire, Property> f(0); f.test_dynamic_policy(); } { TestRangeRequire, Property> f(3); f.test_dynamic_policy(); } { TestRangeRequire, Property> f(1001); f.test_dynamic_policy(); } #endif } #endif } // namespace Test