//@HEADER
// ************************************************************************
//
//                        Kokkos v. 4.0
//       Copyright (2022) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <gtest/gtest.h>

#include <Kokkos_Core.hpp>
#include <Kokkos_Graph.hpp>

namespace Test {

template <class ValueType>
struct NoOpReduceFunctor {
  KOKKOS_FUNCTION void operator()(int, ValueType&) const {
    Kokkos::abort("Should never be called!");
  }
  KOKKOS_FUNCTION void operator()(int, int, ValueType&) const {
    Kokkos::abort("Should never be called!");
  }
  KOKKOS_FUNCTION void operator()(
      const typename Kokkos::TeamPolicy<TEST_EXECSPACE>::member_type&,
      ValueType&) const {
    Kokkos::abort("Should never be called!");
  }
};

template <class ExecSpace>
struct CountTestFunctor {
  using value_type = int;
  template <class T>
  using atomic_view =
      Kokkos::View<T, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>>;
  atomic_view<int> count;
  atomic_view<int> bugs;
  int expected_count_min;
  int expected_count_max;

  template <class... Ts>
  KOKKOS_FUNCTION void operator()(Ts&&...) const noexcept {
    bugs() += int(count() > expected_count_max || count() < expected_count_min);
    count()++;
  }
};

template <class ExecSpace, class T>
struct SetViewToValueFunctor {
  using value_type = T;
  using view_type =
      Kokkos::View<T, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>>;
  view_type v;
  T value;

  template <class... Ts>
  KOKKOS_FUNCTION void operator()(Ts&&...) const noexcept {
    v() = value;
  }
};

template <class ExecSpace, class T>
struct SetResultToViewFunctor {
  using value_type = T;
  using view_type =
      Kokkos::View<T, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>>;
  view_type v;

  template <class U>
  KOKKOS_FUNCTION void operator()(U&&, value_type& val) const noexcept {
    val += v();
  }
};

struct TEST_CATEGORY_FIXTURE(graph) : public ::testing::Test {
 public:
  using count_functor      = CountTestFunctor<TEST_EXECSPACE>;
  using set_functor        = SetViewToValueFunctor<TEST_EXECSPACE, int>;
  using set_result_functor = SetResultToViewFunctor<TEST_EXECSPACE, int>;
  using view_type          = Kokkos::View<int, TEST_EXECSPACE>;
  using atomic_view_type   = typename count_functor::template atomic_view<int>;
  using view_host          = Kokkos::View<int, Kokkos::HostSpace>;
  atomic_view_type count{"count"};
  atomic_view_type bugs{"bugs"};
  view_host count_host{"count_host"};
  view_host bugs_host{"bugs_host"};
  TEST_EXECSPACE ex{};

 protected:
  void SetUp() override {
    Kokkos::deep_copy(ex, count, 0);
    Kokkos::deep_copy(ex, bugs, 0);
    ex.fence();
  }
};

TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_one) {
  auto graph =
      Kokkos::Experimental::create_graph<TEST_EXECSPACE>([&](auto root) {
        root.then_parallel_for(1, count_functor{count, bugs, 0, 0});
      });
  graph.submit();
  Kokkos::deep_copy(graph.get_execution_space(), count_host, count);
  Kokkos::deep_copy(graph.get_execution_space(), bugs_host, bugs);
  graph.get_execution_space().fence();
  ASSERT_EQ(1, count_host());
  ASSERT_EQ(0, bugs_host());
}

TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_one_rvalue) {
  Kokkos::Experimental::create_graph(ex, [&](auto root) {
    root.then_parallel_for(1, count_functor{count, bugs, 0, 0});
  }).submit();
  Kokkos::deep_copy(ex, count_host, count);
  Kokkos::deep_copy(ex, bugs_host, bugs);
  ex.fence();
  ASSERT_EQ(1, count_host());
  ASSERT_EQ(0, bugs_host());
}
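// The next test hangs five kernel nodes off a single when_all node, covering
// then_parallel_for with a flat extent, a RangePolicy, an MDRangePolicy, a
// TeamPolicy, and a two-iteration launch; together they increment the counter
// six times.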
TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_six) {
#ifdef KOKKOS_ENABLE_OPENMPTARGET  // FIXME_OPENMPTARGET team_size incompatible
  if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::OpenMPTarget>)
    GTEST_SKIP() << "skipping since OpenMPTarget can't use team_size 1";
#endif
#if defined(KOKKOS_ENABLE_SYCL)  // FIXME_SYCL
  if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::SYCL>)
    GTEST_SKIP() << "skipping since test case is known to fail with SYCL";
#endif

  auto graph = Kokkos::Experimental::create_graph(ex, [&](auto root) {
    auto f_setup_count = root.then_parallel_for(1, set_functor{count, 0});
    auto f_setup_bugs  = root.then_parallel_for(1, set_functor{bugs, 0});

    //----------------------------------------
    auto ready = Kokkos::Experimental::when_all(f_setup_count, f_setup_bugs);

    //----------------------------------------
    ready.then_parallel_for(1, count_functor{count, bugs, 0, 6});
    //----------------------------------------
    ready.then_parallel_for(Kokkos::RangePolicy<TEST_EXECSPACE>{0, 1},
                            count_functor{count, bugs, 0, 6});
    //----------------------------------------
    ready.then_parallel_for(
        Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>{{0, 0}, {1, 1}},
        count_functor{count, bugs, 0, 6});
    //----------------------------------------
    ready.then_parallel_for(Kokkos::TeamPolicy<TEST_EXECSPACE>{1, 1},
                            count_functor{count, bugs, 0, 6});
    //----------------------------------------
    ready.then_parallel_for(2, count_functor{count, bugs, 0, 6});
    //----------------------------------------
  });
  graph.submit();
  Kokkos::deep_copy(ex, count_host, count);
  Kokkos::deep_copy(ex, bugs_host, bugs);
  ex.fence();
  ASSERT_EQ(6, count_host());
  ASSERT_EQ(0, bugs_host());
}

TEST_F(TEST_CATEGORY_FIXTURE(graph), when_all_cycle) {
  view_type reduction_out{"reduction_out"};
  view_host reduction_host{"reduction_host"};
  Kokkos::Experimental::create_graph(ex, [&](auto root) {
    //----------------------------------------
    // Test when_all when redundant dependencies are given
    auto f1 = root.then_parallel_for(1, set_functor{count, 0});
    auto f2 = f1.then_parallel_for(1, count_functor{count, bugs, 0, 0});
    auto f3 = f2.then_parallel_for(5, count_functor{count, bugs, 1, 5});
    auto f4 = Kokkos::Experimental::when_all(f2, f3).then_parallel_for(
        1, count_functor{count, bugs, 6, 6});
    Kokkos::Experimental::when_all(f1, f4, f3)
        .then_parallel_reduce(6, set_result_functor{count}, reduction_out);
    //----------------------------------------
  }).submit();
  Kokkos::deep_copy(ex, bugs_host, bugs);
  Kokkos::deep_copy(ex, count_host, count);
  Kokkos::deep_copy(ex, reduction_host, reduction_out);
  ex.fence();
  ASSERT_EQ(0, bugs_host());
  ASSERT_EQ(7, count_host());
  ASSERT_EQ(42, reduction_host());
  //----------------------------------------
}

// This test is disabled because we don't currently support copying to host,
// even asynchronously. We _may_ want to do that eventually?
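// The intent is that re-submitting the same graph instance re-executes every
// node, with the trailing reductions writing their results into host views.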
TEST_F(TEST_CATEGORY_FIXTURE(graph), DISABLED_repeat_chain) {
  auto graph = Kokkos::Experimental::create_graph(
      ex, [&, count_host = count_host](auto root) {
        //----------------------------------------
        root.then_parallel_for(1, set_functor{count, 0})
            .then_parallel_for(1, count_functor{count, bugs, 0, 0})
            .then_parallel_for(1, count_functor{count, bugs, 1, 1})
            .then_parallel_reduce(1, set_result_functor{count}, count_host)
            .then_parallel_reduce(
                1, set_result_functor{bugs},
                Kokkos::Sum<int, Kokkos::HostSpace>{bugs_host});
        //----------------------------------------
      });

  //----------------------------------------
  constexpr int repeats = 10;

  for (int i = 0; i < repeats; ++i) {
    graph.submit();
    ex.fence();
    EXPECT_EQ(2, count_host());
    EXPECT_EQ(0, bugs_host());
  }
  //----------------------------------------
}

TEST_F(TEST_CATEGORY_FIXTURE(graph), zero_work_reduce) {
  auto graph = Kokkos::Experimental::create_graph(
      ex, [&](Kokkos::Experimental::GraphNodeRef<TEST_EXECSPACE> root) {
        NoOpReduceFunctor<int> no_op_functor;
        root.then_parallel_reduce(Kokkos::RangePolicy<TEST_EXECSPACE>(0, 0),
                                  no_op_functor, count)
#if !defined(KOKKOS_ENABLE_SYCL)  // FIXME_SYCL
#if !defined(KOKKOS_ENABLE_CUDA) && \
    !defined(KOKKOS_ENABLE_HIP)  // FIXME_CUDA FIXME_HIP
            .then_parallel_reduce(
                Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>{{0, 0},
                                                                       {0, 0}},
                no_op_functor, count)
#endif
            .then_parallel_reduce(
                Kokkos::TeamPolicy<TEST_EXECSPACE>{0, Kokkos::AUTO},
                no_op_functor, count)
#endif
            ;
      });
// These fences are only necessary because of the weirdness of how CUDA
// UVM works on pre-Pascal cards.
#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM) && \
    (defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL))
  Kokkos::fence();
#endif
  graph.submit();
  Kokkos::deep_copy(ex, count, 1);
// These fences are only necessary because of the weirdness of how CUDA
// UVM works on pre-Pascal cards.
#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM) && \
    (defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL))
  if constexpr (std::is_same_v<TEST_EXECSPACE, Kokkos::Cuda>) Kokkos::fence();
#endif
#ifdef KOKKOS_ENABLE_HPX  // FIXME_HPX graph.submit() isn't properly enqueued
  if constexpr (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::HPX>)
    Kokkos::fence();
#endif
  graph.submit();
  Kokkos::deep_copy(ex, count_host, count);
  ex.fence();
  ASSERT_EQ(count_host(), 0);
}

}  // end namespace Test