//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_TEST_SCATTER_VIEW_HPP #define KOKKOS_TEST_SCATTER_VIEW_HPP #include #include namespace Test { template struct test_scatter_view_impl_cls; template struct test_scatter_view_impl_cls { public: using scatter_view_type = Kokkos::Experimental::ScatterView; using orig_view_type = Kokkos::View; using size_type = typename Kokkos::HostSpace::size_type; scatter_view_type scatter_view; int scatterSize; test_scatter_view_impl_cls(const scatter_view_type& view) { scatter_view = view; scatterSize = 0; } void initialize(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); Kokkos::deep_copy(host_view, 0); Kokkos::fence(); Kokkos::deep_copy(orig, host_view); } void run_parallel(int n) { scatterSize = n; auto policy = Kokkos::RangePolicy(0, n); Kokkos::parallel_for("scatter_view_test: Sum", policy, *this); } KOKKOS_INLINE_FUNCTION void operator()(int i) const { auto scatter_access = scatter_view.access(); auto scatter_access_atomic = scatter_view.template access(); for (int j = 0; j < 10; ++j) { auto k = (i + j) % scatterSize; scatter_access(k, 0) += 4; ++scatter_access(k, 1); --scatter_access(k, 2); scatter_access(k, 3)++; scatter_access(k, 4)--; scatter_access(k, 5) -= 5; scatter_access_atomic(k, 6) += 2; scatter_access_atomic(k, 7)++; scatter_access_atomic(k, 8)--; --scatter_access_atomic(k, 9); ++scatter_access_atomic(k, 10); scatter_access(k, 11) -= 3; } } void validateResults(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { for (size_type j = 0; j < host_view.extent(1); ++j) { EXPECT_NEAR(host_view(i, j), NumberType(ref[j]), 1e-14) << "Data differs at indices " << i << ", " << j; } } } // check for correct padding void validateResultsForSubview( orig_view_type orig, std::pair& subRangeDim0, std::pair& subRangeDim1) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { for (size_type j = 0; j < host_view.extent(1); ++j) { auto val = host_view(i, j); if ((i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) && (j >= std::get<0>(subRangeDim1) && j < std::get<1>(subRangeDim1))) { // is in subview EXPECT_NEAR(val, NumberType(ref[j]), 1e-14) << "Data differs at indices " << i << ", " << j; } else { // is outside of subview EXPECT_NEAR(val, NumberType(0), 1e-14) << "Data differs at indices " << i << ", " << j; } } } } private: NumberType ref[12] = {80, 20, -20, 20, -20, -100, 40, 20, -20, -20, 20, -60}; }; template struct test_scatter_view_impl_cls { public: using scatter_view_type = Kokkos::Experimental::ScatterView; using orig_view_type = Kokkos::View; using size_type = typename Kokkos::HostSpace::size_type; scatter_view_type scatter_view; int scatterSize; test_scatter_view_impl_cls(const scatter_view_type& view) { scatter_view = view; scatterSize = 0; } void initialize(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { host_view(i, 0) = 1.0; host_view(i, 1) = 1.0; host_view(i, 2) = 1.0; } Kokkos::fence(); Kokkos::deep_copy(orig, host_view); } void run_parallel(int n) { scatterSize = n; auto policy = Kokkos::RangePolicy(0, n); Kokkos::parallel_for("scatter_view_test: Prod", policy, *this); } KOKKOS_INLINE_FUNCTION void operator()(int i) const { auto scatter_access = scatter_view.access(); auto scatter_access_atomic = scatter_view.template access(); for (int j = 0; j < 4; ++j) { auto k = (i + j) % scatterSize; scatter_access(k, 0) *= 4.0; scatter_access_atomic(k, 1) *= 2.0; scatter_access(k, 2) *= 1.0; } } void validateResults(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); EXPECT_NEAR(val0, 65536.0, 1e-14 * 65536.0) << "Data differs at index " << i; EXPECT_NEAR(val1, 256.0, 1e-14 * 256.0) << "Data differs at index " << i; EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0) << "Data differs at index " << i; } } // check for correct padding void validateResultsForSubview( orig_view_type orig, std::pair& subRangeDim0, std::pair& subRangeDim1) { (void)subRangeDim1; auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) { // is in subview EXPECT_NEAR(val0, 65536.0, 1e-14 * 65536.0); EXPECT_NEAR(val1, 256.0, 1e-14 * 256.0); EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0); } else { // is outside of subview EXPECT_NEAR(val0, NumberType(1), 1e-14) << "Data differs at index " << i; EXPECT_NEAR(val1, NumberType(1), 1e-14) << "Data differs at index " << i; EXPECT_NEAR(val2, NumberType(1), 1e-14) << "Data differs at index " << i; } } } }; template struct test_scatter_view_impl_cls { public: using scatter_view_type = Kokkos::Experimental::ScatterView; using orig_view_type = Kokkos::View; using size_type = typename Kokkos::HostSpace::size_type; scatter_view_type scatter_view; int scatterSize; test_scatter_view_impl_cls(const scatter_view_type& view) { scatter_view = view; scatterSize = 0; } void initialize(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { host_view(i, 0) = 999999.0; host_view(i, 1) = 999999.0; host_view(i, 2) = 999999.0; } Kokkos::fence(); Kokkos::deep_copy(orig, host_view); } void run_parallel(int n) { scatterSize = n; auto policy = Kokkos::RangePolicy(0, n); Kokkos::parallel_for("scatter_view_test: Prod", policy, *this); } KOKKOS_INLINE_FUNCTION void operator()(int i) const { auto scatter_access = scatter_view.access(); auto scatter_access_atomic = scatter_view.template access(); for (int j = 0; j < 4; ++j) { auto k = (i + j) % scatterSize; scatter_access(k, 0).update((NumberType)(j + 1) * 4); scatter_access_atomic(k, 1).update((NumberType)(j + 1) * 2.0); scatter_access(k, 2).update((NumberType)(j + 1) * 1.0); } } void validateResults(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); EXPECT_NEAR(val0, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; EXPECT_NEAR(val1, 2.0, 1e-14 * 2.0) << "Data differs at index " << i; EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0) << "Data differs at index " << i; } } // check for correct padding void validateResultsForSubview( orig_view_type orig, std::pair& subRangeDim0, std::pair& subRangeDim1) { (void)subRangeDim1; auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) { // is in subview EXPECT_NEAR(val0, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; EXPECT_NEAR(val1, 2.0, 1e-14 * 2.0) << "Data differs at index " << i; EXPECT_NEAR(val2, 1.0, 1e-14 * 1.0) << "Data differs at index " << i; } else { // is outside of subview EXPECT_NEAR(val0, NumberType(999999), 1e-14) << "Data differs at index " << i; EXPECT_NEAR(val1, NumberType(999999), 1e-14) << "Data differs at index " << i; EXPECT_NEAR(val2, NumberType(999999), 1e-14) << "Data differs at index " << i; } } } }; template struct test_scatter_view_impl_cls { public: using scatter_view_type = Kokkos::Experimental::ScatterView; using orig_view_type = Kokkos::View; using size_type = typename Kokkos::HostSpace::size_type; scatter_view_type scatter_view; int scatterSize; test_scatter_view_impl_cls(const scatter_view_type& view) { scatter_view = view; scatterSize = 0; } void initialize(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { host_view(i, 0) = 0.0; host_view(i, 1) = 0.0; host_view(i, 2) = 0.0; } Kokkos::fence(); Kokkos::deep_copy(orig, host_view); } void run_parallel(int n) { scatterSize = n; Kokkos::RangePolicy policy(0, n); Kokkos::parallel_for("scatter_view_test: Prod", policy, *this); } KOKKOS_INLINE_FUNCTION void operator()(int i) const { auto scatter_access = scatter_view.access(); auto scatter_access_atomic = scatter_view.template access(); for (int j = 0; j < 4; ++j) { auto k = (i + j) % scatterSize; scatter_access(k, 0).update((NumberType)(j + 1) * 4); scatter_access_atomic(k, 1).update((NumberType)(j + 1) * 2.0); scatter_access(k, 2).update((NumberType)(j + 1) * 1.0); } } void validateResults(orig_view_type orig) { auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); EXPECT_NEAR(val0, 16.0, 1e-14 * 16.0) << "Data differs at index " << i; EXPECT_NEAR(val1, 8.0, 1e-14 * 8.0) << "Data differs at index " << i; EXPECT_NEAR(val2, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; } } // check for correct padding void validateResultsForSubview( orig_view_type orig, std::pair& subRangeDim0, std::pair& subRangeDim1) { (void)subRangeDim1; auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); Kokkos::fence(); for (size_type i = 0; i < host_view.extent(0); ++i) { auto val0 = host_view(i, 0); auto val1 = host_view(i, 1); auto val2 = host_view(i, 2); if (i >= std::get<0>(subRangeDim0) && i < std::get<1>(subRangeDim0)) { // is in subview EXPECT_NEAR(val0, 16.0, 1e-14 * 16.0) << "Data differs at index " << i; EXPECT_NEAR(val1, 8.0, 1e-14 * 8.0) << "Data differs at index " << i; EXPECT_NEAR(val2, 4.0, 1e-14 * 4.0) << "Data differs at index " << i; } else { // is outside of subview EXPECT_NEAR(val0, NumberType(0), 1e-14) << "Data differs at index " << i; EXPECT_NEAR(val1, NumberType(0), 1e-14) << "Data differs at index " << i; EXPECT_NEAR(val2, NumberType(0), 1e-14) << "Data differs at index " << i; } } } }; // Test ScatterView on subview template struct test_default_scatter_sub_view { public: using default_duplication = Kokkos::Impl::Experimental::DefaultDuplication< typename DeviceType::execution_space>; using Duplication = typename default_duplication::type; using Contribution = typename Kokkos::Impl::Experimental::DefaultContribution< typename DeviceType::execution_space, Duplication>::type; using scatter_view_def = typename test_scatter_view_impl_cls::scatter_view_type; using orig_view_def = typename test_scatter_view_impl_cls::orig_view_type; using size_type = typename Kokkos::HostSpace::size_type; void run_test(int n) { // Test creation via create_scatter_view overload 1 { orig_view_def original_view("original_view", n); auto rangeDim0 = std::pair(0 + 1, n - 1); auto rangeDim1 = std::pair(0, original_view.extent(1)); auto original_sub_view = Kokkos::subview(original_view, rangeDim0, rangeDim1); scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view(Op{}, original_sub_view); test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); scatter_view_test_impl.initialize(original_view); scatter_view_test_impl.run_parallel(original_sub_view.extent(0)); Kokkos::Experimental::contribute(original_sub_view, scatter_view); scatter_view.reset_except(original_sub_view); scatter_view_test_impl.run_parallel(original_sub_view.extent(0)); Kokkos::Experimental::contribute(original_sub_view, scatter_view); Kokkos::fence(); scatter_view_test_impl.validateResultsForSubview(original_view, rangeDim0, rangeDim1); } } }; template struct test_default_scatter_view { public: using default_duplication = Kokkos::Impl::Experimental::DefaultDuplication< typename DeviceType::execution_space>; using Duplication = typename default_duplication::type; using Contribution = typename Kokkos::Impl::Experimental::DefaultContribution< typename DeviceType::execution_space, Duplication>::type; using scatter_view_def = typename test_scatter_view_impl_cls::scatter_view_type; using orig_view_def = typename test_scatter_view_impl_cls::orig_view_type; void run_test(int n) { // Test creation via create_scatter_view overload 1 { orig_view_def original_view("original_view", n); scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view(Op{}, original_view); test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); scatter_view_test_impl.initialize(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); scatter_view.reset_except(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); Kokkos::fence(); scatter_view_test_impl.validateResults(original_view); { scatter_view_def persistent_view("persistent", n); auto result_view = persistent_view.subview(); contribute(result_view, persistent_view); Kokkos::fence(); } } } }; template struct test_scatter_view_config { public: using scatter_view_def = typename test_scatter_view_impl_cls::scatter_view_type; using orig_view_def = typename test_scatter_view_impl_cls::orig_view_type; void compile_constructor() { auto sv = scatter_view_def(Kokkos::view_alloc(DeviceType{}, "label"), 10); } void run_test(int n) { // test allocation { orig_view_def ov1("ov1", n); scatter_view_def sv1; ASSERT_FALSE(sv1.is_allocated()); sv1 = Kokkos::Experimental::create_scatter_view(ov1); scatter_view_def sv2(sv1); scatter_view_def sv3("sv3", n); ASSERT_TRUE(sv1.is_allocated()); ASSERT_TRUE(sv2.is_allocated()); ASSERT_TRUE(sv3.is_allocated()); } // Test creation via create_scatter_view { orig_view_def original_view("original_view", n); scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view< Op, Duplication, Contribution>(original_view); test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); scatter_view_test_impl.initialize(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); scatter_view.reset_except(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); Kokkos::fence(); scatter_view_test_impl.validateResults(original_view); { scatter_view_def persistent_view("persistent", n); auto result_view = persistent_view.subview(); contribute(result_view, persistent_view); Kokkos::fence(); } } // Test creation via create_scatter_view overload 2 { orig_view_def original_view("original_view", n); scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view( Op{}, Duplication{}, Contribution{}, original_view); test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); scatter_view_test_impl.initialize(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); scatter_view.reset_except(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); Kokkos::fence(); scatter_view_test_impl.validateResults(original_view); { scatter_view_def persistent_view("persistent", n); auto result_view = persistent_view.subview(); contribute(result_view, persistent_view); Kokkos::fence(); } } // Test creation via constructor { orig_view_def original_view("original_view", n); scatter_view_def scatter_view(original_view); test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); scatter_view_test_impl.initialize(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); scatter_view.reset_except(original_view); scatter_view_test_impl.run_parallel(n); Kokkos::Experimental::contribute(original_view, scatter_view); Kokkos::fence(); scatter_view_test_impl.validateResults(original_view); { scatter_view_def persistent_view("persistent", n); auto result_view = persistent_view.subview(); contribute(result_view, persistent_view); Kokkos::fence(); } } } }; template struct TestDuplicatedScatterView { TestDuplicatedScatterView(int n) { // ScatterSum test test_scatter_view_config test_sv_right_config; test_sv_right_config.run_test(n); test_scatter_view_config< DeviceType, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated, Kokkos::Experimental::ScatterNonAtomic, ScatterType, NumberType> test_sv_left_config; test_sv_left_config.run_test(n); } }; #ifdef KOKKOS_ENABLE_CUDA // disable duplicated instantiation with CUDA until // UniqueToken can support it template struct TestDuplicatedScatterView { TestDuplicatedScatterView(int) {} }; template struct TestDuplicatedScatterView< Kokkos::Device, ScatterType, NumberType> { TestDuplicatedScatterView(int) {} }; template struct TestDuplicatedScatterView< Kokkos::Device, ScatterType, NumberType> { TestDuplicatedScatterView(int) {} }; #endif template void test_scatter_view(int64_t n) { using execution_space = typename DeviceType::execution_space; // no atomics or duplication is only sensible if the execution space // is running essentially in serial (doesn't have to be Serial though, // we also test OpenMP with one thread: LAMMPS cares about that) if (execution_space().concurrency() == 1) { test_scatter_view_config test_sv_config; test_sv_config.run_test(n); } #ifdef KOKKOS_ENABLE_SERIAL if (!std::is_same::value) { #endif test_scatter_view_config test_sv_config; test_sv_config.run_test(n); #ifdef KOKKOS_ENABLE_SERIAL } #endif // with hundreds of threads we were running out of memory. // limit (n) so that duplication doesn't exceed 1GB constexpr std::size_t maximum_allowed_total_bytes = 1ull * 1024ull * 1024ull * 1024ull; std::size_t const maximum_allowed_copy_bytes = maximum_allowed_total_bytes / std::size_t(execution_space().concurrency()); constexpr std::size_t bytes_per_value = sizeof(NumberType) * 12; std::size_t const maximum_allowed_copy_values = maximum_allowed_copy_bytes / bytes_per_value; n = std::min(n, int64_t(maximum_allowed_copy_values)); // if the default is duplicated, this needs to follow the limit { test_default_scatter_view test_default_sv; test_default_sv.run_test(n); } // run same test but on a subview (this covers support for padded // ScatterViews) { test_default_scatter_sub_view test_default_scatter_view_subview; test_default_scatter_view_subview.run_test(n); } TestDuplicatedScatterView duptest(n); } TEST(TEST_CATEGORY, scatterview) { test_scatter_view( 10); test_scatter_view(10); test_scatter_view(10); test_scatter_view(10); test_scatter_view(10); // tests were timing out in DEBUG mode, reduce the amount of work #ifdef KOKKOS_ENABLE_DEBUG int big_n = 100 * 1000; #else #if defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_OPENMP) #if defined(KOKKOS_ENABLE_SERIAL) bool is_serial = std::is_same::value; #else bool is_serial = false; #endif #if defined(KOKKOS_ENABLE_OPENMP) bool is_openmp = std::is_same::value; #else bool is_openmp = false; #endif int big_n = is_serial || is_openmp ? 100 * 1000 : 10000 * 1000; #else int big_n = 10000 * 1000; #endif #endif test_scatter_view( big_n); test_scatter_view( big_n); test_scatter_view(big_n); test_scatter_view(big_n); test_scatter_view(big_n); } TEST(TEST_CATEGORY, scatterview_devicetype) { using device_type = Kokkos::Device; test_scatter_view(10); test_scatter_view(10); test_scatter_view(10); test_scatter_view(10); test_scatter_view(10); #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) #ifdef KOKKOS_ENABLE_CUDA using device_execution_space = Kokkos::Cuda; using device_memory_space = Kokkos::CudaSpace; using host_accessible_space = Kokkos::CudaUVMSpace; #else using device_execution_space = Kokkos::HIP; using device_memory_space = Kokkos::HIPSpace; using host_accessible_space = Kokkos::HIPManagedSpace; #endif if (std::is_same::value) { using device_device_type = Kokkos::Device; test_scatter_view(10); test_scatter_view(10); test_scatter_view( 10); test_scatter_view(10); test_scatter_view(10); using host_device_type = Kokkos::Device; test_scatter_view(10); test_scatter_view( 10); test_scatter_view(10); test_scatter_view(10); test_scatter_view(10); } #endif } } // namespace Test #endif // KOKKOS_TEST_SCATTER_VIEW_HPP