//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #include #include #include #include #include namespace Test { template void impl_test_local_deepcopy_teampolicy_rank_1(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); using team_policy = Kokkos::TeamPolicy; using member_type = typename Kokkos::TeamPolicy::member_type; // Deep Copy Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, lid, Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, lid, Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, lid, Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_teampolicy_rank_2(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); using team_policy = Kokkos::TeamPolicy; using member_type = typename Kokkos::TeamPolicy::member_type; // Deep Copy Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_teampolicy_rank_3(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); using team_policy = Kokkos::TeamPolicy; using member_type = typename Kokkos::TeamPolicy::member_type; // Deep Copy Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subDst = Kokkos::subview(B, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_teampolicy_rank_4(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); using team_policy = Kokkos::TeamPolicy; using member_type = typename Kokkos::TeamPolicy::member_type; // Deep Copy Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subSrc = Kokkos::subview(A, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subDst = Kokkos::subview(B, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_teampolicy_rank_5(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); using team_policy = Kokkos::TeamPolicy; using member_type = typename Kokkos::TeamPolicy::member_type; // Deep Copy Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subSrc = Kokkos::subview(A, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subDst = Kokkos::subview(B, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_teampolicy_rank_6(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); using team_policy = Kokkos::TeamPolicy; using member_type = typename Kokkos::TeamPolicy::member_type; // Deep Copy Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subSrc = Kokkos::subview(A, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subDst = Kokkos::subview(B, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_teampolicy_rank_7(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. Kokkos::deep_copy(A, 10.0); using team_policy = Kokkos::TeamPolicy; using member_type = typename Kokkos::TeamPolicy::member_type; // Deep Copy Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subSrc = Kokkos::subview( A, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview( B, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( team_policy(N, Kokkos::AUTO), KOKKOS_LAMBDA(const member_type& teamMember) { int lid = teamMember.league_rank(); // returns a number between 0 and N auto subDst = Kokkos::subview( B, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(teamMember, subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_rangepolicy_rank_1(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); // Deep Copy Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_rangepolicy_rank_2(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); // Deep Copy Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_rangepolicy_rank_3(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); // Deep Copy Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subDst = Kokkos::subview(B, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_rangepolicy_rank_4(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); // Deep Copy Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subSrc = Kokkos::subview(A, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subDst = Kokkos::subview(B, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_rangepolicy_rank_5(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); // Deep Copy Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subSrc = Kokkos::subview(A, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subDst = Kokkos::subview(B, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_rangepolicy_rank_6(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. auto subA = Kokkos::subview(A, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::deep_copy(subA, 10.0); // Deep Copy Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subSrc = Kokkos::subview(A, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview(B, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subDst = Kokkos::subview(B, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- template void impl_test_local_deepcopy_rangepolicy_rank_7(const int N) { // Allocate matrices on device. ViewType A("A", N, N, N, N, N, N, N, N); ViewType B("B", N, N, N, N, N, N, N, N); // Create host mirrors of device views. typename ViewType::HostMirror h_A = Kokkos::create_mirror_view(A); typename ViewType::HostMirror h_B = Kokkos::create_mirror_view(B); // Initialize A matrix. Kokkos::deep_copy(A, 10.0); // Deep Copy Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subSrc = Kokkos::subview( A, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); auto subDst = Kokkos::subview( B, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, subSrc); }); Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_B, B); bool test = true; for (size_t i = 0; i < A.span(); i++) { if (h_A.data()[i] != h_B.data()[i]) { test = false; break; } } ASSERT_EQ(test, true); // Fill Kokkos::deep_copy(B, 0.0); Kokkos::parallel_for( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int& i) { auto subDst = Kokkos::subview( B, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(subDst, 20.0); }); Kokkos::deep_copy(h_B, B); double sum_all = 0.0; for (size_t i = 0; i < B.span(); i++) { sum_all += h_B.data()[i]; } ASSERT_EQ(sum_all, 20.0 * N * N * N * N * N * N * N * N); } //------------------------------------------------------------------------------------------------------------- #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutleft) { using ExecSpace = TEST_EXECSPACE; #if defined(KOKKOS_ENABLE_CUDA) && \ defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 if (std::is_same_v) GTEST_SKIP() << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; #endif using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_teampolicy_rank_1(8); } { // Rank-2 impl_test_local_deepcopy_teampolicy_rank_2(8); } { // Rank-3 impl_test_local_deepcopy_teampolicy_rank_3(8); } { // Rank-4 impl_test_local_deepcopy_teampolicy_rank_4(8); } { // Rank-5 impl_test_local_deepcopy_teampolicy_rank_5(8); } { // Rank-6 impl_test_local_deepcopy_teampolicy_rank_6(8); } { // Rank-7 impl_test_local_deepcopy_teampolicy_rank_7(8); } } //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutleft) { using ExecSpace = TEST_EXECSPACE; #if defined(KOKKOS_ENABLE_CUDA) && \ defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 if (std::is_same_v) GTEST_SKIP() << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; #endif using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_rangepolicy_rank_1(8); } { // Rank-2 impl_test_local_deepcopy_rangepolicy_rank_2(8); } { // Rank-3 impl_test_local_deepcopy_rangepolicy_rank_3(8); } { // Rank-4 impl_test_local_deepcopy_rangepolicy_rank_4(8); } { // Rank-5 impl_test_local_deepcopy_rangepolicy_rank_5(8); } { // Rank-6 impl_test_local_deepcopy_rangepolicy_rank_6(8); } { // Rank-7 impl_test_local_deepcopy_rangepolicy_rank_7(8); } } //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutright) { using ExecSpace = TEST_EXECSPACE; #if defined(KOKKOS_ENABLE_CUDA) && \ defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 if (std::is_same_v) GTEST_SKIP() << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; #endif using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_teampolicy_rank_1(8); } { // Rank-2 impl_test_local_deepcopy_teampolicy_rank_2(8); } { // Rank-3 impl_test_local_deepcopy_teampolicy_rank_3(8); } { // Rank-4 impl_test_local_deepcopy_teampolicy_rank_4(8); } { // Rank-5 impl_test_local_deepcopy_teampolicy_rank_5(8); } { // Rank-6 impl_test_local_deepcopy_teampolicy_rank_6(8); } { // Rank-7 impl_test_local_deepcopy_teampolicy_rank_7(8); } } //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutright) { using ExecSpace = TEST_EXECSPACE; #if defined(KOKKOS_ENABLE_CUDA) && \ defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 if (std::is_same_v) GTEST_SKIP() << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; #endif using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_rangepolicy_rank_1(8); } { // Rank-2 impl_test_local_deepcopy_rangepolicy_rank_2(8); } { // Rank-3 impl_test_local_deepcopy_rangepolicy_rank_3(8); } { // Rank-4 impl_test_local_deepcopy_rangepolicy_rank_4(8); } { // Rank-5 impl_test_local_deepcopy_rangepolicy_rank_5(8); } { // Rank-6 impl_test_local_deepcopy_rangepolicy_rank_6(8); } { // Rank-7 impl_test_local_deepcopy_rangepolicy_rank_7(8); } } #endif namespace Impl { template using ShMemView = Kokkos::View; struct DeepCopyScratchFunctor { DeepCopyScratchFunctor( Kokkos::View check_view_1, Kokkos::View check_view_2) : check_view_1_(check_view_1), check_view_2_(check_view_2), N_(check_view_1.extent(0)) {} KOKKOS_INLINE_FUNCTION void operator()( Kokkos::TeamPolicy>::member_type team) const { using ShmemType = TEST_EXECSPACE::scratch_memory_space; auto shview = Impl::ShMemView(team.team_scratch(1), N_, 1); Kokkos::parallel_for( Kokkos::TeamThreadRange(team, N_), KOKKOS_LAMBDA(const size_t& index) { auto thread_shview = Kokkos::subview(shview, index, Kokkos::ALL()); Kokkos::Experimental::local_deep_copy(thread_shview, index); }); Kokkos::Experimental::local_deep_copy( team, check_view_1_, Kokkos::subview(shview, Kokkos::ALL(), 0)); Kokkos::Experimental::local_deep_copy(team, shview, 6.); Kokkos::Experimental::local_deep_copy( team, check_view_2_, Kokkos::subview(shview, Kokkos::ALL(), 0)); } Kokkos::View check_view_1_; Kokkos::View check_view_2_; int const N_; }; } // namespace Impl TEST(TEST_CATEGORY, deep_copy_scratch) { using TestDeviceTeamPolicy = Kokkos::TeamPolicy; const int N = 8; const int bytes_per_team = Impl::ShMemView::shmem_size(N, 1); TestDeviceTeamPolicy policy(1, Kokkos::AUTO); auto team_exec = policy.set_scratch_size(1, Kokkos::PerTeam(bytes_per_team)); Kokkos::View check_view_1("check_1", N); Kokkos::View check_view_2("check_2", N); Kokkos::parallel_for( team_exec, Impl::DeepCopyScratchFunctor{check_view_1, check_view_2}); auto host_copy_1 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), check_view_1); auto host_copy_2 = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), check_view_2); for (unsigned int i = 0; i < N; ++i) { ASSERT_EQ(host_copy_1(i), i); ASSERT_EQ(host_copy_2(i), 6.0); } } } // namespace Test