//@HEADER
// ************************************************************************
//
//                        Kokkos v. 4.0
//       Copyright (2022) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

// NOTE(review): this copy of the file appears to have lost every
// angle-bracket list that was written without a space after '<' (include
// targets, template parameter lists, template arguments). The code tokens
// below are left exactly as found; restore the missing lists from the
// upstream file before compiling.
#include
#include

namespace {

// Functor-based test of Kokkos::Experimental::UniqueToken used from a
// RangePolicy. Three call operators are provided:
//   - operator()(long):                        uniqueness check of a token
//   - operator()(count_test_start_tag, long):  non-atomic counting through
//                                              acquire()/release()
//   - operator()(count_test_check_tag, ...):   reduction over the counters
// NOTE(review): the template parameter list is missing here; the body uses a
// type named `Space` — confirm the full list against upstream.
template class TestUniqueToken {
 public:
  using execution_space = typename Space::execution_space;
  // NOTE(review): View template arguments are missing; elements are used as
  // integer counters below — confirm exact type upstream.
  using view_type = Kokkos::View;

  // Token pool under test (template arguments missing in this copy).
  Kokkos::Experimental::UniqueToken tokens;

  // verify(t): incremented on entry and decremented on exit of the checked
  //            section for token t; used to detect two holders of one token.
  // counts(t): number of times token t was handed out (and, in the count
  //            test, the per-token work counter).
  // errors(0): number of failed checks across all iterations.
  view_type verify;
  view_type counts;
  view_type errors;

  // Dispatch tags selecting the count-test call operators.
  struct count_test_start_tag {};
  struct count_test_check_tag {};

  // Acquires a token and checks that it is in range and not held by anyone
  // else for the duration of the checked section.
  KOKKOS_INLINE_FUNCTION
  void operator()(long) const {
    // Presumably releases the token when token_val is destroyed — confirm
    // against AcquireUniqueToken.
    Kokkos::Experimental::AcquireUniqueToken token_val(
        tokens);

    const int32_t t = token_val.value();

    bool ok = true;

    // Token must index into [0, tokens.size()).
    ok = ok && 0 <= t;
    ok = ok && t < tokens.size();

    // The previous value of verify(t) must be 0 on entry ...
    ok = ok && 0 == Kokkos::atomic_fetch_add(&verify(t), 1);

    Kokkos::atomic_fetch_add(&counts(t), 1);

    // ... and 1 on exit, i.e. nobody else touched verify(t) in between.
    ok = ok && 1 == Kokkos::atomic_fetch_add(&verify(t), -1);

    if (!ok) {
      Kokkos::atomic_fetch_add(&errors(0), 1);
    }
  }

  // Count test, start phase: acquire a token, bump its counter R times,
  // release the token.
  KOKKOS_INLINE_FUNCTION
  void operator()(count_test_start_tag, long) const {
    constexpr int R = 10;
    int id          = tokens.acquire();
    // Plain (non-atomic) increments: the final sum checked in run() is only
    // correct if no other thread holds `id` between acquire() and release().
    for (int j = 0; j < R; j++) counts(id)++;
    tokens.release(id);
  }

  // Count test, check phase: accumulates counts(i) into the reduction value.
  KOKKOS_INLINE_FUNCTION
  void operator()(count_test_check_tag, long i, int64_t& lsum) const {
    lsum += counts(i);
  }

  // Builds the token pool from a default-constructed execution_space and
  // sizes verify/counts by tokens.size(); errors holds a single counter.
  TestUniqueToken()
      : tokens(execution_space()),
        verify("TestUniqueTokenVerify", tokens.size()),
        counts("TestUniqueTokenCounts", tokens.size()),
        errors("TestUniqueTokenErrors", 1) {}

  // Runs the uniqueness test, then (unless SYCL is enabled) the count test,
  // and finally asserts that no error was recorded.
  static void run() {
    // NOTE(review): RangePolicy template arguments are missing in this copy.
    using policy = Kokkos::RangePolicy;

    TestUniqueToken self;

    {
      // Oversubscribe the pool: 100 iterations per available token, launched
      // three times back to back before fencing.
      const int duplicate = 100;
      const long n        = duplicate * self.tokens.size();

      Kokkos::parallel_for(policy(0, n), self);
      Kokkos::parallel_for(policy(0, n), self);
      Kokkos::parallel_for(policy(0, n), self);
      Kokkos::fence();
    }

    typename view_type::HostMirror host_counts =
        Kokkos::create_mirror_view(self.counts);

    Kokkos::deep_copy(host_counts, self.counts);

    // Largest per-token count; only used in the failure message below.
    int32_t max = 0;

    {
      const long n = host_counts.extent(0);
      for (long i = 0; i < n; ++i) {
        if (max < host_counts[i]) max = host_counts[i];
      }
    }

    // FIXME_SYCL wrong result on NVIDIA GPUs but correct on host and Intel GPUs
#ifndef KOKKOS_ENABLE_SYCL
    // Count test for pull request #3260
    {
      constexpr int N = 1000000;
      constexpr int R = 10;
      int num         = self.tokens.size();
      // Reset the counters before reusing them for the count test.
      Kokkos::resize(self.counts, num);
      Kokkos::deep_copy(self.counts, 0);
      // NOTE(review): the RangePolicy template arguments (presumably carrying
      // count_test_start_tag / count_test_check_tag) are missing in this
      // copy — confirm upstream.
      Kokkos::parallel_for(
          "Start", Kokkos::RangePolicy(0, N), self);
      int64_t sum = 0;
      Kokkos::parallel_reduce(
          "Check", Kokkos::RangePolicy(0, num), self, sum);
      // Each of the N start iterations adds R to exactly one counter.
      ASSERT_EQ(sum, int64_t(N) * R);
    }
#endif

    typename view_type::HostMirror host_errors =
        Kokkos::create_mirror_view(self.errors);

    Kokkos::deep_copy(host_errors, self.errors);

    ASSERT_EQ(host_errors(0), 0) << "max reuse was " << max;
  }
};

// NOTE(review): the template arguments of TestUniqueToken are missing in the
// two tests below; the test names suggest a global-scope and an
// instance-scope token respectively — confirm upstream.
TEST(TEST_CATEGORY, unique_token_global) {
  TestUniqueToken::run();
}

TEST(TEST_CATEGORY, unique_token_instance) {
  TestUniqueToken::run();
}

// Functor-based test of Kokkos::Experimental::AcquireTeamUniqueToken used
// from a TeamPolicy: checks that the token is in range, that it is not held
// by another team at the same time, and that every member of a team observes
// the same token value as team rank 0.
// NOTE(review): the template parameter list is missing here; the body uses a
// type named `Space` — confirm the full list against upstream.
template class TestAcquireTeamUniqueToken {
 public:
  using execution_space = typename Space::execution_space;
  // NOTE(review): template arguments of the aliases below are missing in
  // this copy.
  using view_type        = Kokkos::View;
  // Constructed from team.team_scratch(0) and accessed with no indices below.
  using scratch_view     = Kokkos::View;
  using team_policy_type = Kokkos::TeamPolicy;
  using team_member_type = typename team_policy_type::member_type;
  using tokens_type      = Kokkos::Experimental::UniqueToken;

  tokens_type tokens;

  // Same roles as in TestUniqueToken: verify detects concurrent holders,
  // counts records how often each token was used, errors(0) counts failures.
  view_type verify;
  view_type counts;
  view_type errors;

  // Per-team-member body of the test.
  KOKKOS_INLINE_FUNCTION
  void operator()(team_member_type team) const {
    Kokkos::Experimental::AcquireTeamUniqueToken token_val(
        tokens, team);
    // Level-0 team scratch slot used to publish rank 0's token to the team.
    scratch_view team_rank_0_token_val(team.team_scratch(0));
    const int32_t t = token_val.value();

    bool ok = true;

    // Token must index into [0, tokens.size()).
    ok = ok && 0 <= t;
    ok = ok && t < tokens.size();

    // Run the concurrent-holder check via Kokkos::single with PerTeam;
    // `ok` is captured by reference so a failure is visible after the lambda.
    Kokkos::single(Kokkos::PerTeam(team), [&]() {
      ok = ok && 0 == Kokkos::atomic_fetch_add(&verify(t), 1);

      Kokkos::atomic_fetch_add(&counts(t), 1);

      ok = ok && 1 == Kokkos::atomic_fetch_add(&verify(t), -1);
    });

    // Rank 0 publishes its token; after the barrier every member compares
    // its own token against the published one.
    if (team.team_rank() == 0) {
      team_rank_0_token_val() = t;
    }
    team.team_barrier();
    ok = ok && team_rank_0_token_val() == t;

    if (!ok) {
      Kokkos::atomic_fetch_add(&errors(0), 1);
    }
  }

  // Builds a pool of (execution space concurrency / team_size) tokens and
  // sizes verify/counts by tokens.size(); errors holds a single counter.
  // team_size is used as a divisor and must therefore be non-zero.
  TestAcquireTeamUniqueToken(int team_size)
      : tokens(execution_space().concurrency() / team_size,
               execution_space()),
        verify("TestAcquireTeamUniqueTokenVerify", tokens.size()),
        counts("TestAcquireTeamUniqueTokenCounts", tokens.size()),
        errors("TestAcquireTeamUniqueTokenErrors", 1) {}

  // Launches the team test with a team size of at most 2 and asserts that no
  // error was recorded.
  static void run() {
    // Query the maximum team size using a throw-away functor instance.
    const int max_team_size = team_policy_type(1, 1).team_size_max(
        TestAcquireTeamUniqueToken(1), Kokkos::ParallelForTag());
    const int team_size = std::min(2, max_team_size);
    TestAcquireTeamUniqueToken self(team_size);

    {
      // Oversubscribe the pool: 100 teams per available token.
      const int duplicate = 100;
      // FIXME_SYCL The number of workgroups on CUDA devices can not be larger
      // than 65535
#ifdef KOKKOS_ENABLE_SYCL
      const long n = std::min(65535, duplicate * self.tokens.size());
#else
      const long n = duplicate * self.tokens.size();
#endif

      team_policy_type team_policy(n, team_size);
      // Level-0 per-team scratch must hold what AcquireTeamUniqueToken
      // requests plus the scratch_view used to publish rank 0's token.
      team_policy.set_scratch_size(
          0,
          Kokkos::PerTeam(Kokkos::Experimental::AcquireTeamUniqueToken<
                              team_policy_type>::shmem_size() +
                          scratch_view::shmem_size()));

      Kokkos::parallel_for(team_policy, self);
      Kokkos::fence();
    }

    typename view_type::HostMirror host_counts =
        Kokkos::create_mirror_view(self.counts);

    Kokkos::deep_copy(host_counts, self.counts);

    // Largest per-token count; only used in the failure message below.
    int32_t max = 0;

    {
      const long n = host_counts.extent(0);
      for (long i = 0; i < n; ++i) {
        if (max < host_counts[i]) max = host_counts[i];
      }
    }

    typename view_type::HostMirror host_errors =
        Kokkos::create_mirror_view(self.errors);

    Kokkos::deep_copy(host_errors, self.errors);

    ASSERT_EQ(host_errors(0), 0) << "max reuse was " << max;
  }
};

// Runs the team-token test, except that under KOKKOS_ENABLE_OPENMPTARGET the
// test is skipped when the `if constexpr` condition holds.
// NOTE(review): the std::is_same arguments and the TestAcquireTeamUniqueToken
// template arguments are missing in this copy — confirm upstream.
TEST(TEST_CATEGORY, unique_token_team_acquire) {
#ifdef KOKKOS_ENABLE_OPENMPTARGET  // FIXME_OPENMPTARGET
  if constexpr (std::is_same::value) {
    GTEST_SKIP() << "skipping because OpenMPTarget does not implement yet a "
                    "specialization of AcquireTeamUniqueToken";
  } else
#endif
    // Body of the dangling `else` above when OPENMPTARGET is enabled;
    // otherwise an unconditional call.
    TestAcquireTeamUniqueToken::run();
}

}  // namespace