//@HEADER
// ************************************************************************
//
//                        Kokkos v. 4.0
//       Copyright (2022) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

// NOTE(review): the four #include directives below carry no header names in
// this copy of the file; the targets appear to have been lost (presumably
// together with their angle brackets). Restore them from the upstream file.
// The code below relies on snprintf, on the TEST / ASSERT_* / static_assert
// facilities and on the Kokkos:: API.
#include
#include
#include
#include

/*--------------------------------------------------------------------------*/

namespace Test {

// Destroy functor stored in an allocation record's m_destroy member by the
// tests below. Each call to destroy_shared_allocation() atomically increments
// the integer that `count` points to, which lets a test count how many
// records had their destroy hook run.
struct SharedAllocDestroy {
  // Counter to bump; not owned. The defaulted constructor gives it no
  // initial value, so it must be set before destroy_shared_allocation() runs.
  volatile int* count;

  SharedAllocDestroy() = default;
  SharedAllocDestroy(int* arg) : count(arg) {}

  void destroy_shared_allocation() { Kokkos::atomic_increment(count); }
};

// Exercises Kokkos::Impl::SharedAllocationRecord / SharedAllocationTracker in
// three scoped phases:
//   1. N RecordMemS records: allocate in a parallel_for, raise each use count
//      to (i / 10) + 1, then decrement serially until the pointer returned by
//      decrement() is null.
//   2. The same sequence with RecordFull records whose m_destroy is a
//      SharedAllocDestroy; afterwards the shared counter must equal N.
//   3. One RecordFull record driven through Tracker objects, checking the use
//      count as trackers are created and go out of scope; the counter must
//      equal 1 once the last tracker is gone.
//
// NOTE(review): the parameter list after `template` is missing in this copy,
// and so are the template arguments of the three SharedAllocationRecord
// aliases, of Kokkos::RangePolicy, of both static_cast expressions and of
// every test_shared_alloc() call in the TEST at the end of the file. The body
// uses `MemorySpace` as a type, so that is presumably one of the parameters.
// Confirm all of these against the upstream file before building.
template
void test_shared_alloc() {
  using Header = const Kokkos::Impl::SharedAllocationHeader;
  using Tracker = Kokkos::Impl::SharedAllocationTracker;
  using RecordBase = Kokkos::Impl::SharedAllocationRecord;
  using RecordMemS = Kokkos::Impl::SharedAllocationRecord;
  using RecordFull = Kokkos::Impl::SharedAllocationRecord;

  // A tracker is expected to occupy exactly one pointer.
  static_assert(sizeof(Tracker) == sizeof(int*),
                "SharedAllocationTracker has wrong size!");

  MemorySpace s;

  const size_t N = 1200;    // number of records created in phases 1 and 2
  const size_t size = 8;    // record i is allocated with size * (i + 1)

  RecordMemS* rarray[N];
  Header* harray[N];

  // Plain pointers to the arrays so the [=] lambdas copy a pointer rather
  // than the arrays themselves. h[] is filled in below but not read again in
  // this function.
  RecordMemS** const r = rarray;
  Header** const h = harray;

  Kokkos::RangePolicy range(0, N);

  // ---- Phase 1: RecordMemS records ----------------------------------------
  {
    // Since always executed on host space, leave [=]
    Kokkos::parallel_for(range, [=](int i) {
      char name[64];
      snprintf(name, 64, "test_%.2d", i);

      r[i] = RecordMemS::allocate(s, name, size * (i + 1));
      h[i] = Header::get_header(r[i]->data());

      // A freshly allocated record starts with a use count of zero.
      ASSERT_EQ(r[i]->use_count(), 0);

      for (int j = 0; j < (i / 10) + 1; ++j) RecordBase::increment(r[i]);

      ASSERT_EQ(r[i]->use_count(), (i / 10) + 1);
      // The record must be recoverable from its data pointer.
      ASSERT_EQ(r[i], RecordMemS::get_record(r[i]->data()));
    });

    Kokkos::fence();

#ifdef KOKKOS_ENABLE_DEBUG
    // Sanity check for the whole set of allocation records to which this record
    // belongs.
    RecordBase::is_sane(r[0]);
    // RecordMemS::print_records( std::cout, s, true );
#endif

    // This must be a plain for-loop since deallocation (which can be triggered
    // by RecordBase::decrement) fences all execution space instances. If this
    // is a parallel_for, the test can hang with the parallel_for blocking
    // waiting for itself to complete.
    for (size_t i = range.begin(); i < range.end(); ++i) {
      // Keep decrementing until decrement() hands back a null pointer.
      // NOTE(review): the static_cast target type is missing in this copy;
      // the result is assigned to a RecordMemS* slot.
      while (nullptr !=
             (r[i] = static_cast(RecordBase::decrement(r[i])))) {
#ifdef KOKKOS_ENABLE_DEBUG
        if (r[i]->use_count() == 1) RecordBase::is_sane(r[i]);
#endif
      }
    }

    Kokkos::fence();
  }

  // ---- Phase 2: RecordFull records with a counting destroy functor --------
  {
    int destroy_count = 0;
    SharedAllocDestroy counter(&destroy_count);

    // The index is size_t here (int in phase 1), hence the explicit int(...)
    // conversions for the format argument and the use-count comparison.
    Kokkos::parallel_for(range, [=](size_t i) {
      char name[64];
      snprintf(name, 64, "test_%.2d", int(i));

      RecordFull* rec = RecordFull::allocate(s, name, size * (i + 1));

      // Every record gets a copy of the functor; all copies point at the
      // same destroy_count.
      rec->m_destroy = counter;

      r[i] = rec;
      h[i] = Header::get_header(r[i]->data());

      ASSERT_EQ(r[i]->use_count(), 0);

      for (size_t j = 0; j < (i / 10) + 1; ++j) RecordBase::increment(r[i]);

      ASSERT_EQ(r[i]->use_count(), int((i / 10) + 1));
      ASSERT_EQ(r[i], RecordMemS::get_record(r[i]->data()));
    });

    Kokkos::fence();

#ifdef KOKKOS_ENABLE_DEBUG
    RecordBase::is_sane(r[0]);
#endif

    // This must be a plain for-loop since deallocation (which can be triggered
    // by RecordBase::decrement) fences all execution space instances. If this
    // is a parallel_for, the test can hang with the parallel_for blocking
    // waiting for itself to complete.
    for (size_t i = range.begin(); i < range.end(); ++i) {
      // NOTE(review): static_cast target type missing here as well.
      while (nullptr !=
             (r[i] = static_cast(RecordBase::decrement(r[i])))) {
#ifdef KOKKOS_ENABLE_DEBUG
        if (r[i]->use_count() == 1) RecordBase::is_sane(r[i]);
#endif
      }
    }

    Kokkos::fence();

    // One destroy call is expected per record.
    ASSERT_EQ(destroy_count, int(N));
  }

  // ---- Phase 3: a single record managed through Tracker objects -----------
  {
    int destroy_count = 0;

    {
      RecordFull* rec = RecordFull::allocate(s, "test", size);

      // ... Construction of the allocated { rec->data(), rec->size() }

      // Copy destruction function object into the allocation record.
      rec->m_destroy = SharedAllocDestroy(&destroy_count);

      ASSERT_EQ(rec->use_count(), 0);

      // Start tracking, increments the use count from 0 to 1.
      Tracker track;

      track.assign_allocated_record_to_uninitialized(rec);

      ASSERT_EQ(rec->use_count(), 1);
      ASSERT_EQ(track.use_count(), 1);

      // Verify construction / destruction increment.
      for (size_t i = 0; i < N; ++i) {
        ASSERT_EQ(rec->use_count(), 1);

        {
          // A second tracker on the same record raises the count to 2 for
          // the lifetime of this inner scope.
          Tracker local_tracker;
          local_tracker.assign_allocated_record_to_uninitialized(rec);
          ASSERT_EQ(rec->use_count(), 2);
          ASSERT_EQ(local_tracker.use_count(), 2);
        }

        ASSERT_EQ(rec->use_count(), 1);
        ASSERT_EQ(track.use_count(), 1);
      }

      // Same check from inside a parallel_for: only a lower bound is asserted
      // on the use count while the local trackers are alive.
      Kokkos::parallel_for(range, [=](size_t) {
        Tracker local_tracker;
        local_tracker.assign_allocated_record_to_uninitialized(rec);
        ASSERT_GT(rec->use_count(), 1);
      });

      Kokkos::fence();

      // All local trackers are gone; only `track` still holds the record.
      ASSERT_EQ(rec->use_count(), 1);
      ASSERT_EQ(track.use_count(), 1);

      // Destruction of 'track' object deallocates the 'rec' and invokes the
      // destroy function object.
    }

    ASSERT_EQ(destroy_count, 1);
  }
}

// Runs test_shared_alloc() once, selecting the instantiation from
// TEST_CATEGORY_NUMBER when that macro is defined; the trailing comment on
// each branch names the backend(s) it is meant for.
// NOTE(review): the template arguments of every test_shared_alloc() call
// below are missing in this copy, so all branches currently read the same.
// Restore the per-backend arguments from the upstream file.
TEST(TEST_CATEGORY, impl_shared_alloc) {
#ifdef TEST_CATEGORY_NUMBER
#if (TEST_CATEGORY_NUMBER < 4)  // serial threads openmp hpx
  test_shared_alloc();
#elif (TEST_CATEGORY_NUMBER == 4)  // openmptarget
  test_shared_alloc();
#elif (TEST_CATEGORY_NUMBER == 5)  // cuda
  test_shared_alloc();
#elif (TEST_CATEGORY_NUMBER == 6)  // hip
  test_shared_alloc();
#elif (TEST_CATEGORY_NUMBER == 7)  // sycl
  test_shared_alloc();
#elif (TEST_CATEGORY_NUMBER == 8)  // openacc
  test_shared_alloc();
#endif
#else
  test_shared_alloc();
#endif
}

}  // namespace Test