//@HEADER
// ************************************************************************
//
//                        Kokkos v. 4.0
//       Copyright (2022) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_BITSET_HPP
#define KOKKOS_BITSET_HPP

#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
#endif

#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>

#include <impl/Kokkos_Bitset_impl.hpp>

namespace Kokkos {

template <typename Device = Kokkos::DefaultExecutionSpace>
class Bitset;

template <typename Device = Kokkos::DefaultExecutionSpace>
class ConstBitset;

template <typename DstDevice, typename SrcDevice>
void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src);

template <typename DstDevice, typename SrcDevice>
void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);

template <typename DstDevice, typename SrcDevice>
void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);
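//
// Example (illustrative sketch, with hypothetical names): typical host-side
// use of Bitset. It assumes Kokkos::initialize() has already been called and
// that the default execution space is acceptable.
//
//   Kokkos::Bitset<Kokkos::DefaultExecutionSpace> marks(1024);  // all bits start at 0
//   marks.set();                           // host only: set every bit to 1
//   const unsigned ones = marks.count();   // host only: ones == 1024
//   marks.reset();                         // host only: set every bit back to 0
//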
/// A thread-safe view to a bitset
template <typename Device>
class Bitset {
 public:
  using execution_space = typename Device::execution_space;
  using size_type       = unsigned int;

  static constexpr unsigned BIT_SCAN_REVERSE   = 1u;
  static constexpr unsigned MOVE_HINT_BACKWARD = 2u;

  static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u;
  static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_FORWARD =
      BIT_SCAN_REVERSE;
  static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD =
      MOVE_HINT_BACKWARD;
  static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD =
      BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;

 private:
  enum : unsigned {
    block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT)
  };
  enum : unsigned { block_mask = block_size - 1u };
  enum : unsigned {
    block_shift = Kokkos::Impl::integral_power_of_two(block_size)
  };

  //! Type of @ref m_blocks.
  using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;

 public:
  Bitset() = default;

  /// arg_size := number of bits in the set
  Bitset(unsigned arg_size) : Bitset(Kokkos::view_alloc(), arg_size) {}

  template <typename... P>
  Bitset(const Impl::ViewCtorProp<P...>& arg_prop, unsigned arg_size)
      : m_size(arg_size), m_last_block_mask(0u) {
    //! Ensure that allocation properties are consistent.
    using alloc_prop_t = std::decay_t<decltype(arg_prop)>;
    static_assert(alloc_prop_t::initialize,
                  "Allocation property 'initialize' should be true.");
    static_assert(
        !alloc_prop_t::has_pointer,
        "Allocation properties should not contain the 'pointer' property.");

    //! Update 'label' property and allocate.
    const auto prop_copy =
        Impl::with_properties_if_unset(arg_prop, std::string("Bitset"));
    m_blocks =
        block_view_type(prop_copy, ((m_size + block_mask) >> block_shift));

    for (int i = 0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
      m_last_block_mask |= 1u << i;
    }
  }

  KOKKOS_DEFAULTED_FUNCTION
  Bitset(const Bitset&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  Bitset& operator=(const Bitset&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  Bitset(Bitset&&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  Bitset& operator=(Bitset&&) = default;

  KOKKOS_DEFAULTED_FUNCTION
  ~Bitset() = default;

  /// number of bits in the set
  /// can be called from the host or the device
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned size() const { return m_size; }

  /// number of bits which are set to 1
  /// can only be called from the host
  unsigned count() const {
    Impl::BitsetCount<Bitset<Device>> f(*this);
    return f.apply();
  }

  /// set all bits to 1
  /// can only be called from the host
  void set() {
    Kokkos::deep_copy(m_blocks, ~0u);

    if (m_last_block_mask) {
      // clear the unused bits in the last block
      Kokkos::Impl::DeepCopy<typename Device::memory_space, Kokkos::HostSpace>(
          m_blocks.data() + (m_blocks.extent(0) - 1u), &m_last_block_mask,
          sizeof(unsigned));
      Kokkos::fence(
          "Bitset::set: fence after clearing unused bits copying from "
          "HostSpace");
    }
  }

  /// set all bits to 0
  /// can only be called from the host
  void reset() { Kokkos::deep_copy(m_blocks, 0u); }

  /// set all bits to 0
  /// can only be called from the host
  void clear() { Kokkos::deep_copy(m_blocks, 0u); }

  /// set i'th bit to 1
  /// can only be called from the device
  KOKKOS_FORCEINLINE_FUNCTION
  bool set(unsigned i) const {
    if (i < m_size) {
      unsigned* block_ptr = &m_blocks[i >> block_shift];
      const unsigned mask = 1u << static_cast<int>(i & block_mask);

      return !(atomic_fetch_or(block_ptr, mask) & mask);
    }
    return false;
  }

  /// set i'th bit to 0
  /// can only be called from the device
  KOKKOS_FORCEINLINE_FUNCTION
  bool reset(unsigned i) const {
    if (i < m_size) {
      unsigned* block_ptr = &m_blocks[i >> block_shift];
      const unsigned mask = 1u << static_cast<int>(i & block_mask);

      return atomic_fetch_and(block_ptr, ~mask) & mask;
    }
    return false;
  }

  /// return true if the i'th bit is set to 1
  /// can only be called from the device
  KOKKOS_FORCEINLINE_FUNCTION
  bool test(unsigned i) const {
    if (i < m_size) {
#ifdef KOKKOS_ENABLE_SYCL
      const unsigned block = Kokkos::atomic_load(&m_blocks[i >> block_shift]);
#else
      const unsigned block = volatile_load(&m_blocks[i >> block_shift]);
#endif
      const unsigned mask = 1u << static_cast<int>(i & block_mask);
      return block & mask;
    }
    return false;
  }

  /// used with the find_any_set_near or find_any_unset_near functions
  /// returns the maximum number of times those functions should be called
  /// when searching for an available bit
  KOKKOS_FORCEINLINE_FUNCTION
  unsigned max_hint() const { return m_blocks.extent(0); }

  /// find a bit set to 1 near the hint
  /// returns a pair<bool, unsigned> where, if result.first is true,
  /// result.second is the bit found, and if result.first is false,
  /// result.second is a new hint
  KOKKOS_INLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_set_near(
      unsigned hint,
      unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const {
    const unsigned block_idx =
        (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
    const unsigned offset = hint & block_mask;
#ifdef KOKKOS_ENABLE_SYCL
    unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
#else
    unsigned block = volatile_load(&m_blocks[block_idx]);
#endif
    block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
                ? block
                : block & m_last_block_mask;

    return find_any_helper(block_idx, offset, block, scan_direction);
  }

  /// find a bit set to 0 near the hint
  /// returns a pair<bool, unsigned> where, if result.first is true,
  /// result.second is the bit found, and if result.first is false,
  /// result.second is a new hint
  KOKKOS_INLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_unset_near(
      unsigned hint,
      unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const {
    const unsigned block_idx = hint >> block_shift;
    const unsigned offset    = hint & block_mask;
#ifdef KOKKOS_ENABLE_SYCL
    unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
#else
    unsigned block = volatile_load(&m_blocks[block_idx]);
#endif
    block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
                ? ~block
                : ~block & m_last_block_mask;

    return find_any_helper(block_idx, offset, block, scan_direction);
  }

  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
    return m_blocks.is_allocated();
  }

 private:
  KOKKOS_FORCEINLINE_FUNCTION
  Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx,
                                               unsigned offset, unsigned block,
                                               unsigned scan_direction) const {
    Kokkos::pair<bool, unsigned> result(block > 0u, 0);

    if (!result.first) {
      result.second = update_hint(block_idx, offset, scan_direction);
    } else {
      result.second =
          scan_block((block_idx << block_shift), offset, block, scan_direction);
    }
    return result;
  }

  KOKKOS_FORCEINLINE_FUNCTION
  unsigned scan_block(unsigned block_start, int offset, unsigned block,
                      unsigned scan_direction) const {
    offset = !(scan_direction & BIT_SCAN_REVERSE)
                 ? offset
                 : (offset + block_mask) & block_mask;
    block  = Impl::rotate_right(block, offset);
    return (((!(scan_direction & BIT_SCAN_REVERSE)
                   ? Impl::bit_scan_forward(block)
                   : Impl::int_log2(block)) +
             offset) &
            block_mask) +
           block_start;
  }

  KOKKOS_FORCEINLINE_FUNCTION
  unsigned update_hint(long long block_idx, unsigned offset,
                       unsigned scan_direction) const {
    block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
    block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1;
    block_idx =
        block_idx < static_cast<long long>(m_blocks.extent(0)) ? block_idx : 0;

    return static_cast<unsigned>(block_idx) * block_size + offset;
  }

 private:
  unsigned m_size            = 0;
  unsigned m_last_block_mask = 0;
  block_view_type m_blocks;

 private:
  template <typename DDevice>
  friend class Bitset;

  template <typename DDevice>
  friend class ConstBitset;

  template <typename Bitset>
  friend struct Impl::BitsetCount;

  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src);

  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy(Bitset<DstDevice>& dst,
                        ConstBitset<SrcDevice> const& src);
};
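//
// Example (illustrative sketch, with hypothetical names 'bits', 'n', 'start'):
// device-side use of Bitset inside parallel kernels. set(i) returns true only
// for the one thread that flips bit i from 0 to 1, so it can be used to claim
// a resource; find_any_set_near() scans one block per call, so it is retried
// up to max_hint() times, feeding the returned hint back in.
//
//   Kokkos::Bitset<Kokkos::DefaultExecutionSpace> bits(n);
//   Kokkos::parallel_for("claim", n, KOKKOS_LAMBDA(int i) {
//     if (bits.set(i)) {
//       // this thread atomically flipped bit i from 0 to 1 and now owns it
//     }
//   });
//
//   // later, inside another kernel: look for any set bit near 'start'
//   unsigned hint = start;
//   for (unsigned attempt = 0; attempt < bits.max_hint(); ++attempt) {
//     Kokkos::pair<bool, unsigned> r = bits.find_any_set_near(hint);
//     if (r.first) { /* r.second is the index of a set bit */ break; }
//     hint = r.second;  // nothing in this block; continue from the new hint
//   }
//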
/// a thread-safe view to a const bitset,
/// i.e. can only test bits
template <typename Device>
class ConstBitset {
 public:
  using execution_space = typename Device::execution_space;
  using size_type       = unsigned int;

  using block_view_type = typename Bitset<Device>::block_view_type::const_type;

 private:
  enum { block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT) };
  enum { block_mask = block_size - 1u };
  enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };

 public:
  KOKKOS_FUNCTION
  ConstBitset() : m_size(0) {}

  KOKKOS_FUNCTION
  ConstBitset(Bitset<Device> const& rhs)
      : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}

  KOKKOS_FUNCTION
  ConstBitset(ConstBitset const& rhs)
      : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}

  KOKKOS_FUNCTION
  ConstBitset& operator=(Bitset<Device> const& rhs) {
    this->m_size   = rhs.m_size;
    this->m_blocks = rhs.m_blocks;

    return *this;
  }

  KOKKOS_FUNCTION
  ConstBitset& operator=(ConstBitset const& rhs) {
    this->m_size   = rhs.m_size;
    this->m_blocks = rhs.m_blocks;

    return *this;
  }

  KOKKOS_FORCEINLINE_FUNCTION
  unsigned size() const { return m_size; }

  unsigned count() const {
    Impl::BitsetCount<ConstBitset<Device>> f(*this);
    return f.apply();
  }

  KOKKOS_FORCEINLINE_FUNCTION
  bool test(unsigned i) const {
    if (i < m_size) {
      const unsigned block = m_blocks[i >> block_shift];
      const unsigned mask  = 1u << static_cast<int>(i & block_mask);
      return block & mask;
    }
    return false;
  }

 private:
  unsigned m_size;
  block_view_type m_blocks;

 private:
  template <typename DDevice>
  friend class ConstBitset;

  template <typename Bitset>
  friend struct Impl::BitsetCount;

  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy(Bitset<DstDevice>& dst,
                        ConstBitset<SrcDevice> const& src);

  template <typename DstDevice, typename SrcDevice>
  friend void deep_copy(ConstBitset<DstDevice>& dst,
                        ConstBitset<SrcDevice> const& src);
};

template <typename DstDevice, typename SrcDevice>
void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) {
  if (dst.size() != src.size()) {
    Kokkos::Impl::throw_runtime_exception(
        "Error: Cannot deep_copy bitsets of different sizes!");
  }

  Kokkos::fence("Bitset::deep_copy: fence before copy operation");
  Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
                         typename SrcDevice::memory_space>(
      dst.m_blocks.data(), src.m_blocks.data(),
      sizeof(unsigned) * src.m_blocks.extent(0));
  Kokkos::fence("Bitset::deep_copy: fence after copy operation");
}

template <typename DstDevice, typename SrcDevice>
void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
  if (dst.size() != src.size()) {
    Kokkos::Impl::throw_runtime_exception(
        "Error: Cannot deep_copy bitsets of different sizes!");
  }

  Kokkos::fence("Bitset::deep_copy: fence before copy operation");
  Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
                         typename SrcDevice::memory_space>(
      dst.m_blocks.data(), src.m_blocks.data(),
      sizeof(unsigned) * src.m_blocks.extent(0));
  Kokkos::fence("Bitset::deep_copy: fence after copy operation");
}

template <typename DstDevice, typename SrcDevice>
void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
  if (dst.size() != src.size()) {
    Kokkos::Impl::throw_runtime_exception(
        "Error: Cannot deep_copy bitsets of different sizes!");
  }

  Kokkos::fence("Bitset::deep_copy: fence before copy operation");
  Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
                         typename SrcDevice::memory_space>(
      dst.m_blocks.data(), src.m_blocks.data(),
      sizeof(unsigned) * src.m_blocks.extent(0));
  Kokkos::fence("Bitset::deep_copy: fence after copy operation");
}
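//
// Example (illustrative sketch, with hypothetical names): mirroring a device
// bitset on the host and taking a read-only view of it. deep_copy throws if
// the two bitsets do not have the same size.
//
//   Kokkos::Bitset<Kokkos::DefaultExecutionSpace> device_bits(256);
//   Kokkos::Bitset<Kokkos::HostSpace> host_bits(256);
//   Kokkos::deep_copy(host_bits, device_bits);   // device -> host copy
//
//   Kokkos::ConstBitset<Kokkos::DefaultExecutionSpace> read_only(device_bits);
//   // read_only supports test(i) and count(), but not set()/reset()
//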
}  // namespace Kokkos

#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
#endif

#endif  // KOKKOS_BITSET_HPP