//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_BIT_MANIPULATION_HPP #define KOKKOS_BIT_MANIPULATION_HPP #include #include #include // CHAR_BIT #include //memcpy #include namespace Kokkos::Impl { template KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) { if constexpr (sizeof(T) > 1) { using U = std::make_unsigned_t; size_t shift = CHAR_BIT * (sizeof(T) - 1); U lo_mask = static_cast(~0); U hi_mask = lo_mask << shift; U val = x; for (size_t i = 0; i < sizeof(T) / 2; ++i) { U lo_val = val & lo_mask; U hi_val = val & hi_mask; val = (val & ~lo_mask) | (hi_val >> shift); val = (val & ~hi_mask) | (lo_val << shift); lo_mask <<= CHAR_BIT; hi_mask >>= CHAR_BIT; shift -= 2 * CHAR_BIT; } return val; } // sizeof(T) == 1 return x; } template KOKKOS_FUNCTION constexpr int countl_zero_fallback(T x) { // From Hacker's Delight (2nd edition) section 5-3 unsigned int y = 0; using ::Kokkos::Experimental::digits_v; int n = digits_v; int c = digits_v / 2; do { y = x >> c; if (y != 0) { n -= c; x = y; } c >>= 1; } while (c != 0); return n - static_cast(x); } template KOKKOS_FUNCTION constexpr int countr_zero_fallback(T x) { using ::Kokkos::Experimental::digits_v; return digits_v - countl_zero_fallback(static_cast( static_cast(~x) & static_cast(x - 1))); } template KOKKOS_FUNCTION constexpr int popcount_fallback(T x) { int c = 0; for (; x != 0; x &= x - 1) { ++c; } return c; } template inline constexpr bool is_standard_unsigned_integer_type_v = std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v; } // namespace Kokkos::Impl namespace Kokkos { // #if defined(KOKKOS_ENABLE_SYCL) && defined(__INTEL_LLVM_COMPILER) && \ __INTEL_LLVM_COMPILER < 20240000 using sycl::detail::bit_cast; #else template KOKKOS_FUNCTION std::enable_if_t && std::is_trivially_copyable_v, To> bit_cast(From const& from) noexcept { #if defined(KOKKOS_ENABLE_SYCL) && defined(__INTEL_LLVM_COMPILER) && \ __INTEL_LLVM_COMPILER >= 20240000 return sycl::bit_cast(from); #else To to; memcpy(static_cast(&to), static_cast(&from), sizeof(To)); return to; #endif } #endif // // template KOKKOS_FUNCTION constexpr std::enable_if_t, T> byteswap( T value) noexcept { return Impl::byteswap_fallback(value); } // // template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, int> countl_zero(T x) noexcept { using ::Kokkos::Experimental::digits_v; if (x == 0) return digits_v; // TODO use compiler intrinsics when available return Impl::countl_zero_fallback(x); } template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, int> countl_one(T x) noexcept { using ::Kokkos::Experimental::digits_v; using ::Kokkos::Experimental::finite_max_v; if (x == finite_max_v) return digits_v; return countl_zero(static_cast(~x)); } template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, int> countr_zero(T x) noexcept { using ::Kokkos::Experimental::digits_v; if (x == 0) return digits_v; // TODO use compiler intrinsics when available return Impl::countr_zero_fallback(x); } template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, int> countr_one(T x) noexcept { using ::Kokkos::Experimental::digits_v; using ::Kokkos::Experimental::finite_max_v; if (x == finite_max_v) return digits_v; return countr_zero(static_cast(~x)); } template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, int> popcount(T x) noexcept { if (x == 0) return 0; // TODO use compiler intrinsics when available return Impl::popcount_fallback(x); } // // template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, bool> has_single_bit(T x) noexcept { return x != 0 && (((x & (x - 1)) == 0)); } template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, T> bit_ceil(T x) noexcept { if (x <= 1) return 1; using ::Kokkos::Experimental::digits_v; return T{1} << (digits_v - countl_zero(static_cast(x - 1))); } template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, T> bit_floor(T x) noexcept { if (x == 0) return 0; using ::Kokkos::Experimental::digits_v; return T{1} << (digits_v - 1 - countl_zero(x)); } template KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, T> bit_width(T x) noexcept { if (x == 0) return 0; using ::Kokkos::Experimental::digits_v; return digits_v - countl_zero(x); } // // template [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, T> rotl(T x, int s) noexcept { using Experimental::digits_v; constexpr auto dig = digits_v; int const rem = s % dig; if (rem == 0) return x; if (rem > 0) return (x << rem) | (x >> ((dig - rem) % dig)); return (x >> -rem) | (x << ((dig + rem) % dig)); // rotr(x, -rem) } template [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t< Impl::is_standard_unsigned_integer_type_v, T> rotr(T x, int s) noexcept { using Experimental::digits_v; constexpr auto dig = digits_v; int const rem = s % dig; if (rem == 0) return x; if (rem > 0) return (x >> rem) | (x << ((dig - rem) % dig)); return (x << -rem) | (x >> ((dig + rem) % dig)); // rotl(x, -rem) } // } // namespace Kokkos namespace Kokkos::Impl { #if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \ defined(KOKKOS_COMPILER_GNU) #define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS #endif template KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept { return byteswap_fallback(x); } template KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept { #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS if constexpr (sizeof(T) == 1) { return x; } else if constexpr (sizeof(T) == 2) { return __builtin_bswap16(x); } else if constexpr (sizeof(T) == 4) { return __builtin_bswap32(x); } else if constexpr (sizeof(T) == 8) { return __builtin_bswap64(x); } else if constexpr (sizeof(T) == 16) { #if defined(__has_builtin) #if __has_builtin(__builtin_bswap128) return __builtin_bswap128(x); #endif #endif return (__builtin_bswap64(x >> 64) | (static_cast(__builtin_bswap64(x)) << 64)); } #endif return byteswap_fallback(x); } template KOKKOS_IMPL_DEVICE_FUNCTION std::enable_if_t, int> countl_zero_builtin_device(T x) noexcept { #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) if constexpr (sizeof(T) == sizeof(long long int)) return __clzll(reinterpret_cast(x)); if constexpr (sizeof(T) == sizeof(int)) return __clz(reinterpret_cast(x)); using ::Kokkos::Experimental::digits_v; constexpr int shift = digits_v - digits_v; return __clz(x) - shift; #elif defined(KOKKOS_ENABLE_SYCL) return sycl::clz(x); #else return countl_zero_fallback(x); #endif } template KOKKOS_IMPL_HOST_FUNCTION std::enable_if_t, int> countl_zero_builtin_host(T x) noexcept { using ::Kokkos::Experimental::digits_v; if (x == 0) return digits_v; #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS if constexpr (std::is_same_v) { return __builtin_clzll(x); } else if constexpr (std::is_same_v) { return __builtin_clzl(x); } else if constexpr (std::is_same_v) { return __builtin_clz(x); } else { constexpr int shift = digits_v - digits_v; return __builtin_clz(x) - shift; } #else return countl_zero_fallback(x); #endif } template KOKKOS_IMPL_DEVICE_FUNCTION std::enable_if_t, int> countr_zero_builtin_device(T x) noexcept { using ::Kokkos::Experimental::digits_v; if (x == 0) return digits_v; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) if constexpr (sizeof(T) == sizeof(long long int)) return __ffsll(reinterpret_cast(x)) - 1; return __ffs(reinterpret_cast(x)) - 1; #elif defined(KOKKOS_ENABLE_SYCL) return sycl::ctz(x); #else return countr_zero_fallback(x); #endif } template KOKKOS_IMPL_HOST_FUNCTION std::enable_if_t, int> countr_zero_builtin_host(T x) noexcept { using ::Kokkos::Experimental::digits_v; if (x == 0) return digits_v; #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS if constexpr (std::is_same_v) { return __builtin_ctzll(x); } else if constexpr (std::is_same_v) { return __builtin_ctzl(x); } else { return __builtin_ctz(x); } #else return countr_zero_fallback(x); #endif } template KOKKOS_IMPL_DEVICE_FUNCTION std::enable_if_t, int> popcount_builtin_device(T x) noexcept { #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) if constexpr (sizeof(T) == sizeof(long long int)) return __popcll(x); return __popc(x); #elif defined(KOKKOS_ENABLE_SYCL) return sycl::popcount(x); #else return popcount_fallback(x); #endif } template KOKKOS_IMPL_HOST_FUNCTION std::enable_if_t, int> popcount_builtin_host(T x) noexcept { #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS if constexpr (std::is_same_v) { return __builtin_popcountll(x); } else if constexpr (std::is_same_v) { return __builtin_popcountl(x); } else { return __builtin_popcount(x); } #else return popcount_fallback(x); #endif } #undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS } // namespace Kokkos::Impl namespace Kokkos::Experimental { template KOKKOS_FUNCTION std::enable_if_t && std::is_trivially_copyable_v, To> bit_cast_builtin(From const& from) noexcept { // qualify the call to avoid ADL return Kokkos::bit_cast(from); // no benefit to call the _builtin variant } template KOKKOS_FUNCTION std::enable_if_t, T> byteswap_builtin( T x) noexcept { KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);)) KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);)) } template KOKKOS_FUNCTION std::enable_if_t< ::Kokkos::Impl::is_standard_unsigned_integer_type_v, int> countl_zero_builtin(T x) noexcept { KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);)) KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);)) } template KOKKOS_FUNCTION std::enable_if_t< ::Kokkos::Impl::is_standard_unsigned_integer_type_v, int> countl_one_builtin(T x) noexcept { if (x == finite_max_v) return digits_v; return countl_zero_builtin(static_cast(~x)); } template KOKKOS_FUNCTION std::enable_if_t< ::Kokkos::Impl::is_standard_unsigned_integer_type_v, int> countr_zero_builtin(T x) noexcept { KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);)) KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);)) } template KOKKOS_FUNCTION std::enable_if_t< ::Kokkos::Impl::is_standard_unsigned_integer_type_v, int> countr_one_builtin(T x) noexcept { if (x == finite_max_v) return digits_v; return countr_zero_builtin(static_cast(~x)); } template KOKKOS_FUNCTION std::enable_if_t< ::Kokkos::Impl::is_standard_unsigned_integer_type_v, int> popcount_builtin(T x) noexcept { KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);)) KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);)) } template KOKKOS_FUNCTION std::enable_if_t< ::Kokkos::Impl::is_standard_unsigned_integer_type_v, bool> has_single_bit_builtin(T x) noexcept { return has_single_bit(x); // no benefit to call the _builtin variant } template KOKKOS_FUNCTION std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v, T> bit_ceil_builtin(T x) noexcept { if (x <= 1) return 1; return T{1} << (digits_v - countl_zero_builtin(static_cast(x - 1))); } template KOKKOS_FUNCTION std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v, T> bit_floor_builtin(T x) noexcept { if (x == 0) return 0; return T{1} << (digits_v - 1 - countl_zero_builtin(x)); } template KOKKOS_FUNCTION std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v, T> bit_width_builtin(T x) noexcept { if (x == 0) return 0; return digits_v - countl_zero_builtin(x); } template [[nodiscard]] KOKKOS_FUNCTION std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v, T> rotl_builtin(T x, int s) noexcept { return rotl(x, s); // no benefit to call the _builtin variant } template [[nodiscard]] KOKKOS_FUNCTION std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v, T> rotr_builtin(T x, int s) noexcept { return rotr(x, s); // no benefit to call the _builtin variant } } // namespace Kokkos::Experimental #endif