//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE #include static_assert(false, "Including non-public Kokkos header files is not allowed."); #endif #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP #include #include #include #include #include #include #include namespace Kokkos { // ------------------------------------------------------------------ // // Moved to Kokkos_Layout.hpp for more general accessibility /* enum class Iterate { Default, // Default for the device Left, // Left indices stride fastest Right, // Right indices stride fastest }; */ template struct default_outer_direction { using type = Iterate; static constexpr Iterate value = Iterate::Right; }; template struct default_inner_direction { using type = Iterate; static constexpr Iterate value = Iterate::Right; }; namespace Impl { // NOTE the comparison below is encapsulated to silent warnings about pointless // comparison of unsigned integer with zero template constexpr std::enable_if_t::value, bool> is_less_than_value_initialized_variable(T) { return false; } template constexpr std::enable_if_t::value, bool> is_less_than_value_initialized_variable(T arg) { return arg < T{}; } // Checked narrowing conversion that calls abort if the cast changes the value template constexpr To checked_narrow_cast(From arg, std::size_t idx) { constexpr const bool is_different_signedness = (std::is_signed::value != std::is_signed::value); auto const ret = static_cast(arg); if (static_cast(ret) != arg || (is_different_signedness && is_less_than_value_initialized_variable(arg) != is_less_than_value_initialized_variable(ret))) { auto msg = "Kokkos::MDRangePolicy bound type error: an unsafe implicit conversion " "is performed on a bound (" + std::to_string(arg) + ") in dimension (" + std::to_string(idx) + "), which may not preserve its original value.\n"; Kokkos::abort(msg.c_str()); } return ret; } // NOTE prefer C array U[M] to std::initalizer_list so that the number of // elements can be deduced (https://stackoverflow.com/q/40241370) // NOTE for some unfortunate reason the policy bounds are stored as signed // integer arrays (point_type which is Kokkos::Array) so we // specify the index type (actual policy index_type from the traits) and check // ahead of time that narrowing conversions will be safe. template constexpr Array to_array_potentially_narrowing(const U (&init)[M]) { using T = typename Array::value_type; Array a{}; constexpr std::size_t N = a.size(); static_assert(M <= N); auto* ptr = a.data(); // NOTE equivalent to // std::transform(std::begin(init), std::end(init), a.data(), // [](U x) { return static_cast(x); }); // except that std::transform is not constexpr. for (std::size_t i = 0; i < M; ++i) { *ptr++ = checked_narrow_cast(init[i], i); (void)checked_narrow_cast(init[i], i); // see note above } return a; } // NOTE Making a copy even when std::is_same>::value // is true to reduce code complexity. You may change this if you have a good // reason to. Intentionally not enabling std::array at this time but this may // change too. template constexpr NVCC_WONT_LET_ME_CALL_YOU_Array to_array_potentially_narrowing( Kokkos::Array const& other) { using T = typename NVCC_WONT_LET_ME_CALL_YOU_Array::value_type; NVCC_WONT_LET_ME_CALL_YOU_Array a{}; constexpr std::size_t N = a.size(); static_assert(M <= N); for (std::size_t i = 0; i < M; ++i) { a[i] = checked_narrow_cast(other[i], i); (void)checked_narrow_cast(other[i], i); // see note above } return a; } struct TileSizeProperties { int max_threads; int default_largest_tile_size; int default_tile_size; int max_total_tile_size; }; template TileSizeProperties get_tile_size_properties(const ExecutionSpace&) { // Host settings TileSizeProperties properties; properties.max_threads = std::numeric_limits::max(); properties.default_largest_tile_size = 0; properties.default_tile_size = 2; properties.max_total_tile_size = std::numeric_limits::max(); return properties; } } // namespace Impl // multi-dimensional iteration pattern template struct MDRangePolicy; // Note: If MDRangePolicy has a primary template, implicit CTAD (deduction // guides) are generated -> MDRangePolicy<> by some compilers, which is // incorrect. By making it a template specialization instead, no implicit CTAD // is generated. This works because there has to be at least one property // specified (which is Rank<...>); otherwise, we'd get the static_assert // "Kokkos::Error: MD iteration pattern not defined". This template // specialization uses in all places for correctness. template struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { using traits = Kokkos::Impl::PolicyTraits; using range_policy = RangePolicy; typename traits::execution_space m_space; using impl_range_policy = RangePolicy; using execution_policy = MDRangePolicy; // needed for is_execution_policy // interrogation template friend struct MDRangePolicy; static_assert(!std::is_void::value, "Kokkos Error: MD iteration pattern not defined"); using iteration_pattern = typename traits::iteration_pattern; using work_tag = typename traits::work_tag; using launch_bounds = typename traits::launch_bounds; using member_type = typename range_policy::member_type; static constexpr int rank = iteration_pattern::rank; static_assert(rank < 7, "Kokkos MDRangePolicy Error: Unsupported rank..."); using index_type = typename traits::index_type; using array_index_type = std::int64_t; using point_type = Kokkos::Array; // was index_type using tile_type = Kokkos::Array; // If point_type or tile_type is not templated on a signed integral type (if // it is unsigned), then if user passes in intializer_list of // runtime-determined values of signed integral type that are not const will // receive a compiler error due to an invalid case for implicit conversion - // "conversion from integer or unscoped enumeration type to integer type that // cannot represent all values of the original, except where source is a // constant expression whose value can be stored exactly in the target type" // This would require the user to either pass a matching index_type parameter // as template parameter to the MDRangePolicy or static_cast the individual // values point_type m_lower = {}; point_type m_upper = {}; tile_type m_tile = {}; point_type m_tile_end = {}; index_type m_num_tiles = 1; index_type m_prod_tile_dims = 1; bool m_tune_tile_size = false; static constexpr auto outer_direction = (iteration_pattern::outer_direction != Iterate::Default) ? iteration_pattern::outer_direction : default_outer_direction::value; static constexpr auto inner_direction = iteration_pattern::inner_direction != Iterate::Default ? iteration_pattern::inner_direction : default_inner_direction::value; static constexpr auto Right = Iterate::Right; static constexpr auto Left = Iterate::Left; KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const { return m_space; } MDRangePolicy() = default; template ::value && std::is_integral::value && std::is_integral::value>> MDRangePolicy(const LT (&lower)[LN], const UT (&upper)[UN], const TT (&tile)[TN] = {}) : MDRangePolicy( Impl::to_array_potentially_narrowing( lower), Impl::to_array_potentially_narrowing( upper), Impl::to_array_potentially_narrowing( tile)) { static_assert( LN == rank && UN == rank && TN <= rank, "MDRangePolicy: Constructor initializer lists have wrong size"); } template ::value && std::is_integral::value && std::is_integral::value>> MDRangePolicy(const typename traits::execution_space& work_space, const LT (&lower)[LN], const UT (&upper)[UN], const TT (&tile)[TN] = {}) : MDRangePolicy( work_space, Impl::to_array_potentially_narrowing( lower), Impl::to_array_potentially_narrowing( upper), Impl::to_array_potentially_narrowing( tile)) { static_assert( LN == rank && UN == rank && TN <= rank, "MDRangePolicy: Constructor initializer lists have wrong size"); } // NOTE: Keeping these two constructor despite the templated constructors // from Kokkos arrays for backwards compability to allow construction from // double-braced initializer lists. MDRangePolicy(point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{}) : MDRangePolicy(typename traits::execution_space(), lower, upper, tile) {} MDRangePolicy(const typename traits::execution_space& work_space, point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{}) : m_space(work_space), m_lower(lower), m_upper(upper), m_tile(tile) { init_helper(Impl::get_tile_size_properties(work_space)); } template ::value>> MDRangePolicy(Kokkos::Array const& lower, Kokkos::Array const& upper, Kokkos::Array const& tile = Kokkos::Array{}) : MDRangePolicy(typename traits::execution_space(), lower, upper, tile) {} template ::value>> MDRangePolicy(const typename traits::execution_space& work_space, Kokkos::Array const& lower, Kokkos::Array const& upper, Kokkos::Array const& tile = Kokkos::Array{}) : MDRangePolicy( work_space, Impl::to_array_potentially_narrowing( lower), Impl::to_array_potentially_narrowing( upper), Impl::to_array_potentially_narrowing( tile)) {} template MDRangePolicy(const MDRangePolicy p) : traits(p), // base class may contain data such as desired occupancy m_space(p.m_space), m_lower(p.m_lower), m_upper(p.m_upper), m_tile(p.m_tile), m_tile_end(p.m_tile_end), m_num_tiles(p.m_num_tiles), m_prod_tile_dims(p.m_prod_tile_dims), m_tune_tile_size(p.m_tune_tile_size) {} void impl_change_tile_size(const point_type& tile) { m_tile = tile; init_helper(Impl::get_tile_size_properties(m_space)); } bool impl_tune_tile_size() const { return m_tune_tile_size; } private: void init_helper(Impl::TileSizeProperties properties) { m_prod_tile_dims = 1; int increment = 1; int rank_start = 0; int rank_end = rank; if (inner_direction == Iterate::Right) { increment = -1; rank_start = rank - 1; rank_end = -1; } for (int i = rank_start; i != rank_end; i += increment) { const index_type length = m_upper[i] - m_lower[i]; if (m_upper[i] < m_lower[i]) { std::string msg = "Kokkos::MDRangePolicy bounds error: The lower bound (" + std::to_string(m_lower[i]) + ") is greater than its upper bound (" + std::to_string(m_upper[i]) + ") in dimension " + std::to_string(i) + ".\n"; #if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) Kokkos::abort(msg.c_str()); #elif defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS) Kokkos::Impl::log_warning(msg); #endif } if (m_tile[i] <= 0) { m_tune_tile_size = true; if ((inner_direction == Iterate::Right && (i < rank - 1)) || (inner_direction == Iterate::Left && (i > 0))) { if (m_prod_tile_dims * properties.default_tile_size < static_cast(properties.max_total_tile_size)) { m_tile[i] = properties.default_tile_size; } else { m_tile[i] = 1; } } else { m_tile[i] = properties.default_largest_tile_size == 0 ? std::max(length, 1) : properties.default_largest_tile_size; } } m_tile_end[i] = static_cast((length + m_tile[i] - 1) / m_tile[i]); m_num_tiles *= m_tile_end[i]; m_prod_tile_dims *= m_tile[i]; } if (m_prod_tile_dims > static_cast(properties.max_threads)) { printf(" Product of tile dimensions exceed maximum limit: %d\n", static_cast(properties.max_threads)); Kokkos::abort( "ExecSpace Error: MDRange tile dims exceed maximum number " "of threads per block - choose smaller tile dims"); } } }; template MDRangePolicy(const LT (&)[N], const UT (&)[N])->MDRangePolicy>; template MDRangePolicy(const LT (&)[N], const UT (&)[N], const TT (&)[TN]) ->MDRangePolicy>; template MDRangePolicy(DefaultExecutionSpace const&, const LT (&)[N], const UT (&)[N]) ->MDRangePolicy>; template MDRangePolicy(DefaultExecutionSpace const&, const LT (&)[N], const UT (&)[N], const TT (&)[TN]) ->MDRangePolicy>; template >> MDRangePolicy(ES const&, const LT (&)[N], const UT (&)[N]) ->MDRangePolicy>; template >> MDRangePolicy(ES const&, const LT (&)[N], const UT (&)[N], const TT (&)[TN]) ->MDRangePolicy>; template MDRangePolicy(Array const&, Array const&)->MDRangePolicy>; template MDRangePolicy(Array const&, Array const&, Array const&) ->MDRangePolicy>; template MDRangePolicy(DefaultExecutionSpace const&, Array const&, Array const&) ->MDRangePolicy>; template MDRangePolicy(DefaultExecutionSpace const&, Array const&, Array const&, Array const&) ->MDRangePolicy>; template >> MDRangePolicy(ES const&, Array const&, Array const&) ->MDRangePolicy>; template >> MDRangePolicy(ES const&, Array const&, Array const&, Array const&) ->MDRangePolicy>; } // namespace Kokkos #endif // KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP