//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_TEST_SIMD_REDUCTIONS_HPP #define KOKKOS_TEST_SIMD_REDUCTIONS_HPP #include #include template inline void host_check_reduction_one_loader(ReductionOp reduce_op, std::size_t n, T const* args) { Loader loader; using simd_type = Kokkos::Experimental::simd; using mask_type = typename Kokkos::Experimental::simd::mask_type; constexpr std::size_t width = simd_type::size(); for (std::size_t i = 0; i < n; i += width) { std::size_t const nremaining = n - i; std::size_t const nlanes = Kokkos::min(nremaining, width); simd_type arg; bool const loaded_arg = loader.host_load(args + i, nlanes, arg); if (!loaded_arg) continue; mask_type mask(false); for (std::size_t j = 0; j < n; ++j) { mask[j] = true; } auto value = where(mask, arg); auto expected = reduce_op.on_host_serial(value); auto computed = reduce_op.on_host(value); gtest_checker().equality(expected, computed); } } template inline void host_check_reduction_all_loaders(ReductionOp reduce_op, std::size_t n, T const* args) { host_check_reduction_one_loader(reduce_op, n, args); host_check_reduction_one_loader(reduce_op, n, args); host_check_reduction_one_loader(reduce_op, n, args); } template inline void host_check_all_reductions(const DataType (&args)[n]) { host_check_reduction_all_loaders(hmin(), n, args); host_check_reduction_all_loaders(hmax(), n, args); host_check_reduction_all_loaders(reduce(), n, args); } template inline void host_check_reductions() { if constexpr (is_type_v>) { constexpr size_t n = 16; if constexpr (std::is_signed_v) { DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2, -15, 5, 17, -22, 20}; host_check_all_reductions(args); } else { DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2, 15, 5, 17, 22, 20}; host_check_all_reductions(args); } } } template inline void host_check_reductions_all_types( Kokkos::Experimental::Impl::data_types) { (host_check_reductions(), ...); } template inline void host_check_reductions_all_abis( Kokkos::Experimental::Impl::abi_set) { using DataTypes = Kokkos::Experimental::Impl::data_type_set; (host_check_reductions_all_types(DataTypes()), ...); } template KOKKOS_INLINE_FUNCTION void device_check_reduction_one_loader( ReductionOp reduce_op, std::size_t n, T const* args) { Loader loader; using simd_type = Kokkos::Experimental::simd; using mask_type = typename Kokkos::Experimental::simd::mask_type; constexpr std::size_t width = simd_type::size(); for (std::size_t i = 0; i < n; i += width) { std::size_t const nremaining = n - i; std::size_t const nlanes = Kokkos::min(nremaining, width); simd_type arg; bool const loaded_arg = loader.device_load(args + i, nlanes, arg); if (!loaded_arg) continue; mask_type mask(false); for (std::size_t j = 0; j < n; ++j) { mask[j] = true; } auto value = where(mask, arg); auto expected = reduce_op.on_device_serial(value); auto computed = reduce_op.on_device(value); kokkos_checker().equality(expected, computed); } } template KOKKOS_INLINE_FUNCTION void device_check_reduction_all_loaders( ReductionOp reduce_op, std::size_t n, T const* args) { device_check_reduction_one_loader(reduce_op, n, args); device_check_reduction_one_loader(reduce_op, n, args); device_check_reduction_one_loader(reduce_op, n, args); } template KOKKOS_INLINE_FUNCTION void device_check_all_reductions( const DataType (&args)[n]) { device_check_reduction_all_loaders(hmin(), n, args); device_check_reduction_all_loaders(hmax(), n, args); device_check_reduction_all_loaders(reduce(), n, args); } template KOKKOS_INLINE_FUNCTION void device_check_reductions() { if constexpr (is_type_v>) { constexpr size_t n = 16; if constexpr (std::is_signed_v) { DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2, -15, 5, 17, -22, 20}; device_check_all_reductions(args); } else { DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2, 15, 5, 17, 22, 20}; device_check_all_reductions(args); } } } template KOKKOS_INLINE_FUNCTION void device_check_reductions_all_types( Kokkos::Experimental::Impl::data_types) { (device_check_reductions(), ...); } template KOKKOS_INLINE_FUNCTION void device_check_reductions_all_abis( Kokkos::Experimental::Impl::abi_set) { using DataTypes = Kokkos::Experimental::Impl::data_type_set; (device_check_reductions_all_types(DataTypes()), ...); } class simd_device_reduction_functor { public: KOKKOS_INLINE_FUNCTION void operator()(int) const { device_check_reductions_all_abis( Kokkos::Experimental::Impl::device_abi_set()); } }; TEST(simd, host_reductions) { host_check_reductions_all_abis(Kokkos::Experimental::Impl::host_abi_set()); } TEST(simd, device_reductions) { #ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET GTEST_SKIP() << "skipping because of a non-deterministic failure reporting: " "Failure to synchronize stream (nil): Error in " "cuStreamSynchronize: an illegal memory access was encountered"; #endif Kokkos::parallel_for(1, simd_device_reduction_functor()); } #endif