//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE #include static_assert(false, "Including non-public Kokkos header files is not allowed."); #endif #ifndef KOKKOS_CRS_HPP #define KOKKOS_CRS_HPP #include #include namespace Kokkos { /// \class Crs /// \brief Compressed row storage array. /// /// \tparam DataType The type of stored entries. If a Crs is /// used as the graph of a sparse matrix, then this is usually an /// integer type, the type of the column indices in the sparse /// matrix. /// /// \tparam Arg1Type The second template parameter, corresponding /// either to the Device type (if there are no more template /// parameters) or to the Layout type (if there is at least one more /// template parameter). /// /// \tparam Arg2Type The third template parameter, which if provided /// corresponds to the Device type. /// /// \tparam SizeType The type of row offsets. Usually the default /// parameter suffices. However, setting a nondefault value is /// necessary in some cases, for example, if you want to have a /// sparse matrices with dimensions (and therefore column indices) /// that fit in \c int, but want to store more than INT_MAX /// entries in the sparse matrix. /// /// A row has a range of entries: ///
    ///
  • row_map[i0] <= entry < row_map[i0+1]
  • ///
  • 0 <= i1 < row_map[i0+1] - row_map[i0]
  • ///
  • entries( entry , i2 , i3 , ... );
  • ///
  • entries( row_map[i0] + i1 , i2 , i3 , ... );
  • ///
template ::size_type> class Crs { protected: using traits = ViewTraits; public: using data_type = DataType; using array_layout = typename traits::array_layout; using execution_space = typename traits::execution_space; using memory_space = typename traits::memory_space; using device_type = typename traits::device_type; using size_type = SizeType; using staticcrsgraph_type = Crs; using HostMirror = Crs; using row_map_type = View; using entries_type = View; row_map_type row_map; entries_type entries; /* * Default Constructors, operators and destructor */ KOKKOS_DEFAULTED_FUNCTION Crs() = default; KOKKOS_DEFAULTED_FUNCTION Crs(Crs const&) = default; KOKKOS_DEFAULTED_FUNCTION Crs(Crs&&) = default; KOKKOS_DEFAULTED_FUNCTION Crs& operator=(Crs const&) = default; KOKKOS_DEFAULTED_FUNCTION Crs& operator=(Crs&&) = default; KOKKOS_DEFAULTED_FUNCTION ~Crs() = default; /** \brief Assign to a view of the rhs array. * If the old view is the last view * then allocated memory is deallocated. */ template KOKKOS_INLINE_FUNCTION Crs(const RowMapType& row_map_, const EntriesType& entries_) : row_map(row_map_), entries(entries_) {} /** \brief Return number of rows in the graph */ KOKKOS_INLINE_FUNCTION size_type numRows() const { return (row_map.extent(0) != 0) ? row_map.extent(0) - static_cast(1) : static_cast(0); } }; /*--------------------------------------------------------------------------*/ template void get_crs_transpose_counts( OutCounts& out, Crs const& in, std::string const& name = "transpose_counts"); template typename OutCounts::value_type get_crs_row_map_from_counts( OutCounts& out, InCrs const& in, std::string const& name = "row_map"); template void transpose_crs(Crs& out, Crs const& in); } // namespace Kokkos /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ namespace Kokkos { namespace Impl { template class GetCrsTransposeCounts { public: using execution_space = typename InCrs::execution_space; using self_type = GetCrsTransposeCounts; using index_type = typename InCrs::size_type; private: InCrs in; OutCounts out; public: KOKKOS_INLINE_FUNCTION void operator()(index_type i) const { atomic_increment(&out[in.entries(i)]); } GetCrsTransposeCounts(InCrs const& arg_in, OutCounts const& arg_out) : in(arg_in), out(arg_out) { using policy_type = RangePolicy; using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, index_type(in.entries.size()))); closure.execute(); execution_space().fence( "Kokkos::Impl::GetCrsTransposeCounts::GetCrsTransposeCounts: fence " "after functor execution"); } }; template class CrsRowMapFromCounts { public: using execution_space = typename InCounts::execution_space; using value_type = typename OutRowMap::value_type; using index_type = typename InCounts::size_type; using last_value_type = Kokkos::View; private: InCounts m_in; OutRowMap m_out; last_value_type m_last_value; public: KOKKOS_INLINE_FUNCTION void operator()(index_type i, value_type& update, bool final_pass) const { if (i < static_cast(m_in.size())) { update += m_in(i); if (final_pass) m_out(i + 1) = update; } else if (final_pass) { m_out(0) = 0; m_last_value() = update; } } KOKKOS_INLINE_FUNCTION void init(value_type& update) const { update = 0; } KOKKOS_INLINE_FUNCTION void join(value_type& update, const value_type& input) const { update += input; } using self_type = CrsRowMapFromCounts; CrsRowMapFromCounts(InCounts const& arg_in, OutRowMap const& arg_out) : m_in(arg_in), m_out(arg_out), m_last_value("last_value") {} value_type execute() { using policy_type = RangePolicy; using closure_type = Kokkos::Impl::ParallelScan; closure_type closure(*this, policy_type(0, m_in.size() + 1)); closure.execute(); auto last_value = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, m_last_value); return last_value(); } }; template class FillCrsTransposeEntries { public: using execution_space = typename InCrs::execution_space; using memory_space = typename InCrs::memory_space; using value_type = typename OutCrs::entries_type::value_type; using index_type = typename InCrs::size_type; private: using counters_type = View; InCrs in; OutCrs out; counters_type counters; public: KOKKOS_INLINE_FUNCTION void operator()(index_type i) const { auto begin = in.row_map(i); auto end = in.row_map(i + 1); for (auto j = begin; j < end; ++j) { auto ti = in.entries(j); auto tbegin = out.row_map(ti); auto tj = atomic_fetch_add(&counters(ti), 1); out.entries(tbegin + tj) = i; } } using self_type = FillCrsTransposeEntries; FillCrsTransposeEntries(InCrs const& arg_in, OutCrs const& arg_out) : in(arg_in), out(arg_out), counters("counters", arg_out.numRows()) { using policy_type = RangePolicy; using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, index_type(in.numRows()))); closure.execute(); execution_space().fence( "Kokkos::Impl::FillCrsTransposeEntries::FillCrsTransposeEntries: fence " "after functor execution"); } }; } // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ namespace Kokkos { template void get_crs_transpose_counts( OutCounts& out, Crs const& in, std::string const& name) { using InCrs = Crs; out = OutCounts(name, in.numRows()); Kokkos::Impl::GetCrsTransposeCounts functor(in, out); } template typename OutRowMap::value_type get_crs_row_map_from_counts( OutRowMap& out, InCounts const& in, std::string const& name) { out = OutRowMap(view_alloc(WithoutInitializing, name), in.size() + 1); Kokkos::Impl::CrsRowMapFromCounts functor(in, out); return functor.execute(); } template void transpose_crs(Crs& out, Crs const& in) { using crs_type = Crs; using memory_space = typename crs_type::memory_space; using counts_type = View; { counts_type counts; Kokkos::get_crs_transpose_counts(counts, in); Kokkos::get_crs_row_map_from_counts(out.row_map, counts, "tranpose_row_map"); } out.entries = decltype(out.entries)("transpose_entries", in.entries.size()); Kokkos::Impl::FillCrsTransposeEntries entries_functor( in, out); } template struct CountAndFillBase; template struct CountAndFillBase { using data_type = typename CrsType::data_type; using size_type = typename CrsType::size_type; using row_map_type = typename CrsType::row_map_type; using counts_type = row_map_type; CrsType m_crs; Functor m_functor; counts_type m_counts; struct Count {}; KOKKOS_FUNCTION void operator()(Count, size_type i) const { m_counts(i) = m_functor(i, nullptr); } struct Fill {}; KOKKOS_FUNCTION void operator()(Fill, size_type i) const { auto j = m_crs.row_map(i); /* we don't want to access entries(entries.size()), even if its just to get its address and never use it. this can happen when row (i) is empty and all rows after it are also empty. we could compare to row_map(i + 1), but that is a read from global memory, whereas dimension_0() should be part of the View in registers (or constant memory) */ data_type* fill = (j == static_cast(m_crs.entries.extent(0))) ? nullptr : (&(m_crs.entries(j))); m_functor(i, fill); } CountAndFillBase(CrsType& crs, Functor const& f) : m_crs(crs), m_functor(f) {} }; template struct CountAndFill : public CountAndFillBase { using base_type = CountAndFillBase; using typename base_type::Count; using typename base_type::counts_type; using typename base_type::data_type; using typename base_type::Fill; using typename base_type::size_type; using entries_type = typename CrsType::entries_type; using self_type = CountAndFill; CountAndFill(CrsType& crs, size_type nrows, Functor const& f) : base_type(crs, f) { using execution_space = typename CrsType::execution_space; this->m_counts = counts_type("counts", nrows); { using count_policy_type = RangePolicy; using count_closure_type = Kokkos::Impl::ParallelFor; const count_closure_type closure(*this, count_policy_type(0, nrows)); closure.execute(); } auto nentries = Kokkos::get_crs_row_map_from_counts(this->m_crs.row_map, this->m_counts); this->m_counts = counts_type(); this->m_crs.entries = entries_type("entries", nentries); { using fill_policy_type = RangePolicy; using fill_closure_type = Kokkos::Impl::ParallelFor; const fill_closure_type closure(*this, fill_policy_type(0, nrows)); closure.execute(); } crs = this->m_crs; } }; template void count_and_fill_crs(CrsType& crs, typename CrsType::size_type nrows, Functor const& f) { Kokkos::CountAndFill(crs, nrows, f); } } // namespace Kokkos #endif /* #define KOKKOS_CRS_HPP */