42#ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
43#define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
45#include "Tpetra_CrsGraph.hpp"
46#include "Tpetra_Export.hpp"
48#include "Tpetra_Details_makeColMap.hpp"
50#include "Teuchos_ParameterList.hpp"
51#include "Teuchos_TimeMonitor.hpp"
52#include "KokkosSparse_Utils.hpp"
53#include "KokkosKernels_Handle.hpp"
54#include "KokkosSparse_spadd.hpp"
// Kokkos functor that converts column indices through a column map:
// entry i of colindsConverted receives colmap.getGlobalElement(colindsOrig(i)).
// NOTE(review): the numerals at the start of these lines look like listing
// line numbers fused into the text, and parts of the definition (template
// header, braces, the colmap member and its initializer) are not visible in
// this excerpt.
59 typename LocalIndicesType,
60 typename GlobalIndicesType,
62 struct ConvertLocalToGlobalFunctor
// Constructor: keeps copies of the source and destination index views.
// A colmap_ argument is also taken; its initializer is not visible here.
64 ConvertLocalToGlobalFunctor(
65 const LocalIndicesType& colindsOrig_,
66 const GlobalIndicesType& colindsConverted_,
67 const ColMapType& colmap_) :
68 colindsOrig (colindsOrig_),
69 colindsConverted (colindsConverted_),
// Per-entry kernel body. Note the loop index i is typed as GO.
72 KOKKOS_INLINE_FUNCTION
void
73 operator() (
const GO i)
const
// Look up the global index for the i-th original index and store it.
75 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
// Input indices (read) and output indices (written) respectively.
77 LocalIndicesType colindsOrig;
78 GlobalIndicesType colindsConverted;
// Kokkos functor that is the inverse-direction counterpart of a local-to-global
// conversion: entry i of lids receives localColMap.getLocalElement(gids(i)).
// NOTE(review): braces and the lids/gids member declarations are not visible
// in this excerpt; the leading numerals look like fused listing line numbers.
82 template<
class LO,
class GO,
class LOView,
class GOView,
class LocalMap>
83 struct ConvertGlobalToLocalFunctor
// Constructor: lids_ is the output view, gids_ the input view; the local map
// is taken by value and copied into the functor.
85 ConvertGlobalToLocalFunctor(LOView& lids_,
const GOView& gids_,
const LocalMap localColMap_)
86 : lids(lids_), gids(gids_), localColMap(localColMap_)
// Per-entry kernel body. Note the loop index i is typed as GO.
89 KOKKOS_FUNCTION
void operator() (
const GO i)
const
// Translate the i-th input index through the local column map.
91 lids(i) = localColMap.getLocalElement(gids(i));
// Map used for the lookup; held by value as a const member.
96 const LocalMap localColMap;
// Kokkos functor that, for one row i, walks the column indices of row i of A
// and of B and writes column indices into row i of C (indices only, no values).
// At each step the smaller of the two current indices is chosen (see Ccol).
// NOTE(review): most of the loop interior (advancing ai/bi/Coffset and how
// equal indices are handled) is not visible in this excerpt — confirm against
// the full source before relying on details. The name suggests the inputs are
// expected to have sorted rows; that precondition is not shown here.
100 template <
typename size_type,
typename ordinal_type,
101 typename ArowptrsT,
typename BrowptrsT,
typename CrowptrsT,
102 typename AcolindsT,
typename BcolindsT,
typename CcolindsT>
103 struct SortedNumericIndicesOnlyFunctor {
// Constructor: takes row-pointer and column-index views for A, B and C.
105 SortedNumericIndicesOnlyFunctor(
const ArowptrsT& Arowptrs_,
106 const BrowptrsT& Browptrs_,
107 const CrowptrsT& Crowptrs_,
108 const AcolindsT& Acolinds_,
109 const BcolindsT& Bcolinds_,
110 const CcolindsT& Ccolinds_)
111 : Arowptrs(Arowptrs_),
116 Ccolinds(Ccolinds_) {}
// Per-row kernel body; i is the row index.
118 KOKKOS_INLINE_FUNCTION
void operator()(
const ordinal_type i)
const
// Largest ordinal value, used as a sentinel for "no current column"
// (assigned below when a row is empty; the loop stops when both are sentinel).
120 const ordinal_type ORDINAL_MAX = Kokkos::ArithTraits<ordinal_type>::max();
// Start offset and length of row i in A and in B, from the row pointers.
125 size_type Arowstart = Arowptrs(i);
126 size_type Arowlen = Arowptrs(i + 1) - Arowstart;
127 size_type Browstart = Browptrs(i);
128 size_type Browlen = Browptrs(i + 1) - Browstart;
// First column of each row, or the sentinel when the row has no entries.
129 ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
130 ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
// Write position in C starts at the beginning of row i of C.
131 size_type Coffset = Crowptrs(i);
// Continue while at least one of the two rows still has a current column.
132 while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX)
// Emit the smaller of the two current columns (Bcol on ties).
134 ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
// Reload the current A / B column from position ai / bi within the row
// (the code that advances ai and bi is not visible here).
141 Acol = Acolinds(Arowstart + ai);
149 Bcol = Bcolinds(Browstart + bi);
// Store the chosen column at the current write position in C.
151 Ccolinds(Coffset) = Ccol;
// Stored views (the Ccolinds member declaration is not visible here).
156 const ArowptrsT Arowptrs;
157 const BrowptrsT Browptrs;
158 const CrowptrsT Crowptrs;
159 const AcolindsT Acolinds;
160 const BcolindsT Bcolinds;
// Kokkos functor that, for one row i, scatters the column indices of row i of
// A and of B into row i of C. The destination of entry j is
// Crowptrs(i) + Apos(j) (for A) or Crowptrs(i) + Bpos(j) (for B), so Apos/Bpos
// hold per-entry offsets relative to the start of the C row.
// NOTE(review): how Apos/Bpos are produced is not visible here; presumably by
// a preceding symbolic phase — confirm against the caller.
164 template <
typename size_type,
typename ordinal_type,
165 typename ArowptrsT,
typename BrowptrsT,
typename CrowptrsT,
166 typename AcolindsT,
typename BcolindsT,
typename CcolindsT>
167 struct UnsortedNumericIndicesOnlyFunctor {
// Constructor: all views are taken by value. Apos_ and Bpos_ use the same view
// type as the C column indices (CcolindsT).
169 UnsortedNumericIndicesOnlyFunctor(
170 const ArowptrsT Arowptrs_,
const BrowptrsT Browptrs_,
const CrowptrsT Crowptrs_,
171 const AcolindsT Acolinds_,
const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
172 const CcolindsT Apos_,
const CcolindsT Bpos_)
173 : Arowptrs(Arowptrs_),
// Per-row kernel body; i is the row index.
182 KOKKOS_INLINE_FUNCTION
void operator()(
const ordinal_type i)
const {
// Row extents: start of row i in C, and [start, end) of row i in A and B.
183 size_type CrowStart = Crowptrs(i);
184 size_type ArowStart = Arowptrs(i);
185 size_type ArowEnd = Arowptrs(i + 1);
186 size_type BrowStart = Browptrs(i);
187 size_type BrowEnd = Browptrs(i + 1);
// Scatter every A entry of this row to its precomputed slot in C.
189 for (size_type j = ArowStart; j < ArowEnd; j++) {
190 Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
// Scatter every B entry of this row to its precomputed slot in C.
193 for (size_type j = BrowStart; j < BrowEnd; j++) {
194 Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
// Stored views (the Ccolinds member declaration is not visible here).
197 const ArowptrsT Arowptrs;
198 const BrowptrsT Browptrs;
199 const CrowptrsT Crowptrs;
200 const AcolindsT Acolinds;
201 const BcolindsT Bcolinds;
// Per-entry destination offsets within the C row, for A and B entries.
203 const CcolindsT Apos;
204 const CcolindsT Bpos;
// NOTE(review): fragment of a templated definition whose last visible
// parameter is a label string; presumably the CrsGraphTransposer constructor
// (graph to transpose plus label) — the remaining lines are not visible here.
208 template<
class LocalOrdinal,
213 const std::string& label)
// Member function returning an RCP to a CrsGraph.
// NOTE(review): the function-name line is not visible in this excerpt; from
// the spadd_* calls and the "sym" view names below this is presumably
// symmetrize(), i.e. the routine that builds graph + graph^T — confirm.
// Many statements (braces, branch conditions, call arguments) are missing
// here, so the comments below describe only what the visible lines show.
220 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
// Type aliases derived from the Node's device and the local graph type.
225 using device_type =
typename Node::device_type;
226 using execution_space =
typename device_type::execution_space;
// Range policy over execution_space with size_t as the index type.
227 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
229 using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
// Non-const row-pointer and column-index view types of the local graph.
230 using row_ptrs_array =
typename local_graph_device_type::row_map_type::non_const_type ;
231 using col_inds_array =
typename local_graph_device_type::entries_type::non_const_type;
232 using local_map_type =
typename map_type::local_map_type;
// Device view of global ordinals, used when indices are converted to global.
233 using global_col_inds_array =
typename Kokkos::View<GlobalOrdinal*, device_type>;
// Local copy of the origGraph_ member.
235 auto graph = origGraph_;
// Template arguments of two type aliases whose heads are not visible here
// (presumably KokkosKernels handle types — confirm).
262 typename Node::execution_space,
typename Node::memory_space,
typename Node::memory_space>;
264 typename Node::execution_space,
typename Node::memory_space,
typename Node::memory_space>;
// Row pointers hold one more entry than there are rows.
271 auto nrows =
rowptrs.extent(0) - 1;
// Row pointers of the result: nrows + 1 entries, allocated uninitialized.
272 auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"row ptrs sym"), nrows + 1);
// Uninitialized buffer with one global index per entry of colinds; it is
// filled by the parallel_for below using the `convert` functor.
279 global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"colinds (converted)"),
colinds.extent(0));
281 Kokkos::parallel_for(
"colInds (converted)", range_type(0,
colinds.extent(0)), convert);
// Same conversion for colindsT, using the `convertT` functor.
285 global_col_inds_array
colindsTConverted(Kokkos::ViewAllocateWithoutInitializing(
"colindsT (converted)"),
colindsT.extent(0));
287 Kokkos::parallel_for(
"colIndsT (converted)", range_type(0,
colindsT.extent(0)),
convertT);
// Create the sparse-add sub-handle; the argument passed is `false`.
// NOTE(review): presumably this flags the input rows as not sorted — confirm
// against the KokkosKernels handle documentation.
291 handle.create_spadd_handle(
false);
// Symbolic phase of the sparse add (arguments not visible here).
296 KokkosSparse::Experimental::spadd_symbolic
// Allocate the result's global column indices, sized by the handle's C nnz.
298 globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"global colinds sym"),
addHandle->get_c_nnz());
// Indices-only numeric phase over global column indices (unsorted variant).
300 UnsortedNumericIndicesOnlyFunctor<
302 typename row_ptrs_array::const_type,
typename row_ptrs_array::const_type, row_ptrs_array,
303 typename global_col_inds_array::const_type,
typename global_col_inds_array::const_type, global_col_inds_array>
307 Kokkos::parallel_for(
"KokkosSparse::SpAdd:Numeric::InputNotSorted",
// Build a column map via Tpetra::Details::makeColMap (arguments not visible).
311 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
// A second symbolic/numeric sequence follows, this time over local column
// indices (col_inds_array). NOTE(review): the condition selecting between
// this path and the global-index path above is not visible here.
327 KokkosSparse::Experimental::spadd_symbolic
// Allocate the result's local column indices, sized by the handle's C nnz.
329 colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"),
addHandle->get_c_nnz());
// Numeric phase, sorted-input variant.
332 SortedNumericIndicesOnlyFunctor<
334 typename row_ptrs_array::const_type,
typename row_ptrs_array::const_type, row_ptrs_array,
335 typename col_inds_array::const_type,
typename col_inds_array::const_type, col_inds_array>
338 Kokkos::parallel_for(
"KokkosSparse::SpAdd:Numeric::InputSorted",
// Numeric phase, unsorted-input variant (branch condition not visible here).
342 UnsortedNumericIndicesOnlyFunctor<
344 typename row_ptrs_array::const_type,
typename row_ptrs_array::const_type, row_ptrs_array,
345 typename col_inds_array::const_type,
typename col_inds_array::const_type, col_inds_array>
349 Kokkos::parallel_for(
"KokkosSparse::SpAdd:Numeric::InputNotSorted",
// Sort the resulting CRS graph given its row pointers and column indices.
360 KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(
rowptrsSym,
colindsSym);
// Member function returning an RCP to a CrsGraph.
// NOTE(review): the function-name line and most of the body are not visible
// in this excerpt; presumably this is createTranspose() — confirm.
383 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
// Timer setup, compiled only when HAVE_TPETRA_MMM_TIMINGS is defined.
// The timer-name prefix is "Tpetra " + label_ + ": ".
391#ifdef HAVE_TPETRA_MMM_TIMINGS
392 const std::string
prefix = std::string (
"Tpetra ") + label_ +
": ";
393 using Teuchos::TimeMonitor;
// A second timing-only section begins here (its contents are not visible).
408#ifdef HAVE_TPETRA_MMM_TIMINGS
// Name of the parameter-list entry looked up or set by code not visible here.
412 const char paramName[] =
"compute global constants";
// Tail of a call that receives a null RCP followed by a non-owning RCP that
// refers to the local labelList object (callee not visible here).
420 Teuchos::null, Teuchos::rcpFromRef (
labelList));
// Member function returning an RCP to a CrsGraph.
// NOTE(review): the function-name line and many statements are not visible in
// this excerpt; presumably this is createTransposeLocal() — confirm.
427 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
433 using Teuchos::rcp_dynamic_cast;
// Timer setup, compiled only when HAVE_TPETRA_MMM_TIMINGS is defined.
// The timer-name prefix is "Tpetra " + label_ + ": ".
439#ifdef HAVE_TPETRA_MMM_TIMINGS
440 std::string
prefix = std::string(
"Tpetra ") + label_ +
": ";
441 using Teuchos::TimeMonitor;
// `sort` is initialized from a lambda whose body is not visible here.
445 const bool sort = [&] () {
// Device-side local graph of the original graph.
453 local_graph_device_type
lclGraph = origGraph_->getLocalGraphDevice ();
// Const (c_*) and non-const view types for row map and entries.
456 using c_rowmap_t =
typename local_graph_device_type::row_map_type;
457 using c_entries_t =
typename local_graph_device_type::entries_type;
458 using rowmap_t =
typename local_graph_device_type::row_map_type::non_const_type;
459 using entries_t =
typename local_graph_device_type::entries_type::non_const_type;
// Number of local elements in the original graph's column map.
460 LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
// Uninitialized allocation sized to the original graph's entry count
// (the variable being constructed is declared on a line not visible here).
463 Kokkos::ViewAllocateWithoutInitializing(
"Transpose entries"),
lclGraph.entries.extent(0));
// Local transpose via KokkosSparse (arguments not visible here).
464 KokkosSparse::Impl::transpose_graph<
467 rowmap_t,
typename local_graph_device_type::execution_space>(
// Sort of the CRS graph; NOTE(review): presumably guarded by `sort` — confirm.
473 KokkosSparse::sort_crs_graph<
474 typename local_graph_device_type::execution_space,
// Fetch the original graph's exporter and importer.
484 const auto origExport = origGraph_->getExporter ();
487 const auto origImport = origGraph_->getImporter ();
// Arguments to a call not visible here, passed in the order: column map,
// row map, range map, domain map of the original graph.
// NOTE(review): presumably the callee expects (rowMap, colMap, domainMap,
// rangeMap), so the roles are swapped for the transpose — confirm.
498 origGraph_->getColMap (),
499 origGraph_->getRowMap (),
500 origGraph_->getRangeMap (),
501 origGraph_->getDomainMap (),
// Explicit-instantiation macro: expands to an explicit instantiation of the
// CrsGraphTransposer class template for the given (LO, GO, NODE) triple.
511#define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO,GO,NODE) \
512 template class CrsGraphTransposer< LO , GO , NODE >;
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph, const std::string &label=std::string())
Constructor that takes the graph to transpose.
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Struct that holds views of the contents of a CrsMatrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.