Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraphTransposer_def.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
43#define TPETRA_CRSGRAPHTRANSPOSER_DEF_HPP
44
45#include "Tpetra_CrsGraph.hpp"
46#include "Tpetra_Export.hpp"
48#include "Tpetra_Details_makeColMap.hpp"
50#include "Teuchos_ParameterList.hpp"
51#include "Teuchos_TimeMonitor.hpp"
52#include "KokkosSparse_Utils.hpp"
53#include "KokkosKernels_Handle.hpp"
54#include "KokkosSparse_spadd.hpp"
55
56namespace Tpetra {
57
58 template<typename GO,
59 typename LocalIndicesType,
60 typename GlobalIndicesType,
61 typename ColMapType>
62 struct ConvertLocalToGlobalFunctor
63 {
64 ConvertLocalToGlobalFunctor(
65 const LocalIndicesType& colindsOrig_,
66 const GlobalIndicesType& colindsConverted_,
67 const ColMapType& colmap_) :
68 colindsOrig (colindsOrig_),
69 colindsConverted (colindsConverted_),
70 colmap (colmap_)
71 {}
72 KOKKOS_INLINE_FUNCTION void
73 operator() (const GO i) const
74 {
75 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
76 }
77 LocalIndicesType colindsOrig;
78 GlobalIndicesType colindsConverted;
79 ColMapType colmap;
80 };
81
82 template<class LO, class GO, class LOView, class GOView, class LocalMap>
83 struct ConvertGlobalToLocalFunctor
84 {
85 ConvertGlobalToLocalFunctor(LOView& lids_, const GOView& gids_, const LocalMap localColMap_)
86 : lids(lids_), gids(gids_), localColMap(localColMap_)
87 {}
88
89 KOKKOS_FUNCTION void operator() (const GO i) const
90 {
91 lids(i) = localColMap.getLocalElement(gids(i));
92 }
93
94 LOView lids;
95 const GOView gids;
96 const LocalMap localColMap;
97 };
98
99
100 template <typename size_type, typename ordinal_type,
101 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
102 typename AcolindsT, typename BcolindsT, typename CcolindsT>
103 struct SortedNumericIndicesOnlyFunctor {
104
105 SortedNumericIndicesOnlyFunctor(const ArowptrsT& Arowptrs_,
106 const BrowptrsT& Browptrs_,
107 const CrowptrsT& Crowptrs_,
108 const AcolindsT& Acolinds_,
109 const BcolindsT& Bcolinds_,
110 const CcolindsT& Ccolinds_)
111 : Arowptrs(Arowptrs_),
112 Browptrs(Browptrs_),
113 Crowptrs(Crowptrs_),
114 Acolinds(Acolinds_),
115 Bcolinds(Bcolinds_),
116 Ccolinds(Ccolinds_) {}
117
118 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const
119 {
120 const ordinal_type ORDINAL_MAX = Kokkos::ArithTraits<ordinal_type>::max();
121
122 // count the union of nonzeros in Arow and Brow
123 size_type ai = 0;
124 size_type bi = 0;
125 size_type Arowstart = Arowptrs(i);
126 size_type Arowlen = Arowptrs(i + 1) - Arowstart;
127 size_type Browstart = Browptrs(i);
128 size_type Browlen = Browptrs(i + 1) - Browstart;
129 ordinal_type Acol = (Arowlen == 0) ? ORDINAL_MAX : Acolinds(Arowstart);
130 ordinal_type Bcol = (Browlen == 0) ? ORDINAL_MAX : Bcolinds(Browstart);
131 size_type Coffset = Crowptrs(i);
132 while (Acol != ORDINAL_MAX || Bcol != ORDINAL_MAX)
133 {
134 ordinal_type Ccol = (Acol < Bcol) ? Acol : Bcol;
135 while(Acol == Ccol)
136 {
137 ai++;
138 if(ai == Arowlen)
139 Acol = ORDINAL_MAX;
140 else
141 Acol = Acolinds(Arowstart + ai);
142 }
143 while(Bcol == Ccol)
144 {
145 bi++;
146 if(bi == Browlen)
147 Bcol = ORDINAL_MAX;
148 else
149 Bcol = Bcolinds(Browstart + bi);
150 }
151 Ccolinds(Coffset) = Ccol;
152 Coffset++;
153 }
154 }
155
156 const ArowptrsT Arowptrs;
157 const BrowptrsT Browptrs;
158 const CrowptrsT Crowptrs;
159 const AcolindsT Acolinds;
160 const BcolindsT Bcolinds;
161 CcolindsT Ccolinds;
162 };
163
164 template <typename size_type, typename ordinal_type,
165 typename ArowptrsT, typename BrowptrsT, typename CrowptrsT,
166 typename AcolindsT, typename BcolindsT, typename CcolindsT>
167 struct UnsortedNumericIndicesOnlyFunctor {
168
169 UnsortedNumericIndicesOnlyFunctor(
170 const ArowptrsT Arowptrs_, const BrowptrsT Browptrs_, const CrowptrsT Crowptrs_,
171 const AcolindsT Acolinds_, const BcolindsT Bcolinds_, CcolindsT Ccolinds_,
172 const CcolindsT Apos_, const CcolindsT Bpos_)
173 : Arowptrs(Arowptrs_),
174 Browptrs(Browptrs_),
175 Crowptrs(Crowptrs_),
176 Acolinds(Acolinds_),
177 Bcolinds(Bcolinds_),
178 Ccolinds(Ccolinds_),
179 Apos(Apos_),
180 Bpos(Bpos_) {}
181
182 KOKKOS_INLINE_FUNCTION void operator()(const ordinal_type i) const {
183 size_type CrowStart = Crowptrs(i);
184 size_type ArowStart = Arowptrs(i);
185 size_type ArowEnd = Arowptrs(i + 1);
186 size_type BrowStart = Browptrs(i);
187 size_type BrowEnd = Browptrs(i + 1);
188 // add in A entries, while setting C colinds
189 for (size_type j = ArowStart; j < ArowEnd; j++) {
190 Ccolinds(CrowStart + Apos(j)) = Acolinds(j);
191 }
192 // add in B entries, while setting C colinds
193 for (size_type j = BrowStart; j < BrowEnd; j++) {
194 Ccolinds(CrowStart + Bpos(j)) = Bcolinds(j);
195 }
196 }
197 const ArowptrsT Arowptrs;
198 const BrowptrsT Browptrs;
199 const CrowptrsT Crowptrs;
200 const AcolindsT Acolinds;
201 const BcolindsT Bcolinds;
202 CcolindsT Ccolinds;
203 const CcolindsT Apos;
204 const CcolindsT Bpos;
205 };
206
207
208 template<class LocalOrdinal,
209 class GlobalOrdinal,
210 class Node>
212 CrsGraphTransposer (const Teuchos::RCP<const crs_graph_type>& origGraph,
213 const std::string& label)
214 : origGraph_ (origGraph), label_ (label)
215 {}
216
// Compute and return graph + graph^T for the graph given to the constructor.
//
// Visible outline:
//   1. Assert that the graph's domain map matches its row map.
//   2. Build the transpose with createTranspose(params) and assert that it
//      has a column map and the same domain map as the original graph.
//   3. If the two column maps differ, convert both local graphs to global
//      column indices, add the patterns with KokkosSparse spadd_symbolic plus
//      UnsortedNumericIndicesOnlyFunctor, build a column map for the result
//      with Tpetra::Details::makeColMap, and convert back to local indices.
//      Otherwise add the patterns directly in local indices, using the
//      sorted or the unsorted functor depending on `sorted`.
//   4. Sort the result and wrap it in a new crs_graph_type.
//
// NOTE(review): this listing is incomplete.  The embedded line numbers skip
// 221, 240-241, 243, 255, 280, 286, 290, 301, 304, 312, 317, 323, 328, 333,
// 336-337, 343, 346-347, 354, 364 and 372-373.  As a result the class-scope
// qualifier and the declarations of `sorted`, `convert`, `convertT`,
// `convertSym`, `handle`, `unsortedNumeric`, `sortedNumeric`, `col_map_sym`,
// `importer` and `graphParams` are not visible here; restore them from the
// upstream file before relying on this text.
217 template<class LocalOrdinal,
218 class GlobalOrdinal,
219 class Node>
220 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
222 symmetrize (const Teuchos::RCP<Teuchos::ParameterList> &params)
223 {
224 using Teuchos::RCP;
225 using device_type = typename Node::device_type;
226 using execution_space = typename device_type::execution_space;
227 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
228 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
229 using impl_scalar_type = ::Tpetra::Details::DefaultTypes::scalar_type;
230 using row_ptrs_array = typename local_graph_device_type::row_map_type::non_const_type ;
231 using col_inds_array = typename local_graph_device_type::entries_type::non_const_type;
232 using local_map_type = typename map_type::local_map_type;
233 using global_col_inds_array = typename Kokkos::View<GlobalOrdinal*, device_type>;
234
235 auto graph = origGraph_;
236 auto domain_map = graph->getDomainMap();
237 auto range_map = graph->getRangeMap();
238 auto row_map = graph->getRowMap();
239 auto col_map = graph->getColMap();
242
244 TEUCHOS_ASSERT(domain_map->isSameAs(*row_map));
245
246 // Do the transpose
247 RCP<crs_graph_type> graphT = createTranspose (params);
248
249 auto col_map_T = graphT->getColMap();
250 TEUCHOS_ASSERT(!col_map_T.is_null());
251 TEUCHOS_ASSERT(domain_map->isSameAs(*graphT->getDomainMap()));
252
253 bool graphSorted = graph->isSorted();
254 bool graphTSorted = graphT->isSorted();
256 bool matchingColMaps = col_map->isSameAs(*col_map_T);
257
258 auto lclGraph = graph->getLocalGraphDevice();
259 auto lclGraphT = graphT->getLocalGraphDevice();
260
261 using KKH_LO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, LocalOrdinal, impl_scalar_type,
262 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
263 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GlobalOrdinal, impl_scalar_type,
264 typename Node::execution_space, typename Node::memory_space, typename Node::memory_space>;
265
266 auto rowptrs = lclGraph.row_map;
267 auto rowptrsT = lclGraphT.row_map;
268 auto colinds = lclGraph.entries;
269 auto colindsT = lclGraphT.entries;
270
// The row-pointer array has one more entry than there are rows.
271 auto nrows = rowptrs.extent(0) - 1;
272 auto rowptrsSym = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing("row ptrs sym"), nrows + 1);
273
274 col_inds_array colindsSym;
275
// Column maps differ: local indices of graph and graphT are not comparable,
// so the addition is carried out on global indices.
276 if(!matchingColMaps) {
277 // convert indices of local graph to GlobalOrdinal
278 auto lclColmap = col_map->getLocalMap();
279 global_col_inds_array colindsConverted(Kokkos::ViewAllocateWithoutInitializing("colinds (converted)"), colinds.extent(0));
281 Kokkos::parallel_for("colInds (converted)", range_type(0, colinds.extent(0)), convert);
282
283 // convert indices of local graphT to GlobalOrdinal
284 auto lclColmapT = col_map_T->getLocalMap();
285 global_col_inds_array colindsTConverted(Kokkos::ViewAllocateWithoutInitializing("colindsT (converted)"), colindsT.extent(0));
287 Kokkos::parallel_for("colIndsT (converted)", range_type(0, colindsT.extent(0)), convertT);
288
289 // sum graph and graphT in GlobalOrdinal
291 handle.create_spadd_handle(false);
292 auto addHandle = handle.get_spadd_handle();
293
294 global_col_inds_array globalColindsSym;
295
296 KokkosSparse::Experimental::spadd_symbolic
297 (&handle, rowptrs, colindsConverted, rowptrsT, colindsTConverted, rowptrsSym);
298 globalColindsSym = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing("global colinds sym"), addHandle->get_c_nnz());
299
300 UnsortedNumericIndicesOnlyFunctor<
302 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
303 typename global_col_inds_array::const_type, typename global_col_inds_array::const_type, global_col_inds_array>
305 colindsConverted, colindsTConverted, globalColindsSym,
306 addHandle->get_a_pos(), addHandle->get_b_pos());
307 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
308 range_type(0, nrows), unsortedNumeric);
309
310 // build column map for graphSym
311 Tpetra::Details::makeColMap<LocalOrdinal, GlobalOrdinal, Node>
313
314 // convert indices of local graphSym to LocalOrdinal
315 auto lclColmapSym = col_map_sym->getLocalMap();
316 colindsSym = col_inds_array("colindsSym", globalColindsSym.extent(0));
318 Kokkos::parallel_for(range_type(0, globalColindsSym.extent(0)), convertSym);
319
// Column maps match: the addition can be done directly on local indices.
320 } else {
321
322 // sum graph and graphT in LocalOrdinal
324 handle.create_spadd_handle(sorted);
325 auto addHandle = handle.get_spadd_handle();
326
327 KokkosSparse::Experimental::spadd_symbolic
329 colindsSym = col_inds_array(Kokkos::ViewAllocateWithoutInitializing("C colinds"), addHandle->get_c_nnz());
330
331 if (sorted) {
332 SortedNumericIndicesOnlyFunctor<
334 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
335 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
338 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputSorted",
339 range_type(0, nrows), sortedNumeric);
340
341 } else {
342 UnsortedNumericIndicesOnlyFunctor<
344 typename row_ptrs_array::const_type, typename row_ptrs_array::const_type, row_ptrs_array,
345 typename col_inds_array::const_type, typename col_inds_array::const_type, col_inds_array>
348 addHandle->get_a_pos(), addHandle->get_b_pos());
349 Kokkos::parallel_for("KokkosSparse::SpAdd:Numeric::InputNotSorted",
350 range_type(0, nrows), unsortedNumeric);
351 }
352
353 // column map for graphSym is graph's column map
355 importer = graph->getImporter();
356 }
357
// `sort` is hard-coded to true, so the symmetrized graph is always sorted
// and the `if(!sort)` block further down is never taken as written.
358 bool sort = true;
359 if (sort)
360 KokkosSparse::sort_crs_graph<execution_space, row_ptrs_array, col_inds_array>(rowptrsSym, colindsSym);
361
362 local_graph_device_type lclGraphSym = local_graph_device_type(colindsSym, rowptrsSym);
363
365 if(!sort) {
366 graphParams = rcp(new Teuchos::ParameterList);
367 graphParams->set("sorted", false);
368 }
369
370 return rcp (new crs_graph_type (lclGraphSym,
371 row_map,
374 range_map,
375 importer,
376 Teuchos::null,
377 graphParams));
378 }
379
// Compute and return the transpose of the graph given to the constructor.
//
// Visible outline:
//   1. Build the local transpose with createTransposeLocal(params).
//   2. Look at that graph's exporter.  Per the comment in the body, a null
//      exporter means no rows are shared and nothing more has to be done.
//   3. Otherwise build a parameter list (timer label when
//      HAVE_TPETRA_MMM_TIMINGS is defined, plus "compute global constants"
//      forwarded from params with a default of true) and pass it, together
//      with the exporter, to a fused export-and-fillComplete call.
//
// NOTE(review): this listing is incomplete.  The embedded line numbers skip
// 384, 400-401, 404 and 418, so the class-scope qualifier, the declaration
// of `exporter`, the statement inside the null-exporter branch and the name
// of the function called in the else branch are not visible here.
380 template<class LocalOrdinal,
381 class GlobalOrdinal,
382 class Node>
383 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
385 createTranspose (const Teuchos::RCP<Teuchos::ParameterList> &params)
386 {
387 using Teuchos::RCP;
388 // Do the local transpose
389 RCP<crs_graph_type> transGraphWithSharedRows = createTransposeLocal (params);
390
391#ifdef HAVE_TPETRA_MMM_TIMINGS
392 const std::string prefix = std::string ("Tpetra ") + label_ + ": ";
393 using Teuchos::TimeMonitor;
394 TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose TAFC"));
395#endif
396
397 // If transGraphWithSharedRows has an exporter, that's what we
398 // want. If it doesn't, the rows aren't actually shared, and we're
399 // done!
402 transGraphWithSharedRows->getExporter ();
403 if (exporter.is_null ()) {
405 }
406 else {
407 Teuchos::ParameterList labelList;
408#ifdef HAVE_TPETRA_MMM_TIMINGS
409 labelList.set("Timer Label", label_);
410#endif
// Forward only "compute global constants"; it defaults to true when the
// caller's list does not contain it.
411 if(! params.is_null ()) {
412 const char paramName[] = "compute global constants";
413 labelList.set (paramName, params->get (paramName, true));
414 }
415 // Use the Export object to do a fused Export and fillComplete.
416 // This always sorts the local graph after communication, so
417 // no need to set "sorted = false" in parameters.
419 (transGraphWithSharedRows, *exporter, Teuchos::null,
420 Teuchos::null, Teuchos::rcpFromRef (labelList));
421 }
422 }
423
// Compute and return the transpose of the graph given to the constructor,
// working on the local graph of origGraph_ only.
//
// Visible outline:
//   1. Decide whether to sort: sortDefault (true) is used when params is
//      null; otherwise the value presumably comes from the "sort" entry of
//      params (that line is missing from this listing -- confirm upstream).
//   2. Allocate a row map of numCols + 1 entries, where numCols is the
//      local size of the original column map, and an entries array as long
//      as the original one, then call KokkosSparse::Impl::transpose_graph.
//   3. Optionally sort the transposed graph with KokkosSparse::sort_crs_graph.
//   4. Build an Import from the original Export and an Export from the
//      original Import (null stays null), without communication.
//   5. Construct the result from the transposed local graph, passing the
//      original column map, row map, range map and domain map in that order.
//
// NOTE(review): this listing is incomplete.  The embedded line numbers skip
// 428, 449, 462, 465-466, 470, 475-477, 485, 488, 491 and 502, so the
// class-scope qualifier, the declarations of `lclGraphT_entries` and
// `graphParams`, parts of the transpose_graph / sort_crs_graph calls and the
// tail of the constructor call are not visible here.
424 template<class LocalOrdinal,
425 class GlobalOrdinal,
426 class Node>
427 Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
429 createTransposeLocal (const Teuchos::RCP<Teuchos::ParameterList> &params)
430 {
431 using Teuchos::RCP;
432 using Teuchos::rcp;
433 using Teuchos::rcp_dynamic_cast;
434 using LO = LocalOrdinal;
435 using GO = GlobalOrdinal;
436 using import_type = Tpetra::Import<LO, GO, Node>;
437 using export_type = Tpetra::Export<LO, GO, Node>;
438
439#ifdef HAVE_TPETRA_MMM_TIMINGS
440 std::string prefix = std::string("Tpetra ") + label_ + ": ";
441 using Teuchos::TimeMonitor;
442 TimeMonitor MM (*TimeMonitor::getNewTimer (prefix + "Transpose Local"));
443#endif
444
445 const bool sort = [&] () {
446 constexpr bool sortDefault = true; // see #4607 discussion
447 const char sortParamName[] = "sort";
448 return params.get () == nullptr ? sortDefault :
450 } ();
451
452 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
453 local_graph_device_type lclGraph = origGraph_->getLocalGraphDevice ();
454
455 //Allocate views and call the other version of transpose_graph
456 using c_rowmap_t = typename local_graph_device_type::row_map_type;
457 using c_entries_t = typename local_graph_device_type::entries_type;
458 using rowmap_t = typename local_graph_device_type::row_map_type::non_const_type;
459 using entries_t = typename local_graph_device_type::entries_type::non_const_type;
// The transpose has one row per local column of the original graph.
460 LocalOrdinal numCols = origGraph_->getColMap()->getLocalNumElements();
461 rowmap_t lclGraphT_rowmap("Transpose rowmap", numCols + 1);
463 Kokkos::ViewAllocateWithoutInitializing("Transpose entries"), lclGraph.entries.extent(0));
464 KokkosSparse::Impl::transpose_graph<
467 rowmap_t, typename local_graph_device_type::execution_space>(
468 lclGraph.numRows(), numCols,
469 lclGraph.row_map, lclGraph.entries,
471
472 if (sort)
473 KokkosSparse::sort_crs_graph<
474 typename local_graph_device_type::execution_space,
478
479 //And construct the transpose local_graph_device_type
480 local_graph_device_type lclGraphT = local_graph_device_type(lclGraphT_entries, lclGraphT_rowmap);
481
482 // Prebuild the importers and exporters the no-communication way,
483 // flipping the importers and exporters around.
484 const auto origExport = origGraph_->getExporter ();
486 Teuchos::null : rcp (new import_type (*origExport));
487 const auto origImport = origGraph_->getImporter ();
489 Teuchos::null : rcp (new export_type (*origImport));
490
// Only an unsorted result needs to be announced through the parameter list.
492 if(!sort) {
493 graphParams = rcp(new Teuchos::ParameterList);
494 graphParams->set("sorted", false);
495 }
496
497 return rcp (new crs_graph_type (lclGraphT,
498 origGraph_->getColMap (),
499 origGraph_->getRowMap (),
500 origGraph_->getRangeMap (),
501 origGraph_->getDomainMap (),
503 }
504
505 //
506 // Explicit instantiation macro
507 //
508 // Must be expanded from within the Tpetra namespace!
509 //
510
// Expands to an explicit instantiation of the CrsGraphTransposer class
// template for the ordinal/node triple (LO, GO, NODE).  The class name is
// unqualified in the expansion.
511#define TPETRA_CRSGRAPHTRANSPOSER_INSTANT(LO,GO,NODE) \
512 template class CrsGraphTransposer< LO , GO , NODE >;
513
514} // namespace Tpetra
515
516#endif
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
CrsGraphTransposer(const Teuchos::RCP< const crs_graph_type > &origGraph, const std::string &label=std::string())
Constructor that takes the graph to transpose.
Teuchos::RCP< crs_graph_type > createTransposeLocal(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > createTranspose(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return the transpose of the graph given to the constructor.
Teuchos::RCP< crs_graph_type > symmetrize(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute and return graph+graph^T of the graph given to the constructor.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Struct that holds views of the contents of a CrsMatrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.