Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_SYCL.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42
43// This is a verbatim copy of the other TpetraExt_MatrixMatrix_*.hpp files
44// replacing the execution/memory space by the ones corresponding to SYCL.
45#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
46#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
47
48#ifdef HAVE_TPETRA_INST_SYCL
49namespace Tpetra {
50namespace MMdetails {
51
52/*********************************************************************************************************/
53// MMM KernelWrappers for Partial Specialization to SYCL
54template<class Scalar,
55 class LocalOrdinal,
56 class GlobalOrdinal,
57 class LocalOrdinalViewType>
58struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
59 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
66 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69
70
71
72 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
79 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82
83};
84
85// Jacobi KernelWrappers for Partial Specialization to SYCL
86template<class Scalar,
87 class LocalOrdinal,
88 class GlobalOrdinal, class LocalOrdinalViewType>
89struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
90 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
91 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
92 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
93 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
94 const LocalOrdinalViewType & Acol2Brow,
95 const LocalOrdinalViewType & Acol2Irow,
96 const LocalOrdinalViewType & Bcol2Ccol,
97 const LocalOrdinalViewType & Icol2Ccol,
98 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
99 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
100 const std::string& label = std::string(),
101 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
102
103 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
104 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
106 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
107 const LocalOrdinalViewType & Acol2Brow,
108 const LocalOrdinalViewType & Acol2Irow,
109 const LocalOrdinalViewType & Bcol2Ccol,
110 const LocalOrdinalViewType & Icol2Ccol,
111 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
112 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
113 const std::string& label = std::string(),
114 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
115
116 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
117 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
118 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
119 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
120 const LocalOrdinalViewType & Acol2Brow,
121 const LocalOrdinalViewType & Acol2Irow,
122 const LocalOrdinalViewType & Bcol2Ccol,
123 const LocalOrdinalViewType & Icol2Ccol,
124 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
125 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
126 const std::string& label = std::string(),
127 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
128};
129
130
131/*********************************************************************************************************/
132// AB NewMatrix Kernel wrappers (KokkosKernels/SYCL Version)
133template<class Scalar,
134 class LocalOrdinal,
135 class GlobalOrdinal,
136 class LocalOrdinalViewType>
137void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
138 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
139 const LocalOrdinalViewType & Acol2Brow,
140 const LocalOrdinalViewType & Acol2Irow,
141 const LocalOrdinalViewType & Bcol2Ccol,
142 const LocalOrdinalViewType & Icol2Ccol,
143 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
144 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
145 const std::string& label,
146 const Teuchos::RCP<Teuchos::ParameterList>& params) {
147
148
149#ifdef HAVE_TPETRA_MMM_TIMINGS
150 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
151 using Teuchos::TimeMonitor;
152 Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLWrapper")))));
153#endif
154 // Node-specific code
155 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
156 std::string nodename("SYCL");
157
158 // Lots and lots of typedefs
159 using Teuchos::RCP;
161 typedef typename KCRS::device_type device_t;
162 typedef typename KCRS::StaticCrsGraphType graph_t;
163 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
164 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
165 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
166 typedef typename KCRS::values_type::non_const_type scalar_view_t;
167 //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
168
169 // Options
170 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
171 std::string myalg("SPGEMM_KK_MEMORY");
172 if(!params.is_null()) {
173 if(params->isParameter("sycl: algorithm"))
174 myalg = params->get("sycl: algorithm",myalg);
175 if(params->isParameter("sycl: team work size"))
176 team_work_size = params->get("sycl: team work size",team_work_size);
177 }
178
179 // KokkosKernelsHandle
180 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
181 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
182 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
183
184 // Grab the Kokkos::SparseCrsMatrices
185 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
186 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
187
188 c_lno_view_t Arowptr = Amat.graph.row_map,
189 Browptr = Bmat.graph.row_map;
190 const lno_nnz_view_t Acolind = Amat.graph.entries,
191 Bcolind = Bmat.graph.entries;
192 const scalar_view_t Avals = Amat.values,
193 Bvals = Bmat.values;
194
195 c_lno_view_t Irowptr;
196 lno_nnz_view_t Icolind;
197 scalar_view_t Ivals;
198 if(!Bview.importMatrix.is_null()) {
199 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
200 Irowptr = lclB.graph.row_map;
201 Icolind = lclB.graph.entries;
202 Ivals = lclB.values;
203 }
204
205
206 // Get the algorithm mode
207 std::string alg = nodename+std::string(" algorithm");
208 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
209 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
210 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
211
212 // Merge the B and Bimport matrices
213 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
214
215#ifdef HAVE_TPETRA_MMM_TIMINGS
216 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLCore"))));
217#endif
218
219 // Do the multiply on whatever we've got
220 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
221 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
222 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
223
224 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
225 lno_nnz_view_t entriesC;
226 scalar_view_t valuesC;
227 KernelHandle kh;
228 kh.create_spgemm_handle(alg_enum);
229 kh.set_team_work_size(team_work_size);
230
231 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
232
233 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
234 if (c_nnz_size){
235 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
236 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
237 }
238 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
239 kh.destroy_spgemm_handle();
240
241#ifdef HAVE_TPETRA_MMM_TIMINGS
242 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLSort"))));
243#endif
244
245 // Sort & set values
246 if (params.is_null() || params->get("sort entries",true))
247 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
248 C.setAllValues(row_mapC,entriesC,valuesC);
249
250#ifdef HAVE_TPETRA_MMM_TIMINGS
251 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLESFC"))));
252#endif
253
254 // Final Fillcomplete
255 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
256 labelList->set("Timer Label",label);
257 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
258 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
259 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
260}
261
262
263/*********************************************************************************************************/
264template<class Scalar,
265 class LocalOrdinal,
266 class GlobalOrdinal,
267 class LocalOrdinalViewType>
268void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
269 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
270 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
271 const LocalOrdinalViewType & targetMapToOrigRow_dev,
272 const LocalOrdinalViewType & targetMapToImportRow_dev,
273 const LocalOrdinalViewType & Bcol2Ccol_dev,
274 const LocalOrdinalViewType & Icol2Ccol_dev,
275 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
276 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
277 const std::string& label,
278 const Teuchos::RCP<Teuchos::ParameterList>& params) {
279
280 // FIXME: Right now, this is a cut-and-paste of the serial kernel
281 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
282
283#ifdef HAVE_TPETRA_MMM_TIMINGS
284 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
285 using Teuchos::TimeMonitor;
286 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
287 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
288#endif
289 using Teuchos::RCP;
290 using Teuchos::rcp;
291
292
293 // Lots and lots of typedefs
294 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
295 typedef typename KCRS::StaticCrsGraphType graph_t;
296 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
297 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
298 typedef typename KCRS::values_type::non_const_type scalar_view_t;
299
300 typedef Scalar SC;
301 typedef LocalOrdinal LO;
302 typedef GlobalOrdinal GO;
303 typedef Node NO;
304 typedef Map<LO,GO,NO> map_type;
305 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
306 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
307 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
308
309 // Since this is being run on SYCL, we need to fence because the below code will use UVM
310 // typename graph_t::execution_space().fence();
311
312 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
313 // KDDKDD UVM Ideally, this function would run on device and use
314 // KDDKDD UVM KokkosKernels instead of this host implementation.
315 auto targetMapToOrigRow =
316 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 targetMapToOrigRow_dev);
318 auto targetMapToImportRow =
319 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
320 targetMapToImportRow_dev);
321 auto Bcol2Ccol =
322 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
323 Bcol2Ccol_dev);
324 auto Icol2Ccol =
325 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
326 Icol2Ccol_dev);
327
328 // Sizes
329 RCP<const map_type> Ccolmap = C.getColMap();
330 size_t m = Aview.origMatrix->getLocalNumRows();
331 size_t n = Ccolmap->getLocalNumElements();
332
333 // Grab the Kokkos::SparseCrsMatrices & inner stuff
334 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
335 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
336 const KCRS & Cmat = C.getLocalMatrixHost();
337
338 c_lno_view_t Arowptr = Amat.graph.row_map,
339 Browptr = Bmat.graph.row_map,
340 Crowptr = Cmat.graph.row_map;
341 const lno_nnz_view_t Acolind = Amat.graph.entries,
342 Bcolind = Bmat.graph.entries,
343 Ccolind = Cmat.graph.entries;
344 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
345 scalar_view_t Cvals = Cmat.values;
346
347 c_lno_view_t Irowptr;
348 lno_nnz_view_t Icolind;
349 scalar_view_t Ivals;
350 if(!Bview.importMatrix.is_null()) {
351 auto lclB = Bview.importMatrix->getLocalMatrixHost();
352 Irowptr = lclB.graph.row_map;
353 Icolind = lclB.graph.entries;
354 Ivals = lclB.values;
355 }
356
357#ifdef HAVE_TPETRA_MMM_TIMINGS
358 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
359#endif
360
361 // Classic csr assembly (low memory edition)
362 // mfh 27 Sep 2016: The c_status array is an implementation detail
363 // of the local sparse matrix-matrix multiply routine.
364
365 // The status array will contain the index into colind where this entry was last deposited.
366 // c_status[i] < CSR_ip - not in the row yet
367 // c_status[i] >= CSR_ip - this is the entry where you can find the data
368 // We start with this filled with INVALID's indicating that there are no entries yet.
369 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
370 std::vector<size_t> c_status(n, ST_INVALID);
371
372 // For each row of A/C
373 size_t CSR_ip = 0, OLD_ip = 0;
374 for (size_t i = 0; i < m; i++) {
375 // First fill the c_status array w/ locations where we're allowed to
376 // generate nonzeros for this row
377 OLD_ip = Crowptr[i];
378 CSR_ip = Crowptr[i+1];
379 for (size_t k = OLD_ip; k < CSR_ip; k++) {
380 c_status[Ccolind[k]] = k;
381
382 // Reset values in the row of C
383 Cvals[k] = SC_ZERO;
384 }
385
386 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
387 LO Aik = Acolind[k];
388 const SC Aval = Avals[k];
389 if (Aval == SC_ZERO)
390 continue;
391
392 if (targetMapToOrigRow[Aik] != LO_INVALID) {
393 // Local matrix
394 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
395
396 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
397 LO Bkj = Bcolind[j];
398 LO Cij = Bcol2Ccol[Bkj];
399
400 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
401 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
402 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
403
404 Cvals[c_status[Cij]] += Aval * Bvals[j];
405 }
406
407 } else {
408 // Remote matrix
409 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
410 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
411 LO Ikj = Icolind[j];
412 LO Cij = Icol2Ccol[Ikj];
413
414 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
415 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
416 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
417
418 Cvals[c_status[Cij]] += Aval * Ivals[j];
419 }
420 }
421 }
422 }
423
424 C.fillComplete(C.getDomainMap(), C.getRangeMap());
425}
426
427/*********************************************************************************************************/
428template<class Scalar,
429 class LocalOrdinal,
430 class GlobalOrdinal,
431 class LocalOrdinalViewType>
432void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
433 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
434 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
435 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
436 const LocalOrdinalViewType & Acol2Brow,
437 const LocalOrdinalViewType & Acol2Irow,
438 const LocalOrdinalViewType & Bcol2Ccol,
439 const LocalOrdinalViewType & Icol2Ccol,
440 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
441 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
442 const std::string& label,
443 const Teuchos::RCP<Teuchos::ParameterList>& params) {
444
445#ifdef HAVE_TPETRA_MMM_TIMINGS
446 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
447 using Teuchos::TimeMonitor;
448 Teuchos::RCP<TimeMonitor> MM;
449#endif
450
451 // Node-specific code
452 using Teuchos::RCP;
453
454 // Options
455 //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
456 std::string myalg("KK");
457 if(!params.is_null()) {
458 if(params->isParameter("sycl: jacobi algorithm"))
459 myalg = params->get("sycl: jacobi algorithm",myalg);
460 }
461
462 if(myalg == "MSAK") {
463 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
464 }
465 else if(myalg == "KK") {
466 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
467 }
468 else {
469 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
470 }
471
472#ifdef HAVE_TPETRA_MMM_TIMINGS
473 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
474#endif
475
476 // Final Fillcomplete
477 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
478 labelList->set("Timer Label",label);
479 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
480
481 // NOTE: MSAK already fillCompletes, so we have to check here
482 if(!C.isFillComplete()) {
483 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
484 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
485 }
486
487}
488
489
490
491/*********************************************************************************************************/
492template<class Scalar,
493 class LocalOrdinal,
494 class GlobalOrdinal,
495 class LocalOrdinalViewType>
496void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
497 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
498 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
499 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
500 const LocalOrdinalViewType & targetMapToOrigRow_dev,
501 const LocalOrdinalViewType & targetMapToImportRow_dev,
502 const LocalOrdinalViewType & Bcol2Ccol_dev,
503 const LocalOrdinalViewType & Icol2Ccol_dev,
504 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
505 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
506 const std::string& label,
507 const Teuchos::RCP<Teuchos::ParameterList>& params) {
508
509 // FIXME: Right now, this is a cut-and-paste of the serial kernel
510 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
511
512#ifdef HAVE_TPETRA_MMM_TIMINGS
513 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
514 using Teuchos::TimeMonitor;
515 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore"))));
516 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
517#endif
518 using Teuchos::RCP;
519 using Teuchos::rcp;
520
521 // Lots and lots of typedefs
522 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
523 typedef typename KCRS::StaticCrsGraphType graph_t;
524 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
525 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
526 typedef typename KCRS::values_type::non_const_type scalar_view_t;
527 typedef typename scalar_view_t::memory_space scalar_memory_space;
528
529 typedef Scalar SC;
530 typedef LocalOrdinal LO;
531 typedef GlobalOrdinal GO;
532 typedef Node NO;
533 typedef Map<LO,GO,NO> map_type;
534 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
535 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
536 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
537
538 // Since this is being run on SYCL, we need to fence because the below host code will use UVM
539 // KDDKDD typename graph_t::execution_space().fence();
540
541 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
542 // KDDKDD UVM Ideally, this function would run on device and use
543 // KDDKDD UVM KokkosKernels instead of this host implementation.
544 auto targetMapToOrigRow =
545 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
546 targetMapToOrigRow_dev);
547 auto targetMapToImportRow =
548 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
549 targetMapToImportRow_dev);
550 auto Bcol2Ccol =
551 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
552 Bcol2Ccol_dev);
553 auto Icol2Ccol =
554 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
555 Icol2Ccol_dev);
556
557
558 // Sizes
559 RCP<const map_type> Ccolmap = C.getColMap();
560 size_t m = Aview.origMatrix->getLocalNumRows();
561 size_t n = Ccolmap->getLocalNumElements();
562
563 // Grab the Kokkos::SparseCrsMatrices & inner stuff
564 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
565 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
566 const KCRS & Cmat = C.getLocalMatrixHost();
567
568 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
569 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
570 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
571 scalar_view_t Cvals = Cmat.values;
572
573 c_lno_view_t Irowptr;
574 lno_nnz_view_t Icolind;
575 scalar_view_t Ivals;
576 if(!Bview.importMatrix.is_null()) {
577 auto lclB = Bview.importMatrix->getLocalMatrixHost();
578 Irowptr = lclB.graph.row_map;
579 Icolind = lclB.graph.entries;
580 Ivals = lclB.values;
581 }
582
583 // Jacobi-specific inner stuff
584 auto Dvals =
585 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
586
587#ifdef HAVE_TPETRA_MMM_TIMINGS
588 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore - Compare"))));
589#endif
590
591 // The status array will contain the index into colind where this entry was last deposited.
592 // c_status[i] < CSR_ip - not in the row yet
593 // c_status[i] >= CSR_ip - this is the entry where you can find the data
594 // We start with this filled with INVALID's indicating that there are no entries yet.
595 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
596 std::vector<size_t> c_status(n, ST_INVALID);
597
598 // For each row of A/C
599 size_t CSR_ip = 0, OLD_ip = 0;
600 for (size_t i = 0; i < m; i++) {
601
602 // First fill the c_status array w/ locations where we're allowed to
603 // generate nonzeros for this row
604 OLD_ip = Crowptr[i];
605 CSR_ip = Crowptr[i+1];
606 for (size_t k = OLD_ip; k < CSR_ip; k++) {
607 c_status[Ccolind[k]] = k;
608
609 // Reset values in the row of C
610 Cvals[k] = SC_ZERO;
611 }
612
613 SC minusOmegaDval = -omega*Dvals(i,0);
614
615 // Entries of B
616 for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
617 Scalar Bval = Bvals[j];
618 if (Bval == SC_ZERO)
619 continue;
620 LO Bij = Bcolind[j];
621 LO Cij = Bcol2Ccol[Bij];
622
623 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
624 std::runtime_error, "Trying to insert a new entry into a static graph");
625
626 Cvals[c_status[Cij]] = Bvals[j];
627 }
628
629 // Entries of -omega * Dinv * A * B
630 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
631 LO Aik = Acolind[k];
632 const SC Aval = Avals[k];
633 if (Aval == SC_ZERO)
634 continue;
635
636 if (targetMapToOrigRow[Aik] != LO_INVALID) {
637 // Local matrix
638 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
639
640 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
641 LO Bkj = Bcolind[j];
642 LO Cij = Bcol2Ccol[Bkj];
643
644 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
645 std::runtime_error, "Trying to insert a new entry into a static graph");
646
647 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
648 }
649
650 } else {
651 // Remote matrix
652 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
653 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
654 LO Ikj = Icolind[j];
655 LO Cij = Icol2Ccol[Ikj];
656
657 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
658 std::runtime_error, "Trying to insert a new entry into a static graph");
659
660 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
661 }
662 }
663 }
664 }
665
666#ifdef HAVE_TPETRA_MMM_TIMINGS
667 MM2= Teuchos::null;
668 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
669#endif
670
671 C.fillComplete(C.getDomainMap(), C.getRangeMap());
672
673}
674
675/*********************************************************************************************************/
676template<class Scalar,
677 class LocalOrdinal,
678 class GlobalOrdinal,
679 class LocalOrdinalViewType>
680void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
681 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
682 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
683 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
684 const LocalOrdinalViewType & Acol2Brow,
685 const LocalOrdinalViewType & Acol2Irow,
686 const LocalOrdinalViewType & Bcol2Ccol,
687 const LocalOrdinalViewType & Icol2Ccol,
688 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
689 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
690 const std::string& label,
691 const Teuchos::RCP<Teuchos::ParameterList>& params) {
692
693#ifdef HAVE_TPETRA_MMM_TIMINGS
694 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
695 using Teuchos::TimeMonitor;
696 Teuchos::RCP<TimeMonitor> MM;
697#endif
698
699 // Check if the diagonal entries exist in debug mode
700 const bool debug = Tpetra::Details::Behavior::debug();
701 if(debug) {
702
703 auto rowMap = Aview.origMatrix->getRowMap();
704 Tpetra::Vector<Scalar> diags(rowMap);
705 Aview.origMatrix->getLocalDiagCopy(diags);
706 size_t diagLength = rowMap->getLocalNumElements();
707 Teuchos::Array<Scalar> diagonal(diagLength);
708 diags.get1dCopy(diagonal());
709
710 for(size_t i = 0; i < diagLength; ++i) {
711 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
712 std::runtime_error,
713 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
714 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
715 }
716 }
717
718 // Usings
719 using device_t = typename Kokkos::Compat::KokkosSYCLWrapperNode::device_type;
721 using graph_t = typename matrix_t::StaticCrsGraphType;
722 using lno_view_t = typename graph_t::row_map_type::non_const_type;
723 using c_lno_view_t = typename graph_t::row_map_type::const_type;
724 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
725 using scalar_view_t = typename matrix_t::values_type::non_const_type;
726
727 // KokkosKernels handle
728 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
729 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
730 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
731
732 // Get the rowPtr, colInd and vals of importMatrix
733 c_lno_view_t Irowptr;
734 lno_nnz_view_t Icolind;
735 scalar_view_t Ivals;
736 if(!Bview.importMatrix.is_null()) {
737 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
738 Irowptr = lclB.graph.row_map;
739 Icolind = lclB.graph.entries;
740 Ivals = lclB.values;
741 }
742
743 // Merge the B and Bimport matrices
744 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
745
746 // Get the properties and arrays of input matrices
747 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
748 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
749
750 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
751 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
752 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
753
754 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
755 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
756 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
757
758 // Arrays of the output matrix
759 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
760 lno_nnz_view_t entriesC;
761 scalar_view_t valuesC;
762
763 // Options
764 int team_work_size = 16;
765 std::string myalg("SPGEMM_KK_MEMORY");
766 if(!params.is_null()) {
767 if(params->isParameter("sycl: algorithm"))
768 myalg = params->get("sycl: algorithm",myalg);
769 if(params->isParameter("sycl: team work size"))
770 team_work_size = params->get("sycl: team work size",team_work_size);
771 }
772
773 // Get the algorithm mode
774 std::string nodename("SYCL");
775 std::string alg = nodename + std::string(" algorithm");
776 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
777 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
778
779
780 // KokkosKernels call
781 handle_t kh;
782 kh.create_spgemm_handle(alg_enum);
783 kh.set_team_work_size(team_work_size);
784
785 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
786 Arowptr, Acolind, false,
787 Browptr, Bcolind, false,
788 row_mapC);
789
790 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
791 if (c_nnz_size){
792 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
793 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
794 }
795
796 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
797 Arowptr, Acolind, Avals, false,
798 Browptr, Bcolind, Bvals, false,
799 row_mapC, entriesC, valuesC,
800 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
801 kh.destroy_spgemm_handle();
802
803#ifdef HAVE_TPETRA_MMM_TIMINGS
804 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLSort"))));
805#endif
806
807 // Sort & set values
808 if (params.is_null() || params->get("sort entries",true))
809 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
810 C.setAllValues(row_mapC,entriesC,valuesC);
811
812#ifdef HAVE_TPETRA_MMM_TIMINGS
813 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
814#endif
815
816 // Final Fillcomplete
817 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
818 labelList->set("Timer Label",label);
819 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
820 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
821 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
822}
823
824 }//MMdetails
825}//Tpetra
826
827#endif//SYCL
828
829#endif
Struct that holds views of the contents of a CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.