42#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
45#ifdef HAVE_TPETRA_INST_OPENMP
53 class GlobalOrdinal,
class LocalOrdinalViewType>
54struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57 const LocalOrdinalViewType & Acol2Brow,
58 const LocalOrdinalViewType & Acol2Irow,
59 const LocalOrdinalViewType & Bcol2Ccol,
60 const LocalOrdinalViewType & Icol2Ccol,
61 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63 const std::string& label = std::string(),
64 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType & Acol2Brow,
69 const LocalOrdinalViewType & Acol2Irow,
70 const LocalOrdinalViewType & Bcol2Ccol,
71 const LocalOrdinalViewType & Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
84 class GlobalOrdinal,
class LocalOrdinalViewType>
85struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
86 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
88 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90 const LocalOrdinalViewType & Acol2Brow,
91 const LocalOrdinalViewType & Acol2Irow,
92 const LocalOrdinalViewType & Bcol2Ccol,
93 const LocalOrdinalViewType & Icol2Ccol,
94 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96 const std::string& label = std::string(),
97 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
99 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
103 const LocalOrdinalViewType & Acol2Brow,
104 const LocalOrdinalViewType & Acol2Irow,
105 const LocalOrdinalViewType & Bcol2Ccol,
106 const LocalOrdinalViewType & Icol2Ccol,
107 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
108 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
109 const std::string& label = std::string(),
110 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
112 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
113 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
114 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
116 const LocalOrdinalViewType & Acol2Brow,
117 const LocalOrdinalViewType & Acol2Irow,
118 const LocalOrdinalViewType & Bcol2Ccol,
119 const LocalOrdinalViewType & Icol2Ccol,
120 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
121 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
122 const std::string& label = std::string(),
123 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
129template<
class Scalar,
131 class GlobalOrdinal,
class LocalOrdinalViewType>
132struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
133 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
134 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
135 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
136 const LocalOrdinalViewType & Acol2Prow,
137 const LocalOrdinalViewType & Acol2PIrow,
138 const LocalOrdinalViewType & Pcol2Ccol,
139 const LocalOrdinalViewType & PIcol2Ccol,
140 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
141 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
142 const std::string& label = std::string(),
143 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
145static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
146 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
147 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
148 const LocalOrdinalViewType & Acol2Prow,
149 const LocalOrdinalViewType & Acol2PIrow,
150 const LocalOrdinalViewType & Pcol2Ccol,
151 const LocalOrdinalViewType & PIcol2Ccol,
152 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
153 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
154 const std::string& label = std::string(),
155 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
157 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
158 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
159 const LocalOrdinalViewType & Acol2Prow,
160 const LocalOrdinalViewType & Acol2PIrow,
161 const LocalOrdinalViewType & Pcol2Ccol,
162 const LocalOrdinalViewType & PIcol2Ccol,
163 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
164 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
165 const std::string& label = std::string(),
166 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
168 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
169 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
170 const LocalOrdinalViewType & Acol2Prow,
171 const LocalOrdinalViewType & Acol2PIrow,
172 const LocalOrdinalViewType & Pcol2Ccol,
173 const LocalOrdinalViewType & PIcol2Ccol,
174 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
175 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
176 const std::string& label = std::string(),
177 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
182template<
class Scalar,
185 class LocalOrdinalViewType>
186void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
187 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
188 const LocalOrdinalViewType & Acol2Brow,
189 const LocalOrdinalViewType & Acol2Irow,
190 const LocalOrdinalViewType & Bcol2Ccol,
191 const LocalOrdinalViewType & Icol2Ccol,
192 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
193 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
194 const std::string& label,
195 const Teuchos::RCP<Teuchos::ParameterList>& params) {
197#ifdef HAVE_TPETRA_MMM_TIMINGS
198 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
199 using Teuchos::TimeMonitor;
200 Teuchos::RCP<TimeMonitor> MM;
204 std::string nodename(
"OpenMP");
209 typedef typename KCRS::device_type device_t;
210 typedef typename KCRS::StaticCrsGraphType graph_t;
211 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
212 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
213 typedef typename KCRS::values_type::non_const_type scalar_view_t;
216 int team_work_size = 16;
217 std::string myalg(
"SPGEMM_KK_MEMORY");
220 if(!params.is_null()) {
221 if(params->isParameter(
"openmp: algorithm"))
222 myalg = params->get(
"openmp: algorithm",myalg);
223 if(params->isParameter(
"openmp: team work size"))
224 team_work_size = params->get(
"openmp: team work size",team_work_size);
229 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
233#ifdef HAVE_TPETRA_MMM_TIMINGS
234 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPWrapper"))));
237 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
238 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
239 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
242 const KCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
246 std::string alg = nodename+std::string(
" algorithm");
248 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
249 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
252 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
254#ifdef HAVE_TPETRA_MMM_TIMINGS
255 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPCore"))));
259 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
262 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
263 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
266 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
267 lno_nnz_view_t entriesC;
268 scalar_view_t valuesC;
270 kh.create_spgemm_handle(alg_enum);
271 kh.set_team_work_size(team_work_size);
273 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
275 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
277 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
278 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
281 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
282 kh.destroy_spgemm_handle();
284#ifdef HAVE_TPETRA_MMM_TIMINGS
285 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPSort"))));
288 if (params.is_null() || params->get(
"sort entries",
true))
289 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
290 C.setAllValues(row_mapC,entriesC,valuesC);
294#ifdef HAVE_TPETRA_MMM_TIMINGS
295 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPESFC"))));
299 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
300 labelList->set(
"Timer Label",label);
301 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
302 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
303 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
307 Teuchos::ArrayRCP< const size_t > Crowptr;
308 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
309 Teuchos::ArrayRCP< const Scalar > Cvalues;
310 C.getAllValues(Crowptr,Ccolind,Cvalues);
313 int MyPID = C->getComm()->getRank();
314 printf(
"[%d] Crowptr = ",MyPID);
315 for(
size_t i=0; i<(size_t) Crowptr.size(); i++) {
316 printf(
"%3d ",(
int)Crowptr.getConst()[i]);
319 printf(
"[%d] Ccolind = ",MyPID);
320 for(
size_t i=0; i<(size_t)Ccolind.size(); i++) {
321 printf(
"%3d ",(
int)Ccolind.getConst()[i]);
332template<
class Scalar,
335 class LocalOrdinalViewType>
336void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
337 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
338 const LocalOrdinalViewType & Acol2Brow,
339 const LocalOrdinalViewType & Acol2Irow,
340 const LocalOrdinalViewType & Bcol2Ccol,
341 const LocalOrdinalViewType & Icol2Ccol,
342 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
343 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
344 const std::string& label,
345 const Teuchos::RCP<Teuchos::ParameterList>& params) {
346#ifdef HAVE_TPETRA_MMM_TIMINGS
347 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
348 using Teuchos::TimeMonitor;
349 Teuchos::RCP<TimeMonitor> MM;
356 int team_work_size = 16;
357 std::string myalg(
"LTG");
358 if(!params.is_null()) {
359 if(params->isParameter(
"openmp: algorithm"))
360 myalg = params->get(
"openmp: algorithm",myalg);
361 if(params->isParameter(
"openmp: team work size"))
362 team_work_size = params->get(
"openmp: team work size",team_work_size);
367 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
370 throw std::runtime_error(
"Tpetra::MatrixMatrix::MMM reuse unknown kernel");
373#ifdef HAVE_TPETRA_MMM_TIMINGS
374 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse OpenMPESFC"))));
376 C.fillComplete(C.getDomainMap(), C.getRangeMap());
381template<
class Scalar,
384 class LocalOrdinalViewType>
385void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
386 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
387 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
388 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
389 const LocalOrdinalViewType & Acol2Brow,
390 const LocalOrdinalViewType & Acol2Irow,
391 const LocalOrdinalViewType & Bcol2Ccol,
392 const LocalOrdinalViewType & Icol2Ccol,
393 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
394 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
395 const std::string& label,
396 const Teuchos::RCP<Teuchos::ParameterList>& params) {
398#ifdef HAVE_TPETRA_MMM_TIMINGS
399 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
400 using Teuchos::TimeMonitor;
401 Teuchos::RCP<TimeMonitor> MM;
408 int team_work_size = 16;
409 std::string myalg(
"LTG");
410 if(!params.is_null()) {
411 if(params->isParameter(
"openmp: jacobi algorithm"))
412 myalg = params->get(
"openmp: jacobi algorithm",myalg);
413 if(params->isParameter(
"openmp: team work size"))
414 team_work_size = params->get(
"openmp: team work size",team_work_size);
419 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
421 else if(myalg ==
"MSAK") {
422 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
424 else if(myalg ==
"KK") {
425 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
428 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
431#ifdef HAVE_TPETRA_MMM_TIMINGS
432 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
436 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
437 labelList->set(
"Timer Label",label);
438 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
441 if(!C.isFillComplete()) {
442 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
443 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
451template<
class Scalar,
454 class LocalOrdinalViewType>
455void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
456 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
457 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
458 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
459 const LocalOrdinalViewType & Acol2Brow,
460 const LocalOrdinalViewType & Acol2Irow,
461 const LocalOrdinalViewType & Bcol2Ccol,
462 const LocalOrdinalViewType & Icol2Ccol,
463 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
464 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
465 const std::string& label,
466 const Teuchos::RCP<Teuchos::ParameterList>& params) {
468#ifdef HAVE_TPETRA_MMM_TIMINGS
469 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
470 using Teuchos::TimeMonitor;
471 Teuchos::RCP<TimeMonitor> MM;
478 int team_work_size = 16;
479 std::string myalg(
"LTG");
480 if(!params.is_null()) {
481 if(params->isParameter(
"openmp: jacobi algorithm"))
482 myalg = params->get(
"openmp: jacobi algorithm",myalg);
483 if(params->isParameter(
"openmp: team work size"))
484 team_work_size = params->get(
"openmp: team work size",team_work_size);
489 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
492 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
495#ifdef HAVE_TPETRA_MMM_TIMINGS
496 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse OpenMPESFC"))));
498 C.fillComplete(C.getDomainMap(), C.getRangeMap());
503template<
class Scalar,
506 class LocalOrdinalViewType>
507void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
508 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
509 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
510 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
511 const LocalOrdinalViewType & Acol2Brow,
512 const LocalOrdinalViewType & Acol2Irow,
513 const LocalOrdinalViewType & Bcol2Ccol,
514 const LocalOrdinalViewType & Icol2Ccol,
515 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
516 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
517 const std::string& label,
518 const Teuchos::RCP<Teuchos::ParameterList>& params) {
520#ifdef HAVE_TPETRA_MMM_TIMINGS
521 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
522 using Teuchos::TimeMonitor;
523 Teuchos::RCP<TimeMonitor> MM;
530 auto rowMap = Aview.origMatrix->getRowMap();
532 Aview.origMatrix->getLocalDiagCopy(diags);
533 size_t diagLength = rowMap->getLocalNumElements();
534 Teuchos::Array<Scalar> diagonal(diagLength);
535 diags.get1dCopy(diagonal());
537 for(
size_t i = 0; i < diagLength; ++i) {
538 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
540 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
541 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
546 using device_t =
typename Kokkos::Compat::KokkosOpenMPWrapperNode::device_type;
548 using graph_t =
typename matrix_t::StaticCrsGraphType;
549 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
550 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
551 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
552 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
555 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
556 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
557 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
560 c_lno_view_t Irowptr;
561 lno_nnz_view_t Icolind;
563 if(!Bview.importMatrix.is_null()) {
564 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
565 Irowptr = lclB.graph.row_map;
566 Icolind = lclB.graph.entries;
571 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
574 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
575 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
577 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
578 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
579 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
581 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
582 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
583 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
586 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
587 lno_nnz_view_t entriesC;
588 scalar_view_t valuesC;
591 int team_work_size = 16;
592 std::string myalg(
"SPGEMM_KK_MEMORY");
593 if(!params.is_null()) {
594 if(params->isParameter(
"cuda: algorithm"))
595 myalg = params->get(
"cuda: algorithm",myalg);
596 if(params->isParameter(
"cuda: team work size"))
597 team_work_size = params->get(
"cuda: team work size",team_work_size);
601 std::string nodename(
"OpenMP");
602 std::string alg = nodename + std::string(
" algorithm");
603 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
604 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
609 kh.create_spgemm_handle(alg_enum);
610 kh.set_team_work_size(team_work_size);
612 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
613 Arowptr, Acolind,
false,
614 Browptr, Bcolind,
false,
617 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
619 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
620 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
623 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
624 Arowptr, Acolind, Avals,
false,
625 Browptr, Bcolind, Bvals,
false,
626 row_mapC, entriesC, valuesC,
627 omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
628 kh.destroy_spgemm_handle();
630#ifdef HAVE_TPETRA_MMM_TIMINGS
631 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPSort"))));
635 if (params.is_null() || params->get(
"sort entries",
true))
636 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
637 C.setAllValues(row_mapC,entriesC,valuesC);
639#ifdef HAVE_TPETRA_MMM_TIMINGS
640 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
644 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
645 labelList->set(
"Timer Label",label);
646 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
647 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
648 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
653template<
class Scalar,
656 class LocalOrdinalViewType>
657void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
658 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
659 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
660 const LocalOrdinalViewType & Acol2Prow,
661 const LocalOrdinalViewType & Acol2PIrow,
662 const LocalOrdinalViewType & Pcol2Accol,
663 const LocalOrdinalViewType & PIcol2Accol,
664 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
665 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
666 const std::string& label,
667 const Teuchos::RCP<Teuchos::ParameterList>& params) {
671#ifdef HAVE_TPETRA_MMM_TIMINGS
672 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
673 using Teuchos::TimeMonitor;
674 Teuchos::RCP<TimeMonitor> MM;
678 std::string nodename(
"OpenMP");
681 std::string myalg(
"LTG");
683 if(!params.is_null()) {
684 if(params->isParameter(
"openmp: rap algorithm"))
685 myalg = params->get(
"openmp: rap algorithm",myalg);
690 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
693 throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
698template<
class Scalar,
701 class LocalOrdinalViewType>
702void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
703 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
704 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
705 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
707 const LocalOrdinalViewType & Acol2Prow,
708 const LocalOrdinalViewType & Acol2Irow,
709 const LocalOrdinalViewType & Pcol2Ccol,
710 const LocalOrdinalViewType & Icol2Ccol,
711 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
712 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
713 const std::string& label,
714 const Teuchos::RCP<Teuchos::ParameterList>& params) {
716#ifdef HAVE_TPETRA_MMM_TIMINGS
717 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
718 using Teuchos::TimeMonitor;
719 Teuchos::RCP<TimeMonitor> MM;
726 std::string myalg(
"LTG");
727 if(!params.is_null()) {
728 if(params->isParameter(
"openmp: rap algorithm"))
729 myalg = params->get(
"openmp: rap algorithm",myalg);
734 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
737 throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
740#ifdef HAVE_TPETRA_MMM_TIMINGS
741 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Reuse OpenMPESFC"))));
743 C.fillComplete(C.getDomainMap(), C.getRangeMap());
750template<
class Scalar,
753 class LocalOrdinalViewType>
754void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
756 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
757 const LocalOrdinalViewType & Acol2Prow,
758 const LocalOrdinalViewType & Acol2PIrow,
759 const LocalOrdinalViewType & Pcol2Accol,
760 const LocalOrdinalViewType & PIcol2Accol,
761 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
762 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
763 const std::string& label,
764 const Teuchos::RCP<Teuchos::ParameterList>& params) {
767#ifdef HAVE_TPETRA_MMM_TIMINGS
768 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
769 using Teuchos::TimeMonitor;
770 Teuchos::RCP<TimeMonitor> MM;
774 std::string nodename(
"OpenMP");
777 std::string myalg(
"LTG");
779 if(!params.is_null()) {
780 if(params->isParameter(
"openmp: ptap algorithm"))
781 myalg = params->get(
"openmp: ptap algorithm",myalg);
785#ifdef HAVE_TPETRA_MMM_TIMINGS
786 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
789 using Teuchos::ParameterList;
791 using LO = LocalOrdinal;
792 using GO = GlobalOrdinal;
796 using transposer_type =
797 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
798 transposer_type transposer (Pview.origMatrix, label +
"XP: ");
799 RCP<ParameterList> transposeParams (
new ParameterList);
800 if (! params.is_null ()) {
801 transposeParams->set (
"compute global constants",
802 params->get (
"compute global constants: temporaries",
805 transposeParams->set (
"sort",
false);
806 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
807 transposer.createTransposeLocal (transposeParams);
808 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
809 Rview.origMatrix = Ptrans;
811 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
812 mult_R_A_P_newmatrix_LowThreadGustavsonKernel
813 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
814 PIcol2Accol, Ac, Acimport, label, params);
817 throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
822template<
class Scalar,
825 class LocalOrdinalViewType>
826void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
828 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
829 const LocalOrdinalViewType & Acol2Prow,
830 const LocalOrdinalViewType & Acol2PIrow,
831 const LocalOrdinalViewType & Pcol2Accol,
832 const LocalOrdinalViewType & PIcol2Accol,
833 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
834 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
835 const std::string& label,
836 const Teuchos::RCP<Teuchos::ParameterList>& params) {
839#ifdef HAVE_TPETRA_MMM_TIMINGS
840 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
841 using Teuchos::TimeMonitor;
842 Teuchos::RCP<TimeMonitor> MM;
846 std::string nodename(
"OpenMP");
849 std::string myalg(
"LTG");
851 if(!params.is_null()) {
852 if(params->isParameter(
"openmp: ptap algorithm"))
853 myalg = params->get(
"openmp: ptap algorithm",myalg);
857#ifdef HAVE_TPETRA_MMM_TIMINGS
858 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
861 using Teuchos::ParameterList;
863 using LO = LocalOrdinal;
864 using GO = GlobalOrdinal;
868 using transposer_type =
869 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
870 transposer_type transposer (Pview.origMatrix, label +
"XP: ");
871 RCP<ParameterList> transposeParams (
new ParameterList);
872 if (! params.is_null ()) {
873 transposeParams->set (
"compute global constants",
874 params->get (
"compute global constants: temporaries",
877 transposeParams->set (
"sort",
false);
878 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
879 transposer.createTransposeLocal (transposeParams);
880 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
881 Rview.origMatrix = Ptrans;
883 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
884 mult_R_A_P_reuse_LowThreadGustavsonKernel
885 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
886 PIcol2Accol, Ac, Acimport, label, params);
889 throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
891 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
Struct that holds views of the contents of a CrsMatrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.