40#ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
41#define TPETRA_BLOCKCRSMATRIX_DEF_HPP
49#include "Tpetra_BlockMultiVector.hpp"
52#include "Teuchos_TimeMonitor.hpp"
53#ifdef HAVE_TPETRA_DEBUG
69#if defined(__CUDACC__)
71# if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
72# undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
75#elif defined(__GNUC__)
77# if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
78# undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
84# if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
85# define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
95 struct BlockCrsRowStruct {
96 T totalNumEntries, totalNumBytes, maxRowLength;
97 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
98 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
99 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
100 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
105 KOKKOS_INLINE_FUNCTION
106 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
107 a.totalNumEntries += b.totalNumEntries;
108 a.totalNumBytes += b.totalNumBytes;
109 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
112 template<
typename T,
typename ExecSpace>
113 struct BlockCrsReducer {
114 typedef BlockCrsReducer reducer;
115 typedef T value_type;
116 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
119 KOKKOS_INLINE_FUNCTION
120 BlockCrsReducer(value_type &val) : value(&val) {}
122 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
123 KOKKOS_INLINE_FUNCTION
void join(value_type &dst,
const value_type &src)
const { dst += src; }
124 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
125 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
126 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
129 template<
class AlphaCoeffType,
131 class MatrixValuesType,
136 class BcrsApplyNoTransFunctor {
138 static_assert (Kokkos::is_view<MatrixValuesType>::value,
139 "MatrixValuesType must be a Kokkos::View.");
140 static_assert (Kokkos::is_view<OutVecType>::value,
141 "OutVecType must be a Kokkos::View.");
142 static_assert (Kokkos::is_view<InVecType>::value,
143 "InVecType must be a Kokkos::View.");
144 static_assert (std::is_same<MatrixValuesType,
145 typename MatrixValuesType::const_type>::value,
146 "MatrixValuesType must be a const Kokkos::View.");
147 static_assert (std::is_same<
typename OutVecType::value_type,
148 typename OutVecType::non_const_value_type>::value,
149 "OutVecType must be a nonconst Kokkos::View.");
150 static_assert (std::is_same<
typename InVecType::value_type,
151 typename InVecType::const_value_type>::value,
152 "InVecType must be a const Kokkos::View.");
153 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
154 "MatrixValuesType must be a rank-1 Kokkos::View.");
155 static_assert (
static_cast<int> (InVecType::rank) == 1,
156 "InVecType must be a rank-1 Kokkos::View.");
157 static_assert (
static_cast<int> (OutVecType::rank) == 1,
158 "OutVecType must be a rank-1 Kokkos::View.");
159 typedef typename MatrixValuesType::non_const_value_type scalar_type;
162 typedef typename GraphType::device_type device_type;
165 typedef typename std::remove_const<typename GraphType::data_type>::type
174 void setX (
const InVecType& X) { X_ = X; }
182 void setY (
const OutVecType& Y) { Y_ = Y; }
184 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
185 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
188 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
189 const GraphType& graph,
190 const MatrixValuesType& val,
191 const local_ordinal_type blockSize,
193 const beta_coeff_type& beta,
194 const OutVecType& Y) :
196 ptr_ (graph.row_map),
197 ind_ (graph.entries),
199 blockSize_ (blockSize),
206 KOKKOS_INLINE_FUNCTION
void
207 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
209 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
213 KOKKOS_INLINE_FUNCTION
void
214 operator () (
const local_ordinal_type& lclRow)
const
216 using ::Tpetra::FILL;
217 using ::Tpetra::SCAL;
218 using ::Tpetra::GEMV;
219 using Kokkos::Details::ArithTraits;
225 using Kokkos::parallel_for;
226 using Kokkos::subview;
227 typedef typename decltype (ptr_)::non_const_value_type offset_type;
228 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
231 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
234 const offset_type Y_ptBeg = lclRow * blockSize_;
235 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
236 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
240 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
241 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
243 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
247 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
248 const offset_type blkBeg = ptr_[lclRow];
249 const offset_type blkEnd = ptr_[lclRow+1];
251 const offset_type bs2 = blockSize_ * blockSize_;
252 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
254 little_block_type A_cur (val_.data () + absBlkOff * bs2,
255 blockSize_, blockSize_);
256 const offset_type X_blkCol = ind_[absBlkOff];
257 const offset_type X_ptBeg = X_blkCol * blockSize_;
258 const offset_type X_ptEnd = X_ptBeg + blockSize_;
259 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
261 GEMV (alpha_, A_cur, X_cur, Y_cur);
267 alpha_coeff_type alpha_;
268 typename GraphType::row_map_type::const_type ptr_;
269 typename GraphType::entries_type::const_type ind_;
270 MatrixValuesType val_;
273 beta_coeff_type beta_;
277 template<
class AlphaCoeffType,
279 class MatrixValuesType,
283 class BcrsApplyNoTransFunctor<AlphaCoeffType,
291 static_assert (Kokkos::is_view<MatrixValuesType>::value,
292 "MatrixValuesType must be a Kokkos::View.");
293 static_assert (Kokkos::is_view<OutVecType>::value,
294 "OutVecType must be a Kokkos::View.");
295 static_assert (Kokkos::is_view<InVecType>::value,
296 "InVecType must be a Kokkos::View.");
297 static_assert (std::is_same<MatrixValuesType,
298 typename MatrixValuesType::const_type>::value,
299 "MatrixValuesType must be a const Kokkos::View.");
300 static_assert (std::is_same<
typename OutVecType::value_type,
301 typename OutVecType::non_const_value_type>::value,
302 "OutVecType must be a nonconst Kokkos::View.");
303 static_assert (std::is_same<
typename InVecType::value_type,
304 typename InVecType::const_value_type>::value,
305 "InVecType must be a const Kokkos::View.");
306 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
307 "MatrixValuesType must be a rank-1 Kokkos::View.");
308 static_assert (
static_cast<int> (InVecType::rank) == 1,
309 "InVecType must be a rank-1 Kokkos::View.");
310 static_assert (
static_cast<int> (OutVecType::rank) == 1,
311 "OutVecType must be a rank-1 Kokkos::View.");
312 typedef typename MatrixValuesType::non_const_value_type scalar_type;
315 typedef typename GraphType::device_type device_type;
318 static constexpr bool runOnHost = !std::is_same_v<typename device_type::execution_space, Kokkos::DefaultExecutionSpace> || std::is_same_v<Kokkos::DefaultExecutionSpace, Kokkos::DefaultHostExecutionSpace>;
323 typedef typename std::remove_const<typename GraphType::data_type>::type
332 void setX (
const InVecType& X) { X_ = X; }
340 void setY (
const OutVecType& Y) { Y_ = Y; }
342 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
343 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
346 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
347 const GraphType& graph,
348 const MatrixValuesType& val,
349 const local_ordinal_type blockSize,
351 const beta_coeff_type& beta,
352 const OutVecType& Y) :
354 ptr_ (graph.row_map),
355 ind_ (graph.entries),
357 blockSize_ (blockSize),
364 KOKKOS_INLINE_FUNCTION
void
365 operator () (
const local_ordinal_type& lclRow)
const
367 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
371 KOKKOS_INLINE_FUNCTION
void
372 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
377 using Kokkos::Details::ArithTraits;
383 using Kokkos::parallel_for;
384 using Kokkos::subview;
385 typedef typename decltype (ptr_)::non_const_value_type offset_type;
386 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
389 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
392 const offset_type Y_ptBeg = lclRow * blockSize_;
393 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
394 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
398 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
399 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
400 [&](
const local_ordinal_type &i) {
401 Y_cur(i) = ArithTraits<beta_coeff_type>::zero ();
404 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
405 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
406 [&](
const local_ordinal_type &i) {
410 member.team_barrier();
412 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
413 const offset_type blkBeg = ptr_[lclRow];
414 const offset_type blkEnd = ptr_[lclRow+1];
416 const offset_type bs2 = blockSize_ * blockSize_;
417 little_block_type A_cur (val_.data (), blockSize_, blockSize_);
418 auto X_cur = subview (X_, ::Kokkos::make_pair (0, blockSize_));
420 (Kokkos::TeamThreadRange(member, blkBeg, blkEnd),
421 [&](
const local_ordinal_type &absBlkOff) {
422 A_cur.assign_data(val_.data () + absBlkOff * bs2);
423 const offset_type X_blkCol = ind_[absBlkOff];
424 const offset_type X_ptBeg = X_blkCol * blockSize_;
425 X_cur.assign_data(&X_(X_ptBeg));
428 (Kokkos::ThreadVectorRange(member, blockSize_),
429 [&](
const local_ordinal_type &k0) {
431 for (local_ordinal_type k1=0;k1<blockSize_;++k1)
432 val += A_cur(k0,k1)*X_cur(k1);
433 if constexpr(runOnHost) {
435 Y_cur(k0) += alpha_*val;
442 Kokkos::atomic_add(&Y_cur(k0), alpha_*val);
450 alpha_coeff_type alpha_;
451 typename GraphType::row_map_type::const_type ptr_;
452 typename GraphType::entries_type::const_type ind_;
453 MatrixValuesType val_;
456 beta_coeff_type beta_;
461 template<
class AlphaCoeffType,
463 class MatrixValuesType,
464 class InMultiVecType,
466 class OutMultiVecType>
468 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
469 const GraphType& graph,
470 const MatrixValuesType& val,
471 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
472 const InMultiVecType& X,
473 const BetaCoeffType& beta,
474 const OutMultiVecType& Y
477 static_assert (Kokkos::is_view<MatrixValuesType>::value,
478 "MatrixValuesType must be a Kokkos::View.");
479 static_assert (Kokkos::is_view<OutMultiVecType>::value,
480 "OutMultiVecType must be a Kokkos::View.");
481 static_assert (Kokkos::is_view<InMultiVecType>::value,
482 "InMultiVecType must be a Kokkos::View.");
483 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
484 "MatrixValuesType must be a rank-1 Kokkos::View.");
485 static_assert (
static_cast<int> (OutMultiVecType::rank) == 2,
486 "OutMultiVecType must be a rank-2 Kokkos::View.");
487 static_assert (
static_cast<int> (InMultiVecType::rank) == 2,
488 "InMultiVecType must be a rank-2 Kokkos::View.");
490 typedef typename MatrixValuesType::device_type::execution_space execution_space;
491 typedef typename MatrixValuesType::device_type::memory_space memory_space;
492 typedef typename MatrixValuesType::const_type matrix_values_type;
493 typedef typename OutMultiVecType::non_const_type out_multivec_type;
494 typedef typename InMultiVecType::const_type in_multivec_type;
495 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
496 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
497 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
499 constexpr bool is_builtin_type_enabled = std::is_arithmetic<typename InMultiVecType::non_const_value_type>::value;
500 constexpr bool is_host_memory_space = std::is_same<memory_space,Kokkos::HostSpace>::value;
501 constexpr bool use_team_policy = (is_builtin_type_enabled && !is_host_memory_space);
503 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
504 static_cast<LO
> (0) :
505 static_cast<LO> (graph.row_map.extent (0) - 1);
506 const LO numVecs = Y.extent (1);
507 if (numLocalMeshRows == 0 || numVecs == 0) {
514 in_multivec_type X_in = X;
515 out_multivec_type Y_out = Y;
520 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
521 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
522 typedef BcrsApplyNoTransFunctor<alpha_type, GraphType,
523 matrix_values_type, in_vec_type, beta_type, out_vec_type,
524 use_team_policy> functor_type;
526 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
527 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
530 if (use_team_policy) {
531 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
533 typedef Kokkos::TeamPolicy<execution_space> policy_type;
534 policy_type policy(1,1);
535#if defined(KOKKOS_ENABLE_CUDA)
536 constexpr bool is_cuda = std::is_same<execution_space, Kokkos::Cuda>::value;
538 constexpr bool is_cuda =
false;
541 LO team_size = 8, vector_size = 1;
542 if (blockSize <= 5) vector_size = 4;
543 else if (blockSize <= 9) vector_size = 8;
544 else if (blockSize <= 12) vector_size = 12;
545 else if (blockSize <= 20) vector_size = 20;
546 else vector_size = 20;
547 policy = policy_type(numLocalMeshRows, team_size, vector_size);
549 policy = policy_type(numLocalMeshRows, 1, 1);
551 Kokkos::parallel_for (policy, functor);
554 for (LO j = 1; j < numVecs; ++j) {
555 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
556 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
559 Kokkos::parallel_for (policy, functor);
562 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
563 Kokkos::RangePolicy<execution_space,LO> policy(0, numLocalMeshRows);
564 Kokkos::parallel_for (policy, functor);
565 for (LO j = 1; j < numVecs; ++j) {
566 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
567 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
570 Kokkos::parallel_for (policy, functor);
580template<
class Scalar,
class LO,
class GO,
class Node>
581class GetLocalDiagCopy {
583 typedef typename Node::device_type device_type;
584 typedef size_t diag_offset_type;
585 typedef Kokkos::View<
const size_t*, device_type,
586 Kokkos::MemoryUnmanaged> diag_offsets_type;
587 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
588 typedef typename global_graph_type::local_graph_device_type local_graph_device_type;
589 typedef typename local_graph_device_type::row_map_type row_offsets_type;
590 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
591 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
592 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
595 GetLocalDiagCopy (
const diag_type& diag,
596 const values_type& val,
597 const diag_offsets_type& diagOffsets,
598 const row_offsets_type& ptr,
599 const LO blockSize) :
601 diagOffsets_ (diagOffsets),
603 blockSize_ (blockSize),
604 offsetPerBlock_ (blockSize_*blockSize_),
608 KOKKOS_INLINE_FUNCTION
void
609 operator() (
const LO& lclRowInd)
const
614 const size_t absOffset = ptr_[lclRowInd];
617 const size_t relOffset = diagOffsets_[lclRowInd];
620 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
625 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
626 const_little_block_type;
627 const_little_block_type D_in (val_.data () + pointOffset,
628 blockSize_, blockSize_);
629 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
635 diag_offsets_type diagOffsets_;
636 row_offsets_type ptr_;
643 template<
class Scalar,
class LO,
class GO,
class Node>
645 BlockCrsMatrix<Scalar, LO, GO, Node>::
646 markLocalErrorAndGetStream ()
648 * (this->localError_) =
true;
649 if ((*errs_).is_null ()) {
650 *errs_ = Teuchos::rcp (
new std::ostringstream ());
655 template<
class Scalar,
class LO,
class GO,
class Node>
670 template<
class Scalar,
class LO,
class GO,
class Node>
676 rowMeshMap_ (* (
graph.getRowMap ())),
688 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
689 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
690 "rows (isSorted() is false). This class assumes sorted rows.");
692 graphRCP_ = Teuchos::rcpFromRef(graph_);
701 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
702 " <= 0. The block size must be positive.");
712 *pointImporter_ = Teuchos::rcp
718 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
719 Kokkos::deep_copy(ptrHost_,
ptr_h);
722 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
724 Kokkos::deep_copy (indHost_,
ind_h);
727 val_ =
decltype (val_) (impl_scalar_type_dualview(
"val",
numValEnt));
731 template<
class Scalar,
class LO,
class GO,
class Node>
734 const typename local_matrix_device_type::values_type&
values,
738 rowMeshMap_ (* (
graph.getRowMap ())),
749 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
750 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
751 "rows (isSorted() is false). This class assumes sorted rows.");
753 graphRCP_ = Teuchos::rcpFromRef(graph_);
762 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
763 " <= 0. The block size must be positive.");
773 *pointImporter_ = Teuchos::rcp
779 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
780 Kokkos::deep_copy(ptrHost_,
ptr_h);
783 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
784 Kokkos::deep_copy (indHost_,
ind_h);
788 val_ =
decltype (val_) (
values);
792 template<
class Scalar,
class LO,
class GO,
class Node>
800 rowMeshMap_ (* (
graph.getRowMap ())),
812 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
813 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
814 "rows (isSorted() is false). This class assumes sorted rows.");
816 graphRCP_ = Teuchos::rcpFromRef(graph_);
825 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
826 " <= 0. The block size must be positive.");
832 *pointImporter_ = Teuchos::rcp
838 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
840 Kokkos::deep_copy(ptrHost_,
ptr_h);
843 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
845 Kokkos::deep_copy (indHost_,
ind_h);
848 val_ =
decltype (val_) (impl_scalar_type_dualview(
"val",
numValEnt));
853 template<
class Scalar,
class LO,
class GO,
class Node>
854 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
859 return Teuchos::rcp (
new map_type (domainPointMap_));
862 template<
class Scalar,
class LO,
class GO,
class Node>
863 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
868 return Teuchos::rcp (
new map_type (rangePointMap_));
871 template<
class Scalar,
class LO,
class GO,
class Node>
872 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
876 return graph_.getRowMap();
879 template<
class Scalar,
class LO,
class GO,
class Node>
880 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
884 return graph_.getColMap();
887 template<
class Scalar,
class LO,
class GO,
class Node>
892 return graph_.getGlobalNumRows();
895 template<
class Scalar,
class LO,
class GO,
class Node>
900 return graph_.getLocalNumRows();
903 template<
class Scalar,
class LO,
class GO,
class Node>
908 return graph_.getLocalMaxNumRowEntries();
911 template<
class Scalar,
class LO,
class GO,
class Node>
916 Teuchos::ETransp
mode,
922 mode != Teuchos::NO_TRANS &&
mode != Teuchos::TRANS &&
mode != Teuchos::CONJ_TRANS,
923 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
924 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
925 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
934 catch (std::invalid_argument&
e) {
936 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
937 "apply: Either the input MultiVector X or the output MultiVector Y "
938 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
939 "graph. BlockMultiVector's constructor threw the following exception: "
949 }
catch (std::invalid_argument&
e) {
951 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
952 "apply: The implementation method applyBlock complained about having "
953 "an invalid argument. It reported the following: " <<
e.what ());
954 }
catch (std::logic_error&
e) {
956 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
957 "apply: The implementation method applyBlock complained about a "
958 "possible bug in its implementation. It reported the following: "
960 }
catch (std::exception&
e) {
962 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
963 "apply: The implementation method applyBlock threw an exception which "
964 "is neither std::invalid_argument nor std::logic_error, but is a "
965 "subclass of std::exception. It reported the following: "
969 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
970 "apply: The implementation method applyBlock threw an exception which "
971 "is not an instance of a subclass of std::exception. This probably "
972 "indicates a bug. Please report this to the Tpetra developers.");
976 template<
class Scalar,
class LO,
class GO,
class Node>
981 Teuchos::ETransp
mode,
986 X.getBlockSize () !=
Y.getBlockSize (), std::invalid_argument,
987 "Tpetra::BlockCrsMatrix::applyBlock: "
988 "X and Y have different block sizes. X.getBlockSize() = "
989 <<
X.getBlockSize () <<
" != Y.getBlockSize() = "
990 <<
Y.getBlockSize () <<
".");
992 if (
mode == Teuchos::NO_TRANS) {
994 }
else if (
mode == Teuchos::TRANS ||
mode == Teuchos::CONJ_TRANS) {
998 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
999 "applyBlock: Invalid 'mode' argument. Valid values are "
1000 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
1004 template<
class Scalar,
class LO,
class GO,
class Node>
1016 "destMatrix is required to be null.");
1032 template<
class Scalar,
class LO,
class GO,
class Node>
1044 "destMatrix is required to be null.");
1060 template<
class Scalar,
class LO,
class GO,
class Node>
1065 auto val_d = val_.getDeviceView(Access::OverwriteAll);
1069 template<
class Scalar,
class LO,
class GO,
class Node>
1077 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
1085 template <
class Scalar,
class LO,
class GO,
class Node>
1089 Kokkos::MemoryUnmanaged>& offsets)
const
1091 graph_.getLocalDiagOffsets (offsets);
1094 template <
class Scalar,
class LO,
class GO,
class Node>
1098 Kokkos::MemoryUnmanaged>& diag,
1100 Kokkos::MemoryUnmanaged>& offsets)
const
1102 using Kokkos::parallel_for;
1103 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1105 const LO
lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getLocalNumElements ());
1106 const LO
blockSize = this->getBlockSize ();
1109 static_cast<LO
> (diag.extent (1)) <
blockSize ||
1110 static_cast<LO
> (diag.extent (2)) <
blockSize,
1111 std::invalid_argument,
prefix <<
1112 "The input Kokkos::View is not big enough to hold all the data.");
1114 (
static_cast<LO
> (offsets.size ()) <
lclNumMeshRows, std::invalid_argument,
1115 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
1118 typedef Kokkos::RangePolicy<execution_space, LO>
policy_type;
1125 auto val_d = val_.getDeviceView(Access::ReadOnly);
1128 graph_.getLocalGraphDevice ().row_map, blockSize_));
1131 template<
class Scalar,
class LO,
class GO,
class Node>
1139 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
1148 template<
class Scalar,
class LO,
class GO,
class Node>
1156 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
1163 template<
class Scalar,
class LO,
class GO,
class Node>
1167 nonconst_local_inds_host_view_type &
Indices,
1168 nonconst_values_host_view_type &
Values,
1175 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1176 <<
numInds <<
" row entries");
1182 for (LO
i=0;
i<
numInds*blockSize_*blockSize_; ++
i) {
1188 template<
class Scalar,
class LO,
class GO,
class Node>
1196 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1201 return static_cast<LO
> (0);
1204 const LO
LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1221 template<
class Scalar,
class LO,
class GO,
class Node>
1229 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1234 return static_cast<LO
> (0);
1241 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1242 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
1248 if (offsets[
k] !=
STINV) {
1259 template<
class Scalar,
class LO,
class GO,
class Node>
1267 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1272 return static_cast<LO
> (0);
1274 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
1275 auto val_out = getValuesHost(localRowInd);
1276 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
1278 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1279 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1280 size_t pointOffset = 0;
1283 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1284 const size_t blockOffset = offsets[k]*perBlockSize;
1285 if (blockOffset != STINV) {
1286 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
1287 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
1296 template<
class Scalar,
class LO,
class GO,
class Node>
1304 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1309 return static_cast<LO
> (0);
1317 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1318 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1334 template<
class Scalar,
class LO,
class GO,
class Node>
1336 impl_scalar_type_dualview::t_host::const_type
1340 return val_.getHostView(Access::ReadOnly);
1343 template<
class Scalar,
class LO,
class GO,
class Node>
1344 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1345 impl_scalar_type_dualview::t_dev::const_type
1346 BlockCrsMatrix<Scalar, LO, GO, Node>::
1347 getValuesDevice ()
const
1349 return val_.getDeviceView(Access::ReadOnly);
1352 template<
class Scalar,
class LO,
class GO,
class Node>
1353 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1354 impl_scalar_type_dualview::t_host
1358 return val_.getHostView(Access::ReadWrite);
1361 template<
class Scalar,
class LO,
class GO,
class Node>
1363 impl_scalar_type_dualview::t_dev
1367 return val_.getDeviceView(Access::ReadWrite);
1370 template<
class Scalar,
class LO,
class GO,
class Node>
1371 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1372 impl_scalar_type_dualview::t_host::const_type
1376 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1377 auto val = val_.getHostView(Access::ReadOnly);
1382 template<
class Scalar,
class LO,
class GO,
class Node>
1384 impl_scalar_type_dualview::t_dev::const_type
1388 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1389 auto val = val_.getDeviceView(Access::ReadOnly);
1394 template<
class Scalar,
class LO,
class GO,
class Node>
1399 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1400 auto val = val_.getHostView(Access::ReadWrite);
1405 template<
class Scalar,
class LO,
class GO,
class Node>
1410 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1411 auto val = val_.getDeviceView(Access::ReadWrite);
1416 template<
class Scalar,
class LO,
class GO,
class Node>
1422 if (
numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1423 return static_cast<LO
> (0);
1429 template<
class Scalar,
class LO,
class GO,
class Node>
1430 typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
1434 auto numCols = this->graph_.getColMap()->getLocalNumElements();
1435 auto val = val_.getDeviceView(Access::ReadWrite);
1436 const LO
blockSize = this->getBlockSize ();
1437 const auto graph = this->graph_.getLocalGraphDevice ();
1439 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
1446 template<
class Scalar,
class LO,
class GO,
class Node>
1451 const Teuchos::ETransp
mode,
1462 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
1463 "transpose and conjugate transpose modes are not yet implemented.");
1466 template<
class Scalar,
class LO,
class GO,
class Node>
1468 BlockCrsMatrix<Scalar, LO, GO, Node>::
1469 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1470 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1476 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1477 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1478 const Scalar zero = STS::zero ();
1479 const Scalar one = STS::one ();
1480 RCP<const import_type>
import = graph_.getImporter ();
1482 RCP<const export_type> theExport = graph_.getExporter ();
1483 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1485 if (alpha == zero) {
1489 else if (beta != one) {
1494 const BMV* X_colMap = NULL;
1495 if (
import.is_null ()) {
1499 catch (std::exception& e) {
1500 TEUCHOS_TEST_FOR_EXCEPTION
1501 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1502 "operator= threw an exception: " << std::endl << e.what ());
1512 if ((*X_colMap_).is_null () ||
1513 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1514 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1515 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1516 static_cast<LO
> (X.getNumVectors ())));
1518 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1522 X_colMap = &(**X_colMap_);
1524 catch (std::exception& e) {
1525 TEUCHOS_TEST_FOR_EXCEPTION
1526 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1527 "operator= threw an exception: " << std::endl << e.what ());
1531 BMV* Y_rowMap = NULL;
1532 if (theExport.is_null ()) {
1535 else if ((*Y_rowMap_).is_null () ||
1536 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1537 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1538 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1539 static_cast<LO
> (X.getNumVectors ())));
1541 Y_rowMap = &(**Y_rowMap_);
1543 catch (std::exception& e) {
1544 TEUCHOS_TEST_FOR_EXCEPTION(
1545 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1546 "operator= threw an exception: " << std::endl << e.what ());
1551 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1553 catch (std::exception& e) {
1554 TEUCHOS_TEST_FOR_EXCEPTION
1555 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1556 "an exception: " << e.what ());
1559 TEUCHOS_TEST_FOR_EXCEPTION
1560 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1561 "an exception not a subclass of std::exception.");
1564 if (! theExport.is_null ()) {
1570 template<
class Scalar,
class LO,
class GO,
class Node>
1572 BlockCrsMatrix<Scalar, LO, GO, Node>::
1573 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1574 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1578 using ::Tpetra::Impl::bcrsLocalApplyNoTrans;
1580 const impl_scalar_type alpha_impl = alpha;
1581 const auto graph = this->graph_.getLocalGraphDevice ();
1582 const impl_scalar_type beta_impl = beta;
1583 const LO blockSize = this->getBlockSize ();
1585 auto X_mv = X.getMultiVectorView ();
1586 auto Y_mv = Y.getMultiVectorView ();
1590 auto X_lcl = X_mv.getLocalViewDevice (Access::ReadOnly);
1591 auto Y_lcl = Y_mv.getLocalViewDevice (Access::ReadWrite);
1592 auto val = val_.getDeviceView(Access::ReadWrite);
1594 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1598 template<
class Scalar,
class LO,
class GO,
class Node>
1600 BlockCrsMatrix<Scalar, LO, GO, Node>::
1601 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1602 const LO colIndexToFind,
1603 const LO hint)
const
1605 const size_t absStartOffset = ptrHost_[localRowIndex];
1606 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1607 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1609 const LO*
const curInd = indHost_.data () + absStartOffset;
1612 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1619 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1624 const LO maxNumEntriesForLinearSearch = 32;
1625 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1630 const LO* beg = curInd;
1631 const LO* end = curInd + numEntriesInRow;
1632 std::pair<const LO*, const LO*> p =
1633 std::equal_range (beg, end, colIndexToFind);
1634 if (p.first != p.second) {
1636 relOffset =
static_cast<LO
> (p.first - beg);
1640 for (LO k = 0; k < numEntriesInRow; ++k) {
1641 if (colIndexToFind == curInd[k]) {
1651 template<
class Scalar,
class LO,
class GO,
class Node>
1653 BlockCrsMatrix<Scalar, LO, GO, Node>::
1654 offsetPerBlock ()
const
1656 return offsetPerBlock_;
1659 template<
class Scalar,
class LO,
class GO,
class Node>
1661 BlockCrsMatrix<Scalar, LO, GO, Node>::
1662 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1663 const size_t pointOffset)
const
1666 const LO rowStride = blockSize_;
1667 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1670 template<
class Scalar,
class LO,
class GO,
class Node>
1672 BlockCrsMatrix<Scalar, LO, GO, Node>::
1673 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1674 const size_t pointOffset)
const
1677 const LO rowStride = blockSize_;
1678 return little_block_type (val + pointOffset, blockSize_, rowStride);
1681 template<
class Scalar,
class LO,
class GO,
class Node>
1682 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1683 BlockCrsMatrix<Scalar, LO, GO, Node>::
1684 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1685 const size_t pointOffset)
const
1688 const LO rowStride = blockSize_;
1689 const size_t bs2 = blockSize_ * blockSize_;
1690 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1693 template<
class Scalar,
class LO,
class GO,
class Node>
1695 BlockCrsMatrix<Scalar, LO, GO, Node>::
1696 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1698 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1700 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1701 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1702 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1703 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1704 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1705 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1709 return little_block_type ();
1713 template<
class Scalar,
class LO,
class GO,
class Node>
1714 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1715 BlockCrsMatrix<Scalar, LO, GO, Node>::
1716 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1718 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1720 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1721 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1722 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1723 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1724 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1725 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1729 return little_block_host_type ();
1734 template<
class Scalar,
class LO,
class GO,
class Node>
1736 BlockCrsMatrix<Scalar, LO, GO, Node>::
1737 checkSizes (const ::Tpetra::SrcDistObject& source)
1740 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1741 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
> (&source);
1744 std::ostream& err = markLocalErrorAndGetStream ();
1745 err <<
"checkSizes: The source object of the Import or Export "
1746 "must be a BlockCrsMatrix with the same template parameters as the "
1747 "target object." << endl;
1752 if (src->getBlockSize () != this->getBlockSize ()) {
1753 std::ostream& err = markLocalErrorAndGetStream ();
1754 err <<
"checkSizes: The source and target objects of the Import or "
1755 <<
"Export must have the same block sizes. The source's block "
1756 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1757 <<
"size = " << this->getBlockSize () <<
"." << endl;
1759 if (! src->graph_.isFillComplete ()) {
1760 std::ostream& err = markLocalErrorAndGetStream ();
1761 err <<
"checkSizes: The source object of the Import or Export is "
1762 "not fill complete. Both source and target objects must be fill "
1763 "complete." << endl;
1765 if (! this->graph_.isFillComplete ()) {
1766 std::ostream& err = markLocalErrorAndGetStream ();
1767 err <<
"checkSizes: The target object of the Import or Export is "
1768 "not fill complete. Both source and target objects must be fill "
1769 "complete." << endl;
1771 if (src->graph_.getColMap ().is_null ()) {
1772 std::ostream& err = markLocalErrorAndGetStream ();
1773 err <<
"checkSizes: The source object of the Import or Export does "
1774 "not have a column Map. Both source and target objects must have "
1775 "column Maps." << endl;
1777 if (this->graph_.getColMap ().is_null ()) {
1778 std::ostream& err = markLocalErrorAndGetStream ();
1779 err <<
"checkSizes: The target object of the Import or Export does "
1780 "not have a column Map. Both source and target objects must have "
1781 "column Maps." << endl;
1785 return ! (* (this->localError_));
1788 template<
class Scalar,
class LO,
class GO,
class Node>
1792 (const ::Tpetra::SrcDistObject&
source,
1793 const size_t numSameIDs,
1800 using ::Tpetra::Details::Behavior;
1801 using ::Tpetra::Details::dualViewStatusToString;
1802 using ::Tpetra::Details::ProfilingRegion;
1806 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1807 const bool debug = Behavior::debug();
1808 const bool verbose = Behavior::verbose();
1813 std::ostringstream
os;
1814 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1815 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::copyAndPermute : " <<
endl;
1820 if (* (this->localError_)) {
1821 std::ostream&
err = this->markLocalErrorAndGetStream ();
1823 <<
"The target object of the Import or Export is already in an error state."
1832 std::ostringstream
os;
1836 std::cerr <<
os.str ();
1843 std::ostream&
err = this->markLocalErrorAndGetStream ();
1851 std::ostream&
err = this->markLocalErrorAndGetStream ();
1853 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1860 std::ostream&
err = this->markLocalErrorAndGetStream ();
1862 <<
"The source (input) object of the Import or "
1863 "Export is either not a BlockCrsMatrix, or does not have the right "
1864 "template parameters. checkSizes() should have caught this. "
1865 "Please report this bug to the Tpetra developers." <<
endl;
1870#ifdef HAVE_TPETRA_DEBUG
1885#ifdef HAVE_TPETRA_DEBUG
1895 std::ostringstream
os;
1897 <<
"canUseLocalColumnIndices: "
1901 std::cerr <<
os.str ();
1910#ifdef HAVE_TPETRA_DEBUG
1911 if (!
srcRowMap.isNodeLocalElement (localRow)) {
1919 values_host_view_type
vals;
1924 if (numEntries > 0) {
1926 if (
err != numEntries) {
1928 if (!
dstRowMap.isNodeLocalElement (localRow)) {
1929#ifdef HAVE_TPETRA_DEBUG
1939 for (LO
k = 0;
k < numEntries; ++
k) {
1942#ifdef HAVE_TPETRA_DEBUG
1958 values_host_view_type
vals;
1961 if (numEntries > 0) {
1963 if (
err != numEntries) {
1965#ifdef HAVE_TPETRA_DEBUG
1966 for (LO
c = 0;
c < numEntries; ++
c) {
1978 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries ();
1984 values_host_view_type
vals;
1991 }
catch (std::exception&
e) {
1993 std::ostringstream
os;
1994 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1995 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1996 << localRow <<
", src->getLocalRowView() threw an exception: "
1998 std::cerr <<
os.str ();
2003 if (numEntries > 0) {
2004 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (
lclDstCols.size ())) {
2007 std::ostringstream
os;
2008 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2009 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
2010 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
2012 std::cerr <<
os.str ();
2019 for (LO
j = 0;
j < numEntries; ++
j) {
2023#ifdef HAVE_TPETRA_DEBUG
2031 }
catch (std::exception&
e) {
2033 std::ostringstream
os;
2034 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2035 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
2036 << localRow <<
", this->replaceLocalValues() threw an exception: "
2038 std::cerr <<
os.str ();
2042 if (
err != numEntries) {
2045 std::ostringstream
os;
2046 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2047 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" "
2048 "localRow " << localRow <<
", this->replaceLocalValues "
2049 "returned " <<
err <<
" instead of numEntries = "
2050 << numEntries <<
endl;
2051 std::cerr <<
os.str ();
2064 values_host_view_type
vals;
2069 }
catch (std::exception&
e) {
2071 std::ostringstream
os;
2072 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2073 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"permute\" "
2075 <<
", src->getLocalRowView() threw an exception: " <<
e.what ();
2076 std::cerr <<
os.str ();
2081 if (numEntries > 0) {
2082 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (
lclDstCols.size ())) {
2089 for (LO
j = 0;
j < numEntries; ++
j) {
2093#ifdef HAVE_TPETRA_DEBUG
2099 if (
err != numEntries) {
2108 std::ostream&
err = this->markLocalErrorAndGetStream ();
2109#ifdef HAVE_TPETRA_DEBUG
2110 err <<
"copyAndPermute: The graph structure of the source of the "
2111 "Import or Export must be a subset of the graph structure of the "
2113 err <<
"invalidSrcCopyRows = [";
2117 typename std::set<LO>::const_iterator
itp1 =
it;
2123 err <<
"], invalidDstCopyRows = [";
2127 typename std::set<LO>::const_iterator
itp1 =
it;
2133 err <<
"], invalidDstCopyCols = [";
2137 typename std::set<LO>::const_iterator
itp1 =
it;
2143 err <<
"], invalidDstPermuteCols = [";
2147 typename std::set<LO>::const_iterator
itp1 =
it;
2153 err <<
"], invalidPermuteFromRows = [";
2157 typename std::set<LO>::const_iterator
itp1 =
it;
2165 err <<
"copyAndPermute: The graph structure of the source of the "
2166 "Import or Export must be a subset of the graph structure of the "
2172 std::ostringstream
os;
2173 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2174 const bool lclSuccess = ! (* (this->localError_));
2175 os <<
"*** Proc " <<
myRank <<
": copyAndPermute "
2180 os <<
": error messages: " << this->errorMessages ();
2182 std::cerr <<
os.str ();
2206 template<
class LO,
class GO>
2212 using ::Tpetra::Details::PackTraits;
2222 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
2223 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2224 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2225 return numEntLen + gidsLen + valsLen;
2239 template<
class ST,
class LO,
class GO>
2241 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
2242 const size_t offset,
2243 const size_t numBytes,
2246 using Kokkos::subview;
2247 using ::Tpetra::Details::PackTraits;
2249 if (numBytes == 0) {
2251 return static_cast<size_t> (0);
2255 const size_t theNumBytes = PackTraits<LO>::packValueCount (numEntLO);
2256 TEUCHOS_TEST_FOR_EXCEPTION
2257 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2258 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2260 const char*
const inBuf = imports.data () + offset;
2261 const size_t actualNumBytes = PackTraits<LO>::unpackValue (numEntLO, inBuf);
2262 TEUCHOS_TEST_FOR_EXCEPTION
2263 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2264 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2266 return static_cast<size_t> (numEntLO);
2273 template<
class ST,
class LO,
class GO>
2275 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
2276 const size_t offset,
2277 const size_t numEnt,
2278 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
2279 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
2280 const size_t numBytesPerValue,
2281 const size_t blockSize)
2283 using Kokkos::subview;
2284 using ::Tpetra::Details::PackTraits;
2290 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2293 const LO numEntLO =
static_cast<size_t> (numEnt);
2295 const size_t numEntBeg = offset;
2296 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
2297 const size_t gidsBeg = numEntBeg + numEntLen;
2298 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2299 const size_t valsBeg = gidsBeg + gidsLen;
2300 const size_t valsLen = numScalarEnt * numBytesPerValue;
2302 char*
const numEntOut = exports.data () + numEntBeg;
2303 char*
const gidsOut = exports.data () + gidsBeg;
2304 char*
const valsOut = exports.data () + valsBeg;
2306 size_t numBytesOut = 0;
2308 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
2311 Kokkos::pair<int, size_t> p;
2312 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
2313 errorCode += p.first;
2314 numBytesOut += p.second;
2316 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
2317 errorCode += p.first;
2318 numBytesOut += p.second;
2321 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2322 TEUCHOS_TEST_FOR_EXCEPTION
2323 (numBytesOut != expectedNumBytes, std::logic_error,
2324 "packRowForBlockCrs: numBytesOut = " << numBytesOut
2325 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2327 TEUCHOS_TEST_FOR_EXCEPTION
2328 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
2329 "PackTraits::packArray returned a nonzero error code");
2335 template<
class ST,
class LO,
class GO>
2337 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
2338 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
2339 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
2340 const size_t offset,
2341 const size_t numBytes,
2342 const size_t numEnt,
2343 const size_t numBytesPerValue,
2344 const size_t blockSize)
2346 using ::Tpetra::Details::PackTraits;
2348 if (numBytes == 0) {
2352 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2353 TEUCHOS_TEST_FOR_EXCEPTION
2354 (
static_cast<size_t> (imports.extent (0)) <= offset,
2355 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2356 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2357 TEUCHOS_TEST_FOR_EXCEPTION
2358 (
static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2359 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2360 << imports.extent (0) <<
" < offset + numBytes = "
2361 << (offset + numBytes) <<
".");
2366 const size_t numEntBeg = offset;
2367 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
2368 const size_t gidsBeg = numEntBeg + numEntLen;
2369 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2370 const size_t valsBeg = gidsBeg + gidsLen;
2371 const size_t valsLen = numScalarEnt * numBytesPerValue;
2373 const char*
const numEntIn = imports.data () + numEntBeg;
2374 const char*
const gidsIn = imports.data () + gidsBeg;
2375 const char*
const valsIn = imports.data () + valsBeg;
2377 size_t numBytesOut = 0;
2380 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
2381 TEUCHOS_TEST_FOR_EXCEPTION
2382 (
static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2383 "unpackRowForBlockCrs: Expected number of entries " << numEnt
2384 <<
" != actual number of entries " << numEntOut <<
".");
2387 Kokkos::pair<int, size_t> p;
2388 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2389 errorCode += p.first;
2390 numBytesOut += p.second;
2392 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2393 errorCode += p.first;
2394 numBytesOut += p.second;
2397 TEUCHOS_TEST_FOR_EXCEPTION
2398 (numBytesOut != numBytes, std::logic_error,
2399 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2400 <<
" != numBytes = " << numBytes <<
".");
2402 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2403 TEUCHOS_TEST_FOR_EXCEPTION
2404 (numBytesOut != expectedNumBytes, std::logic_error,
2405 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2406 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2408 TEUCHOS_TEST_FOR_EXCEPTION
2409 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
2410 "PackTraits::unpackArray returned a nonzero error code");
2416 template<
class Scalar,
class LO,
class GO,
class Node>
2420 (const ::Tpetra::SrcDistObject&
source,
2425 Kokkos::DualView<
size_t*,
2429 using ::Tpetra::Details::Behavior;
2430 using ::Tpetra::Details::dualViewStatusToString;
2431 using ::Tpetra::Details::ProfilingRegion;
2432 using ::Tpetra::Details::PackTraits;
2434 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space
host_exec;
2438 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
2440 const bool debug = Behavior::debug();
2441 const bool verbose = Behavior::verbose();
2446 std::ostringstream
os;
2447 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2448 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
2453 if (* (this->localError_)) {
2454 std::ostream&
err = this->markLocalErrorAndGetStream ();
2456 <<
"The target object of the Import or Export is already in an error state."
2465 std::ostringstream
os;
2467 <<
prefix <<
" " << dualViewStatusToString (
exportLIDs,
"exportLIDs") << std::endl
2468 <<
prefix <<
" " << dualViewStatusToString (exports,
"exports") << std::endl
2470 std::cerr <<
os.str ();
2477 std::ostream&
err = this->markLocalErrorAndGetStream ();
2479 <<
"exportLIDs.extent(0) = " <<
exportLIDs.extent (0)
2481 <<
"." << std::endl;
2485 std::ostream&
err = this->markLocalErrorAndGetStream ();
2486 err <<
prefix <<
"exportLIDs be sync'd to host." << std::endl;
2492 std::ostream&
err = this->markLocalErrorAndGetStream ();
2494 <<
"The source (input) object of the Import or "
2495 "Export is either not a BlockCrsMatrix, or does not have the right "
2496 "template parameters. checkSizes() should have caught this. "
2497 "Please report this bug to the Tpetra developers." << std::endl;
2513 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2517 auto val_host = val_.getHostView(Access::ReadOnly);
2519 PackTraits<impl_scalar_type>
2536 Kokkos::parallel_reduce
2538 [=](
const size_t &
i,
typename reducer_type::value_type &update) {
2558 std::ostringstream
os;
2560 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2562 std::cerr <<
os.str ();
2568 if(exports.extent(0) != totalNumBytes)
2570 const std::string
oldLabel = exports.d_view.label ();
2572 exports = Kokkos::DualView<packet_type*, buffer_device_type>(
newLabel, totalNumBytes);
2574 if (totalNumEntries > 0) {
2579 Kokkos::parallel_scan
2581 [=](
const size_t &
i,
size_t &update,
const bool &
final) {
2582 if (
final)
offset(
i) = update;
2587 std::ostream&
err = this->markLocalErrorAndGetStream ();
2589 <<
"At end of method, the final offset (in bytes) "
2591 << totalNumBytes <<
". "
2592 <<
"Please report this bug to the Tpetra developers." << std::endl;
2606 exports.modify_host();
2608 typedef Kokkos::TeamPolicy<host_exec>
policy_type;
2611 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2612 Kokkos::parallel_for
2614 [=](
const typename policy_type::member_type &
member) {
2615 const size_t i =
member.league_rank();
2616 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2621 values_host_view_type
vals;
2639 (exports.view_host(),
2642 Kokkos::View<const GO*, host_exec>(
gblColInds.data(), maxRowLength),
2651 std::ostringstream
os;
2653 <<
"numBytes computed from packRowForBlockCrs is different from "
2654 <<
"precomputed offset values, LID = " <<
i << std::endl;
2655 std::cerr <<
os.str ();
2663 std::ostringstream
os;
2664 const bool lclSuccess = ! (* (this->localError_));
2668 std::cerr <<
os.str ();
2672 template<
class Scalar,
class LO,
class GO,
class Node>
2680 Kokkos::DualView<
size_t*,
2685 using ::Tpetra::Details::Behavior;
2686 using ::Tpetra::Details::dualViewStatusToString;
2687 using ::Tpetra::Details::ProfilingRegion;
2688 using ::Tpetra::Details::PackTraits;
2691 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2693 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2694 const bool verbose = Behavior::verbose ();
2699 std::ostringstream
os;
2700 auto map = this->graph_.getRowMap ();
2701 auto comm =
map.is_null () ? Teuchos::null :
map->getComm ();
2702 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2703 os <<
"Proc " <<
myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2707 std::cerr <<
os.str ();
2712 if (* (this->localError_)) {
2713 std::ostream&
err = this->markLocalErrorAndGetStream ();
2714 std::ostringstream
os;
2715 os <<
prefix <<
"{Im/Ex}port target is already in error." <<
endl;
2717 std::cerr <<
os.str ();
2727 std::ostream&
err = this->markLocalErrorAndGetStream ();
2728 std::ostringstream
os;
2733 std::cerr <<
os.str ();
2742 std::ostream&
err = this->markLocalErrorAndGetStream ();
2743 std::ostringstream
os;
2745 <<
"Invalid CombineMode value " <<
combineMode <<
". Valid "
2746 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2749 std::cerr <<
os.str ();
2755 if (this->graph_.getColMap ().is_null ()) {
2756 std::ostream&
err = this->markLocalErrorAndGetStream ();
2757 std::ostringstream
os;
2758 os <<
prefix <<
"Target matrix's column Map is null." <<
endl;
2760 std::cerr <<
os.str ();
2772 const size_t blockSize = this->getBlockSize ();
2782 auto val_host = val_.getHostView(Access::ReadOnly);
2784 PackTraits<impl_scalar_type>::packValueCount
2787 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries ();
2791 std::ostringstream
os;
2799 std::cerr <<
os.str ();
2804 std::ostringstream
os;
2805 os <<
prefix <<
"Nothing to unpack. Done!" << std::endl;
2806 std::cerr <<
os.str ();
2813 if (imports.need_sync_host ()) {
2814 imports.sync_host ();
2826 std::ostream&
err = this->markLocalErrorAndGetStream ();
2827 std::ostringstream
os;
2828 os <<
prefix <<
"All input DualViews must be sync'd to host by now. "
2829 <<
"imports_nc.need_sync_host()="
2830 << (imports.need_sync_host () ?
"true" :
"false")
2831 <<
", numPacketsPerLID.need_sync_host()="
2833 <<
", importLIDs.need_sync_host()="
2834 << (
importLIDs.need_sync_host () ?
"true" :
"false")
2837 std::cerr <<
os.str ();
2853 Kokkos::parallel_scan
2854 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets",
policy,
2855 [=] (
const size_t &
i,
size_t &update,
const bool &
final) {
2856 if (
final)
offset(
i) = update;
2867 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2871 using policy_type = Kokkos::TeamPolicy<host_exec>;
2879 using pair_type = Kokkos::pair<size_t, size_t>;
2880 Kokkos::parallel_for
2881 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack",
policy,
2882 [=] (
const typename policy_type::member_type&
member) {
2883 const size_t i =
member.league_rank();
2884 Kokkos::View<GO*, host_scratch_space>
gblColInds
2886 Kokkos::View<LO*, host_scratch_space>
lclColInds
2888 Kokkos::View<impl_scalar_type*, host_scratch_space>
vals
2903 std::ostringstream
os;
2905 <<
"At i = " <<
i <<
", numEnt = " <<
numEnt
2908 std::cerr <<
os.str ();
2927 imports.view_host(),
2931 catch (std::exception&
e) {
2934 std::ostringstream
os;
2935 os <<
prefix <<
"At i = " <<
i <<
", unpackRowForBlockCrs threw: "
2936 <<
e.what () <<
endl;
2937 std::cerr <<
os.str ();
2945 std::ostringstream
os;
2947 <<
"At i = " <<
i <<
", numBytes = " <<
numBytes
2950 std::cerr <<
os.str();
2958 if (
lidsOut(
k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2960 std::ostringstream
os;
2962 <<
"At i = " <<
i <<
", GID " <<
gidsOut(
k)
2963 <<
" is not owned by the calling process."
2965 std::cerr <<
os.str();
2973 const LO*
const lidsRaw =
const_cast<const LO*
> (
lidsOut.data ());
2987 std::ostringstream
os;
2989 <<
" != numCombd = " <<
numCombd <<
"."
2991 std::cerr <<
os.str ();
2999 std::ostream&
err = this->markLocalErrorAndGetStream ();
3002 err <<
" Please run again with the environment variable setting "
3003 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
3009 std::ostringstream
os;
3010 const bool lclSuccess = ! (* (this->localError_));
3014 std::cerr <<
os.str ();
3018 template<
class Scalar,
class LO,
class GO,
class Node>
3022 using Teuchos::TypeNameTraits;
3023 std::ostringstream
os;
3024 os <<
"\"Tpetra::BlockCrsMatrix\": { "
3025 <<
"Template parameters: { "
3032 <<
", Global dimensions: ["
3033 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3034 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
3035 <<
", Block size: " << getBlockSize ()
3041 template<
class Scalar,
class LO,
class GO,
class Node>
3045 const Teuchos::EVerbosityLevel
verbLevel)
const
3047 using Teuchos::ArrayRCP;
3048 using Teuchos::CommRequest;
3049 using Teuchos::FancyOStream;
3050 using Teuchos::getFancyOStream;
3051 using Teuchos::ireceive;
3052 using Teuchos::isend;
3053 using Teuchos::outArg;
3054 using Teuchos::TypeNameTraits;
3055 using Teuchos::VERB_DEFAULT;
3056 using Teuchos::VERB_NONE;
3057 using Teuchos::VERB_LOW;
3058 using Teuchos::VERB_MEDIUM;
3060 using Teuchos::VERB_EXTREME;
3062 using Teuchos::wait;
3066 const Teuchos::EVerbosityLevel
vl =
3076 out <<
"\"Tpetra::BlockCrsMatrix\":" <<
endl;
3079 out <<
"Template parameters:" <<
endl;
3088 <<
"Global dimensions: ["
3089 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3090 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" <<
endl;
3097 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3098 const int myRank = comm.getRank ();
3102 getRowMap()->describe (
out,
vl);
3104 if (! getColMap ().
is_null ()) {
3105 if (getColMap() == getRowMap()) {
3107 out <<
"Column Map: same as row Map" <<
endl;
3114 getColMap ()->describe (
out,
vl);
3117 if (! getDomainMap ().
is_null ()) {
3118 if (getDomainMap () == getRowMap ()) {
3120 out <<
"Domain Map: same as row Map" <<
endl;
3123 else if (getDomainMap () == getColMap ()) {
3125 out <<
"Domain Map: same as column Map" <<
endl;
3132 getDomainMap ()->describe (
out,
vl);
3135 if (! getRangeMap ().
is_null ()) {
3136 if (getRangeMap () == getDomainMap ()) {
3138 out <<
"Range Map: same as domain Map" <<
endl;
3141 else if (getRangeMap () == getRowMap ()) {
3143 out <<
"Range Map: same as row Map" <<
endl;
3150 getRangeMap ()->describe (
out,
vl);
3156 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3157 const int myRank = comm.getRank ();
3158 const int numProcs = comm.getSize ();
3165 Teuchos::OSTab
tab2 (
os);
3176 values_host_view_type
vals;
3277 template<
class Scalar,
class LO,
class GO,
class Node>
3278 Teuchos::RCP<const Teuchos::Comm<int> >
3282 return graph_.getComm();
3286 template<
class Scalar,
class LO,
class GO,
class Node>
3291 return graph_.getGlobalNumCols();
3295 template<
class Scalar,
class LO,
class GO,
class Node>
3300 return graph_.getLocalNumCols();
3303 template<
class Scalar,
class LO,
class GO,
class Node>
3308 return graph_.getIndexBase();
3311 template<
class Scalar,
class LO,
class GO,
class Node>
3316 return graph_.getGlobalNumEntries();
3319 template<
class Scalar,
class LO,
class GO,
class Node>
3324 return graph_.getLocalNumEntries();
3327 template<
class Scalar,
class LO,
class GO,
class Node>
3332 return graph_.getNumEntriesInGlobalRow(
globalRow);
3336 template<
class Scalar,
class LO,
class GO,
class Node>
3341 return graph_.getGlobalMaxNumRowEntries();
3344 template<
class Scalar,
class LO,
class GO,
class Node>
3349 return graph_.hasColMap();
3353 template<
class Scalar,
class LO,
class GO,
class Node>
3358 return graph_.isLocallyIndexed();
3361 template<
class Scalar,
class LO,
class GO,
class Node>
3366 return graph_.isGloballyIndexed();
3369 template<
class Scalar,
class LO,
class GO,
class Node>
3374 return graph_.isFillComplete ();
3377 template<
class Scalar,
class LO,
class GO,
class Node>
3385 template<
class Scalar,
class LO,
class GO,
class Node>
3389 nonconst_global_inds_host_view_type &,
3390 nonconst_values_host_view_type &,
3394 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3395 "This class doesn't support global matrix indexing.");
3399 template<
class Scalar,
class LO,
class GO,
class Node>
3403 global_inds_host_view_type &,
3404 values_host_view_type &)
const
3407 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3408 "This class doesn't support global matrix indexing.");
3412 template<
class Scalar,
class LO,
class GO,
class Node>
3416 local_inds_host_view_type &
colInds,
3417 values_host_view_type &
vals)
const
3419 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
3420 colInds = local_inds_host_view_type();
3421 vals = values_host_view_type();
3432 template<
class Scalar,
class LO,
class GO,
class Node>
3436 local_inds_host_view_type &
colInds,
3437 nonconst_values_host_view_type &
vals)
const
3439 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
3440 colInds = nonconst_local_inds_host_view_type();
3441 vals = nonconst_values_host_view_type();
3453 template<
class Scalar,
class LO,
class GO,
class Node>
3474 LO
bs = getBlockSize();
3475 for(
size_t r=0;
r<getLocalNumRows();
r++)
3479 for(
int b=0;
b<
bs;
b++)
3490 template<
class Scalar,
class LO,
class GO,
class Node>
3493 leftScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3496 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3497 "not implemented.");
3501 template<
class Scalar,
class LO,
class GO,
class Node>
3504 rightScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3507 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3508 "not implemented.");
3512 template<
class Scalar,
class LO,
class GO,
class Node>
3513 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3520 template<
class Scalar,
class LO,
class GO,
class Node>
3521 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3526 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3527 "not implemented.");
3537#define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3538 template class BlockCrsMatrix< S, LO, GO, NODE >;
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh,...
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
LO local_ordinal_type
The type of local indices.
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
size_t getLocalNumRows() const override
get the local number of block rows
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
local_matrix_device_type getLocalMatrixDevice() const
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
std::string description() const override
One-line description of this object.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh,...
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row.
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
Kokkos::View< const impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
char packet_type
Implementation detail; tells.
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
global_size_t getGlobalNumRows() const override
get the global number of block rows
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
MultiVector for multiple degrees of freedom per mesh point.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Struct that holds views of the contents of a CrsMatrix.
One or more distributed dense vectors.
int local_ordinal_type
Default value of Scalar template parameter.
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
@ ZERO
Replace old values with zero.