40#ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41#define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43#include "TpetraCore_config.h"
44#include "Teuchos_Array.hpp"
45#include "Teuchos_ArrayView.hpp"
89namespace PackCrsMatrixImpl {
97template<
class OutputOffsetsViewType,
99 class InputOffsetsViewType,
100 class InputLocalRowIndicesViewType,
101 class InputLocalRowPidsViewType,
103#ifdef HAVE_TPETRA_DEBUG
111 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112 typedef typename CountsViewType::non_const_value_type count_type;
113 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
117 typedef typename OutputOffsetsViewType::device_type device_type;
118 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
119 typename device_type::execution_space>::value,
120 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
122 "OutputOffsetsViewType must be a Kokkos::View.");
123 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125 static_assert (std::is_integral<output_offset_type>::value,
126 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127 static_assert (Kokkos::is_view<CountsViewType>::value,
128 "CountsViewType must be a Kokkos::View.");
129 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130 "CountsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<count_type>::value,
132 "The type of each entry of CountsViewType must be a built-in integer type.");
133 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
134 "InputOffsetsViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<input_offset_type>::value,
136 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
138 "InputLocalRowIndicesViewType must be a Kokkos::View.");
139 static_assert (std::is_integral<local_row_index_type>::value,
140 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
163 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
165 if (
numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
166 std::ostringstream
os;
168 <<
" != counts.extent(0) = " << counts_.extent (0)
173 static_cast<size_t> (outputOffsets_.extent (0))) {
174 std::ostringstream
os;
176 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
185 output_offset_type& update,
186 const bool final)
const
189 if (
curInd <
static_cast<local_row_index_type
> (0)) {
197 if (
curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
202 outputOffsets_(
curInd) = update;
205 if (
curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
207 if (
static_cast<size_t> (
lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
208 static_cast<local_row_index_type
> (
lclRow) <
static_cast<local_row_index_type
> (0)) {
216 const count_type count =
217 static_cast<count_type
> (rowOffsets_(
lclRow+1) - rowOffsets_(
lclRow));
223 const count_type
numBytes = (count == 0) ?
224 static_cast<count_type
> (0) :
225 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
242 typedef typename device_type::execution_space execution_space;
243 auto error_h = Kokkos::create_mirror_view (error_);
245 Kokkos::deep_copy (execution_space(),
error_h, error_);
252 typename InputOffsetsViewType::const_type rowOffsets_;
253 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
254 typename InputLocalRowPidsViewType::const_type lclRowPids_;
255 count_type sizeOfLclCount_;
256 count_type sizeOfGblColInd_;
257 count_type sizeOfPid_;
258 count_type sizeOfValue_;
259 Kokkos::View<int, device_type> error_;
276typename CountsViewType::non_const_value_type
282 const typename CountsViewType::non_const_value_type
sizeOfLclCount,
284 const typename CountsViewType::non_const_value_type
sizeOfPid,
285 const typename CountsViewType::non_const_value_type
sizeOfValue)
289 typename InputLocalRowIndicesViewType::const_type,
290 typename InputLocalRowPidsViewType::const_type>
functor_type;
291 typedef typename CountsViewType::non_const_value_type count_type;
292 typedef typename OutputOffsetsViewType::size_type size_type;
293 typedef typename OutputOffsetsViewType::execution_space execution_space;
294 typedef typename functor_type::local_row_index_type LO;
295 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
296 const char prefix[] =
"computeNumPacketsAndOffsets: ";
298 count_type count = 0;
306 (
rowOffsets.extent (0) <=
static_cast<size_type
> (1),
307 std::invalid_argument,
prefix <<
"There is at least one row to pack, "
308 "but the matrix has no rows. lclRowInds.extent(0) = " <<
313 static_cast<size_type
> (
numRowsToPack + 1), std::invalid_argument,
314 prefix <<
"Output dimension does not match number of rows to pack. "
316 <<
" != lclRowInds.extent(0) + 1 = "
331 (
errCode != 0, std::runtime_error,
prefix <<
"parallel_scan error code "
341 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
347 <<
total <<
"." << std::endl;
351 os <<
"outputOffsets: [";
358 os <<
"]" << std::endl;
366 os <<
"]" << std::endl;
380 using Tpetra::Details::getEntryOnHost;
381 return static_cast<count_type
> (getEntryOnHost (
outputOffsets,
401template<
class ST,
class ColumnMap,
class BufferDeviceType>
403Kokkos::pair<int, size_t>
405 const Kokkos::View<char*, BufferDeviceType>& exports,
411 const size_t num_bytes_per_value,
412 const bool pack_pids)
414 using Kokkos::subview;
415 using LO =
typename ColumnMap::local_ordinal_type;
416 using GO =
typename ColumnMap::global_ordinal_type;
434 static_cast<size_t> (0);
466 error_code +=
p.first;
470 if (error_code != 0) {
482template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
483struct PackCrsMatrixFunctor {
486 typedef typename local_matrix_device_type::value_type ST;
489 typedef typename local_matrix_device_type::device_type DT;
491 typedef Kokkos::View<const size_t*, BufferDeviceType>
492 num_packets_per_lid_view_type;
493 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
494 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
498 typedef typename num_packets_per_lid_view_type::non_const_value_type
500 typedef typename offsets_view_type::non_const_value_type
502 typedef Kokkos::pair<int, LO> value_type;
504 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
505 "local_map_type::local_ordinal_type and "
506 "local_matrix_device_type::ordinal_type must be the same.");
508 local_matrix_device_type local_matrix;
509 local_map_type local_col_map;
510 exports_view_type exports;
511 num_packets_per_lid_view_type num_packets_per_lid;
512 export_lids_view_type export_lids;
513 source_pids_view_type source_pids;
514 offsets_view_type offsets;
515 size_t num_bytes_per_value;
539 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
542 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
548 using ::Tpetra::Details::OrdinalTraits;
552 KOKKOS_INLINE_FUNCTION
void
553 join (value_type& dst,
const value_type& src)
const
557 if (src.first != 0 && dst.first == 0) {
// Per-index body of the pack functor: handles entry i of export_lids and
// records failures in dst, an (error code, index) pair.
//
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so the comments here describe only the statements that are
// visible.
//
// Error codes assigned in the visible code:
//   1 - export_lid >= local_matrix.numRows()
//   2 - offset, or offset + num_bytes, exceeds exports.size()
//   3 - bytes packed by packCrsMatrixRow differ from num_packets_per_lid(i)
//   otherwise - the nonzero code returned by packCrsMatrixRow
562 KOKKOS_INLINE_FUNCTION
563 void operator() (
const LO i, value_type& dst)
const
565 const size_t offset = offsets[i];
566 const LO export_lid = export_lids[i];
567 const size_t buf_size = exports.size();
568 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map[export_lid+1] is read here, before the
// export_lid >= numRows() check below. Confirm export_lid is validated by the
// caller; otherwise this read may be out of bounds.
569 const size_t num_ent =
570 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
571 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): join() adopts src only when dst.first == 0, yet every guard in
// this function tests dst.first != 0. If the reduction's initial value has
// first == 0 (init is not visible here), none of these assignments would ever
// fire. Confirm whether `== 0` was intended.
581 if (export_lid >= local_matrix.numRows ()) {
582 if (dst.first != 0) {
583 dst = Kokkos::make_pair (1, i);
// Both the start offset and the end of this row's packed bytes must lie
// within the exports buffer.
587 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
588 if (dst.first != 0) {
589 dst = Kokkos::make_pair (2, i);
// Subviews of this row's values and column indices, delimited by row_map.
599 const auto row_beg = local_matrix.graph.row_map[export_lid];
600 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
601 auto vals_in = subview (local_matrix.values,
602 Kokkos::make_pair (row_beg, row_end));
603 auto lids_in = subview (local_matrix.graph.entries,
604 Kokkos::make_pair (row_beg, row_end));
605 typedef local_map_type LMT;
606 typedef BufferDeviceType BDT;
// Pack the row into exports; p.first is read as an error code and p.second as
// the number of bytes packed.
607 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
608 source_pids, vals_in, offset,
609 num_ent, num_bytes_per_value,
611 int error_code_this_row = p.first;
612 size_t num_bytes_packed_this_row = p.second;
613 if (error_code_this_row != 0) {
614 if (dst.first != 0) {
615 dst = Kokkos::make_pair (error_code_this_row, i);
// The row packed without error, but the byte count must still match the
// expected size for this entry.
618 else if (num_bytes_packed_this_row != num_bytes) {
619 if (dst.first != 0) {
620 dst = Kokkos::make_pair (3, i);
633template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
637 const Kokkos::View<char*, BufferDeviceType>& exports,
641 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
642 const size_t num_bytes_per_value,
643 const bool pack_pids)
646 using DT =
typename LocalMatrix::device_type;
647 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
648 const char prefix[] =
"Tpetra::Details::do_pack: ";
650 if (export_lids.extent (0) != 0) {
652 (
static_cast<size_t> (offsets.extent (0)) !=
653 static_cast<size_t> (export_lids.extent (0) + 1),
654 std::invalid_argument,
prefix <<
"offsets.extent(0) = "
655 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
656 << export_lids.extent (0) <<
") + 1.");
658 (export_lids.extent (0) != num_packets_per_lid.extent (0),
659 std::invalid_argument,
prefix <<
"export_lids.extent(0) = " <<
660 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
661 << num_packets_per_lid.extent (0) <<
".");
666 (pack_pids && exports.extent (0) != 0 &&
667 source_pids.extent (0) == 0, std::invalid_argument,
prefix <<
668 "pack_pids is true, and exports.extent(0) = " <<
669 exports.extent (0) <<
" != 0, meaning that we need to pack at "
670 "least one matrix entry, but source_pids.extent(0) = 0.");
676 num_packets_per_lid, export_lids,
677 source_pids, offsets, num_bytes_per_value,
680 typename pack_functor_type::value_type
result;
681 range_type
range (0, num_packets_per_lid.extent (0));
688 (
true, std::runtime_error,
prefix <<
"PackCrsMatrixFunctor "
689 "reported error code " <<
result.first <<
" for the first "
690 "bad row " <<
result.second <<
".");
723template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
726 Kokkos::DualView<char*, BufferDeviceType>& exports,
727 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
728 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
729 const Kokkos::View<const int*, typename NT::device_type>&
export_pids,
731 const bool pack_pids)
734 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
739 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
740 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
741 constexpr bool debug =
false;
743 auto local_matrix =
sourceMatrix.getLocalMatrixDevice ();
744 auto local_col_map =
sourceMatrix.getColMap ()->getLocalMap ();
752 static_cast<size_t> (export_lids.extent (0));
755 static_cast<size_t> (num_packets_per_lid.extent (0)),
756 std::invalid_argument,
prefix <<
"num_export_lids.extent(0) = "
758 << num_packets_per_lid.extent (0) <<
".");
761 (num_packets_per_lid.data () ==
NULL, std::invalid_argument,
763 "num_packets_per_lid.data() = "
764 << num_packets_per_lid.data () <<
" == NULL.");
771 size_t num_bytes_per_value = 0;
787 if (local_matrix.values.extent(0) > 0) {
788 const ST&
val = local_matrix.values(0);
791 using Teuchos::reduceAll;
795 Teuchos::outArg (num_bytes_per_value));
799 exports = exports_view_type (
"exports", 0);
809 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
810 local_matrix.graph.row_map, export_lids,
816 if (count >
static_cast<size_t> (exports.extent (0))) {
817 exports = exports_view_type (
"exports", count);
819 std::ostringstream
os;
820 os <<
"*** exports resized to " << count << std::endl;
821 std::cerr <<
os.str ();
825 std::ostringstream
os;
826 os <<
"*** count: " << count <<
", exports.extent(0): "
827 << exports.extent (0) << std::endl;
828 std::cerr <<
os.str ();
835 (pack_pids && exports.extent (0) != 0 &&
837 "pack_pids is true, and exports.extent(0) = " <<
838 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
839 "one matrix entry, but export_pids.extent(0) = 0.");
841 typedef typename std::decay<
decltype (local_matrix)>::type
842 local_matrix_device_type;
843 typedef typename std::decay<
decltype (local_col_map)>::type
846 exports.modify_device ();
849 (local_matrix, local_col_map,
exports_d, num_packets_per_lid,
850 export_lids,
export_pids, offsets, num_bytes_per_value,
857template<
typename ST,
typename LO,
typename GO,
typename NT>
860 Teuchos::Array<char>& exports,
862 const Teuchos::ArrayView<const LO>&
exportLIDs,
868 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
870 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
880 "num_packets_per_lid");
894 Kokkos::DualView<char*, buffer_device_type>
exports_dv;
895 constexpr bool pack_pids =
false;
896 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
913 if (
static_cast<size_t> (exports.size ()) !=
914 static_cast<size_t> (
exports_dv.extent (0))) {
917 Kokkos::View<char*, host_dev_type>
exports_h (exports.getRawPtr (),
923template<
typename ST,
typename LO,
typename GO,
typename NT>
936 Kokkos::View<int*, device_type>
exportPIDs_d (
"exportPIDs", 0);
937 constexpr bool pack_pids =
false;
950 "Tpetra::Details::packCrsMatrixNew",
953 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
958template<
typename ST,
typename LO,
typename GO,
typename NT>
963 const Teuchos::ArrayView<const LO>&
exportLIDs,
964 const Teuchos::ArrayView<const int>&
sourcePIDs,
969 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
970 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace>
host_dev_type;
972 typename local_matrix_device_type::device_type
outputDevice;
973 typedef typename NT::execution_space execution_space;
977 std::unique_ptr<std::string>
prefix;
979 const int myRank = [&] () {
981 if (
map.get () ==
nullptr) {
984 auto comm =
map->getComm ();
985 if (comm.get () ==
nullptr) {
988 return comm->getRank ();
990 std::ostringstream
os;
991 os <<
"Proc " <<
myRank <<
": packCrsMatrixWithOwningPIDs: ";
992 prefix = std::unique_ptr<std::string> (
new std::string (
os.str ()));
994 std::ostringstream
os2;
996 std::cerr <<
os2.str ();
1007 "num_packets_per_lid");
1023 constexpr bool pack_pids =
true;
1025 PackCrsMatrixImpl::packCrsMatrix
1029 catch (std::exception&
e) {
1031 std::ostringstream
os;
1032 os << *
prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1033 <<
e.what () << std::endl;
1034 std::cerr <<
os.str ();
1040 std::ostringstream
os;
1041 os << *
prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1042 "not a subclass of std::exception" << std::endl;
1043 std::cerr <<
os.str ();
1057 catch (std::exception&
e) {
1059 std::ostringstream
os;
1060 os << *
prefix <<
"Kokkos::deep_copy threw: " <<
e.what () << std::endl;
1061 std::cerr <<
os.str ();
1067 std::ostringstream
os;
1068 os << *
prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1069 "of std::exception" << std::endl;
1070 std::cerr <<
os.str ();
1077 std::ostringstream
os;
1078 os << *
prefix <<
"done" << std::endl;
1079 std::cerr <<
os.str ();
1086#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1088 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1089 Teuchos::Array<char>&, \
1090 const Teuchos::ArrayView<size_t>&, \
1091 const Teuchos::ArrayView<const LO>&, \
1094 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1095 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1096 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1097 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1100 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1101 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1102 const Teuchos::ArrayView<size_t>&, \
1103 const Teuchos::ArrayView<const LO>&, \
1104 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary.
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Struct that holds views of the contents of a CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.