Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44// mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45// incorrect (as() is not a device function) and usually irrelevant
46// (it would only matter if LocalOrdinal were bigger than size_t on a
47// particular platform, which is unlikely).
48
49// KDD Aug 2020: In the permute/pack/unpack functors,
50// the Enabled template parameter is specialized in
51// downstream packages like Stokhos using SFINAE to provide partial
52// specializations based on the scalar type of the SrcView and DstView
53// template parameters. See #7898.
54// Do not remove it before checking with Stokhos and other specializing users.
55
56#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
57#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
58
59#include "Kokkos_Core.hpp"
60#include "Kokkos_ArithTraits.hpp"
61#include <sstream>
62#include <stdexcept>
63
64namespace Tpetra {
65namespace KokkosRefactor {
66namespace Details {
67
73namespace Impl {
74
81template<class IntegerType,
82 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
84 static KOKKOS_INLINE_FUNCTION bool
85 test (const IntegerType x,
87};
88
89// Partial specialization for the case where IntegerType IS signed.
90template<class IntegerType>
92 static KOKKOS_INLINE_FUNCTION bool
93 test (const IntegerType x,
95 {
97 }
98};
99
100// Partial specialization for the case where IntegerType is NOT signed.
101template<class IntegerType>
102struct OutOfBounds<IntegerType, false> {
103 static KOKKOS_INLINE_FUNCTION bool
104 test (const IntegerType x,
106 {
107 return x >= exclusiveUpperBound;
108 }
109};
110
113template<class IntegerType>
114KOKKOS_INLINE_FUNCTION bool
119
120} // namespace Impl
121
122 // Functors for implementing packAndPrepare and unpackAndCombine
123 // through parallel_for
124
125 template <typename DstView, typename SrcView, typename IdxView,
126 typename Enabled = void>
127 struct PackArraySingleColumn {
128 typedef typename DstView::execution_space execution_space;
129 typedef typename execution_space::size_type size_type;
130
131 DstView dst;
132 SrcView src;
133 IdxView idx;
134 size_t col;
135
136 PackArraySingleColumn (const DstView& dst_,
137 const SrcView& src_,
138 const IdxView& idx_,
139 const size_t col_) :
140 dst(dst_), src(src_), idx(idx_), col(col_) {}
141
143 operator() (const size_type k) const {
144 dst(k) = src(idx(k), col);
145 }
146
147 static void
148 pack (const DstView& dst,
149 const SrcView& src,
150 const IdxView& idx,
151 const size_t col)
152 {
153 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
154 Kokkos::parallel_for
155 ("Tpetra::MultiVector pack one col",
156 range_type (0, idx.size ()),
157 PackArraySingleColumn (dst, src, idx, col));
158 }
159 };
160
161 template <typename DstView,
162 typename SrcView,
163 typename IdxView,
164 typename SizeType = typename DstView::execution_space::size_type,
165 typename Enabled = void>
166 class PackArraySingleColumnWithBoundsCheck {
167 private:
168 static_assert (Kokkos::is_view<DstView>::value,
169 "DstView must be a Kokkos::View.");
170 static_assert (Kokkos::is_view<SrcView>::value,
171 "SrcView must be a Kokkos::View.");
172 static_assert (Kokkos::is_view<IdxView>::value,
173 "IdxView must be a Kokkos::View.");
174 static_assert (static_cast<int> (DstView::rank) == 1,
175 "DstView must be a rank-1 Kokkos::View.");
176 static_assert (static_cast<int> (SrcView::rank) == 2,
177 "SrcView must be a rank-2 Kokkos::View.");
178 static_assert (static_cast<int> (IdxView::rank) == 1,
179 "IdxView must be a rank-1 Kokkos::View.");
180 static_assert (std::is_integral<SizeType>::value,
181 "SizeType must be a built-in integer type.");
182 public:
183 typedef SizeType size_type;
184 using value_type = size_t;
185
186 private:
187 DstView dst;
188 SrcView src;
189 IdxView idx;
190 size_type col;
191
192 public:
193 PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
194 const SrcView& src_,
195 const IdxView& idx_,
196 const size_type col_) :
197 dst (dst_), src (src_), idx (idx_), col (col_) {}
198
199 KOKKOS_INLINE_FUNCTION void
200 operator() (const size_type k, value_type& lclErrCount) const {
201 using index_type = typename IdxView::non_const_value_type;
202
203 const index_type lclRow = idx(k);
204 if (lclRow < static_cast<index_type> (0) ||
205 lclRow >= static_cast<index_type> (src.extent (0))) {
206 ++lclErrCount;
207 }
208 else {
209 dst(k) = src(lclRow, col);
210 }
211 }
212
213 KOKKOS_INLINE_FUNCTION
214 void init (value_type& initialErrorCount) const {
215 initialErrorCount = 0;
216 }
217
218 KOKKOS_INLINE_FUNCTION void
219 join (value_type& dstErrorCount,
220 const value_type& srcErrorCount) const
221 {
222 dstErrorCount += srcErrorCount;
223 }
224
225 static void
226 pack (const DstView& dst,
227 const SrcView& src,
228 const IdxView& idx,
229 const size_type col)
230 {
231 typedef typename DstView::execution_space execution_space;
232 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
233 typedef typename IdxView::non_const_value_type index_type;
234
235 size_t errorCount = 0;
236 Kokkos::parallel_reduce
237 ("Tpetra::MultiVector pack one col debug only",
238 range_type (0, idx.size ()),
239 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
240 errorCount);
241
242 if (errorCount != 0) {
243 // Go back and find the out-of-bounds entries in the index
244 // array. Performance doesn't matter since we are already in
245 // an error state, so we can do this sequentially, on host.
246 auto idx_h = Kokkos::create_mirror_view (idx);
247
248 // DEEP_COPY REVIEW - NOT TESTED
249 Kokkos::deep_copy (idx_h, idx);
250
251 std::vector<index_type> badIndices;
252 const size_type numInds = idx_h.extent (0);
253 for (size_type k = 0; k < numInds; ++k) {
254 if (idx_h(k) < static_cast<index_type> (0) ||
255 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
256 badIndices.push_back (idx_h(k));
257 }
258 }
259
260 TEUCHOS_TEST_FOR_EXCEPTION
261 (errorCount != badIndices.size (), std::logic_error,
262 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
263 << " != badIndices.size() = " << badIndices.size () << ". This sho"
264 "uld never happen. Please report this to the Tpetra developers.");
265
266 std::ostringstream os;
267 os << "MultiVector single-column pack kernel had "
268 << badIndices.size () << " out-of bounds index/ices. "
269 "Here they are: [";
270 for (size_t k = 0; k < badIndices.size (); ++k) {
271 os << badIndices[k];
272 if (k + 1 < badIndices.size ()) {
273 os << ", ";
274 }
275 }
276 os << "].";
277 throw std::runtime_error (os.str ());
278 }
279 }
280 };
281
282
283 template <typename DstView, typename SrcView, typename IdxView>
284 void
285 pack_array_single_column (const DstView& dst,
286 const SrcView& src,
287 const IdxView& idx,
288 const size_t col,
289 const bool debug = true)
290 {
291 static_assert (Kokkos::is_view<DstView>::value,
292 "DstView must be a Kokkos::View.");
293 static_assert (Kokkos::is_view<SrcView>::value,
294 "SrcView must be a Kokkos::View.");
295 static_assert (Kokkos::is_view<IdxView>::value,
296 "IdxView must be a Kokkos::View.");
297 static_assert (static_cast<int> (DstView::rank) == 1,
298 "DstView must be a rank-1 Kokkos::View.");
299 static_assert (static_cast<int> (SrcView::rank) == 2,
300 "SrcView must be a rank-2 Kokkos::View.");
301 static_assert (static_cast<int> (IdxView::rank) == 1,
302 "IdxView must be a rank-1 Kokkos::View.");
303
304 if (debug) {
305 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
306 impl_type::pack (dst, src, idx, col);
307 }
308 else {
309 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
310 impl_type::pack (dst, src, idx, col);
311 }
312 }
313
314 template <typename DstView, typename SrcView, typename IdxView,
315 typename Enabled = void>
316 struct PackArrayMultiColumn {
317 typedef typename DstView::execution_space execution_space;
318 typedef typename execution_space::size_type size_type;
319
320 DstView dst;
321 SrcView src;
322 IdxView idx;
323 size_t numCols;
324
325 PackArrayMultiColumn (const DstView& dst_,
326 const SrcView& src_,
327 const IdxView& idx_,
328 const size_t numCols_) :
329 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
330
331 KOKKOS_INLINE_FUNCTION void
332 operator() (const size_type k) const {
333 const typename IdxView::value_type localRow = idx(k);
334 const size_t offset = k*numCols;
335 for (size_t j = 0; j < numCols; ++j) {
336 dst(offset + j) = src(localRow, j);
337 }
338 }
339
340 static void pack(const DstView& dst,
341 const SrcView& src,
342 const IdxView& idx,
343 size_t numCols) {
344 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
345 Kokkos::parallel_for
346 ("Tpetra::MultiVector pack multicol const stride",
347 range_type (0, idx.size ()),
348 PackArrayMultiColumn (dst, src, idx, numCols));
349 }
350 };
351
352 template <typename DstView,
353 typename SrcView,
354 typename IdxView,
355 typename SizeType = typename DstView::execution_space::size_type,
356 typename Enabled = void>
357 class PackArrayMultiColumnWithBoundsCheck {
358 public:
359 using size_type = SizeType;
360 using value_type = size_t;
361
362 private:
363 DstView dst;
364 SrcView src;
365 IdxView idx;
366 size_type numCols;
367
368 public:
369 PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
370 const SrcView& src_,
371 const IdxView& idx_,
372 const size_type numCols_) :
373 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
374
375 KOKKOS_INLINE_FUNCTION void
376 operator() (const size_type k, value_type& lclErrorCount) const {
377 typedef typename IdxView::non_const_value_type index_type;
378
379 const index_type lclRow = idx(k);
380 if (lclRow < static_cast<index_type> (0) ||
381 lclRow >= static_cast<index_type> (src.extent (0))) {
382 ++lclErrorCount; // failed
383 }
384 else {
385 const size_type offset = k*numCols;
386 for (size_type j = 0; j < numCols; ++j) {
387 dst(offset + j) = src(lclRow, j);
388 }
389 }
390 }
391
392 KOKKOS_INLINE_FUNCTION
393 void init (value_type& initialErrorCount) const {
394 initialErrorCount = 0;
395 }
396
397 KOKKOS_INLINE_FUNCTION void
398 join (value_type& dstErrorCount,
399 const value_type& srcErrorCount) const
400 {
401 dstErrorCount += srcErrorCount;
402 }
403
404 static void
405 pack (const DstView& dst,
406 const SrcView& src,
407 const IdxView& idx,
408 const size_type numCols)
409 {
410 typedef typename DstView::execution_space execution_space;
411 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
412 typedef typename IdxView::non_const_value_type index_type;
413
414 size_t errorCount = 0;
415 Kokkos::parallel_reduce
416 ("Tpetra::MultiVector pack multicol const stride debug only",
417 range_type (0, idx.size ()),
418 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
419 errorCount);
420 if (errorCount != 0) {
421 // Go back and find the out-of-bounds entries in the index
422 // array. Performance doesn't matter since we are already in
423 // an error state, so we can do this sequentially, on host.
424 auto idx_h = Kokkos::create_mirror_view (idx);
425
426 // DEEP_COPY REVIEW - NOT TESTED
427 Kokkos::deep_copy (idx_h, idx);
428
429 std::vector<index_type> badIndices;
430 const size_type numInds = idx_h.extent (0);
431 for (size_type k = 0; k < numInds; ++k) {
432 if (idx_h(k) < static_cast<index_type> (0) ||
433 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
434 badIndices.push_back (idx_h(k));
435 }
436 }
437
438 TEUCHOS_TEST_FOR_EXCEPTION
439 (errorCount != badIndices.size (), std::logic_error,
440 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
441 << " != badIndices.size() = " << badIndices.size () << ". This sho"
442 "uld never happen. Please report this to the Tpetra developers.");
443
444 std::ostringstream os;
445 os << "Tpetra::MultiVector multiple-column pack kernel had "
446 << badIndices.size () << " out-of bounds index/ices (errorCount = "
447 << errorCount << "): [";
448 for (size_t k = 0; k < badIndices.size (); ++k) {
449 os << badIndices[k];
450 if (k + 1 < badIndices.size ()) {
451 os << ", ";
452 }
453 }
454 os << "].";
455 throw std::runtime_error (os.str ());
456 }
457 }
458 };
459
460
461 template <typename DstView,
462 typename SrcView,
463 typename IdxView>
464 void
465 pack_array_multi_column (const DstView& dst,
466 const SrcView& src,
467 const IdxView& idx,
468 const size_t numCols,
469 const bool debug = true)
470 {
471 static_assert (Kokkos::is_view<DstView>::value,
472 "DstView must be a Kokkos::View.");
473 static_assert (Kokkos::is_view<SrcView>::value,
474 "SrcView must be a Kokkos::View.");
475 static_assert (Kokkos::is_view<IdxView>::value,
476 "IdxView must be a Kokkos::View.");
477 static_assert (static_cast<int> (DstView::rank) == 1,
478 "DstView must be a rank-1 Kokkos::View.");
479 static_assert (static_cast<int> (SrcView::rank) == 2,
480 "SrcView must be a rank-2 Kokkos::View.");
481 static_assert (static_cast<int> (IdxView::rank) == 1,
482 "IdxView must be a rank-1 Kokkos::View.");
483
484 if (debug) {
485 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
486 SrcView, IdxView> impl_type;
487 impl_type::pack (dst, src, idx, numCols);
488 }
489 else {
490 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
491 impl_type::pack (dst, src, idx, numCols);
492 }
493 }
494
495 template <typename DstView, typename SrcView, typename IdxView,
496 typename ColView, typename Enabled = void>
497 struct PackArrayMultiColumnVariableStride {
498 typedef typename DstView::execution_space execution_space;
499 typedef typename execution_space::size_type size_type;
500
501 DstView dst;
502 SrcView src;
503 IdxView idx;
504 ColView col;
505 size_t numCols;
506
507 PackArrayMultiColumnVariableStride (const DstView& dst_,
508 const SrcView& src_,
509 const IdxView& idx_,
510 const ColView& col_,
511 const size_t numCols_) :
512 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
513
514 KOKKOS_INLINE_FUNCTION
515 void operator() (const size_type k) const {
516 const typename IdxView::value_type localRow = idx(k);
517 const size_t offset = k*numCols;
518 for (size_t j = 0; j < numCols; ++j) {
519 dst(offset + j) = src(localRow, col(j));
520 }
521 }
522
523 static void pack(const DstView& dst,
524 const SrcView& src,
525 const IdxView& idx,
526 const ColView& col,
527 size_t numCols) {
528 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
529 Kokkos::parallel_for
530 ("Tpetra::MultiVector pack multicol var stride",
531 range_type (0, idx.size ()),
532 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
533 }
534 };
535
536 template <typename DstView,
537 typename SrcView,
538 typename IdxView,
539 typename ColView,
540 typename SizeType = typename DstView::execution_space::size_type,
541 typename Enabled = void>
542 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
543 public:
544 using size_type = SizeType;
545 using value_type = size_t;
546
547 private:
548 DstView dst;
549 SrcView src;
550 IdxView idx;
551 ColView col;
552 size_type numCols;
553
554 public:
555 PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
556 const SrcView& src_,
557 const IdxView& idx_,
558 const ColView& col_,
559 const size_type numCols_) :
560 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
561
562 KOKKOS_INLINE_FUNCTION void
563 operator() (const size_type k, value_type& lclErrorCount) const {
564 typedef typename IdxView::non_const_value_type row_index_type;
565 typedef typename ColView::non_const_value_type col_index_type;
566
567 const row_index_type lclRow = idx(k);
568 if (lclRow < static_cast<row_index_type> (0) ||
569 lclRow >= static_cast<row_index_type> (src.extent (0))) {
570 ++lclErrorCount = 0;
571 }
572 else {
573 const size_type offset = k*numCols;
574 for (size_type j = 0; j < numCols; ++j) {
575 const col_index_type lclCol = col(j);
576 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
577 ++lclErrorCount = 0;
578 }
579 else { // all indices are valid; do the assignment
580 dst(offset + j) = src(lclRow, lclCol);
581 }
582 }
583 }
584 }
585
586 KOKKOS_INLINE_FUNCTION void
587 init (value_type& initialErrorCount) const {
588 initialErrorCount = 0;
589 }
590
591 KOKKOS_INLINE_FUNCTION void
592 join (value_type& dstErrorCount,
593 const value_type& srcErrorCount) const
594 {
595 dstErrorCount += srcErrorCount;
596 }
597
598 static void
599 pack (const DstView& dst,
600 const SrcView& src,
601 const IdxView& idx,
602 const ColView& col,
603 const size_type numCols)
604 {
605 using execution_space = typename DstView::execution_space;
606 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
607 using row_index_type = typename IdxView::non_const_value_type;
608 using col_index_type = typename ColView::non_const_value_type;
609
610 size_t errorCount = 0;
611 Kokkos::parallel_reduce
612 ("Tpetra::MultiVector pack multicol var stride debug only",
613 range_type (0, idx.size ()),
614 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
615 col, numCols),
616 errorCount);
617 if (errorCount != 0) {
618 constexpr size_t maxNumBadIndicesToPrint = 100;
619
620 std::ostringstream os; // for error reporting
621 os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
622 "found " << errorCount
623 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
624
625 // Go back and find any out-of-bounds entries in the array of
626 // row indices. Performance doesn't matter since we are already
627 // in an error state, so we can do this sequentially, on host.
628 auto idx_h = Kokkos::create_mirror_view (idx);
629
630 // DEEP_COPY REVIEW - NOT TESTED
631 Kokkos::deep_copy (idx_h, idx);
632
633 std::vector<row_index_type> badRows;
634 const size_type numRowInds = idx_h.extent (0);
635 for (size_type k = 0; k < numRowInds; ++k) {
636 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
637 badRows.push_back (idx_h(k));
638 }
639 }
640
641 if (badRows.size () != 0) {
642 os << badRows.size () << " out-of-bounds row ind"
643 << (badRows.size () != size_t (1) ? "ices" : "ex");
644 if (badRows.size () <= maxNumBadIndicesToPrint) {
645 os << ": [";
646 for (size_t k = 0; k < badRows.size (); ++k) {
647 os << badRows[k];
648 if (k + 1 < badRows.size ()) {
649 os << ", ";
650 }
651 }
652 os << "]. ";
653 }
654 else {
655 os << ". ";
656 }
657 }
658 else {
659 os << "No out-of-bounds row indices. ";
660 }
661
662 // Go back and find any out-of-bounds entries in the array
663 // of column indices.
664 auto col_h = Kokkos::create_mirror_view (col);
665
666 // DEEP_COPY REVIEW - NOT TESTED
667 Kokkos::deep_copy (col_h, col);
668
669 std::vector<col_index_type> badCols;
670 const size_type numColInds = col_h.extent (0);
671 for (size_type k = 0; k < numColInds; ++k) {
672 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
673 badCols.push_back (col_h(k));
674 }
675 }
676
677 if (badCols.size () != 0) {
678 os << badCols.size () << " out-of-bounds column ind"
679 << (badCols.size () != size_t (1) ? "ices" : "ex");
680 if (badCols.size () <= maxNumBadIndicesToPrint) {
681 os << ": [";
682 for (size_t k = 0; k < badCols.size (); ++k) {
683 os << badCols[k];
684 if (k + 1 < badCols.size ()) {
685 os << ", ";
686 }
687 }
688 os << "]. ";
689 }
690 else {
691 os << ". ";
692 }
693 }
694 else {
695 os << "No out-of-bounds column indices. ";
696 }
697
698 TEUCHOS_TEST_FOR_EXCEPTION
699 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
700 std::logic_error, "Tpetra::MultiVector variable stride pack "
701 "kernel reports errorCount=" << errorCount << ", but we failed "
702 "to find any bad rows or columns. This should never happen. "
703 "Please report this to the Tpetra developers.");
704
705 throw std::runtime_error (os.str ());
706 } // hasErr
707 }
708 };
709
710 template <typename DstView,
711 typename SrcView,
712 typename IdxView,
713 typename ColView>
714 void
715 pack_array_multi_column_variable_stride (const DstView& dst,
716 const SrcView& src,
717 const IdxView& idx,
718 const ColView& col,
719 const size_t numCols,
720 const bool debug = true)
721 {
722 static_assert (Kokkos::is_view<DstView>::value,
723 "DstView must be a Kokkos::View.");
724 static_assert (Kokkos::is_view<SrcView>::value,
725 "SrcView must be a Kokkos::View.");
726 static_assert (Kokkos::is_view<IdxView>::value,
727 "IdxView must be a Kokkos::View.");
728 static_assert (Kokkos::is_view<ColView>::value,
729 "ColView must be a Kokkos::View.");
730 static_assert (static_cast<int> (DstView::rank) == 1,
731 "DstView must be a rank-1 Kokkos::View.");
732 static_assert (static_cast<int> (SrcView::rank) == 2,
733 "SrcView must be a rank-2 Kokkos::View.");
734 static_assert (static_cast<int> (IdxView::rank) == 1,
735 "IdxView must be a rank-1 Kokkos::View.");
736 static_assert (static_cast<int> (ColView::rank) == 1,
737 "ColView must be a rank-1 Kokkos::View.");
738
739 if (debug) {
740 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
741 SrcView, IdxView, ColView> impl_type;
742 impl_type::pack (dst, src, idx, col, numCols);
743 }
744 else {
745 typedef PackArrayMultiColumnVariableStride<DstView,
746 SrcView, IdxView, ColView> impl_type;
747 impl_type::pack (dst, src, idx, col, numCols);
748 }
749 }
750
751 // Tag types to indicate whether to use atomic updates in the
752 // various CombineMode "Op"s.
753 struct atomic_tag {};
754 struct nonatomic_tag {};
755
756 struct AddOp {
757 template<class SC>
758 KOKKOS_INLINE_FUNCTION
759 void operator() (atomic_tag, SC& dest, const SC& src) const {
760 Kokkos::atomic_add (&dest, src);
761 }
762
763 template<class SC>
764 KOKKOS_INLINE_FUNCTION
765 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
766 dest += src;
767 }
768 };
769
770 struct InsertOp {
771 // There's no point to using Kokkos::atomic_assign for the REPLACE
772 // or INSERT CombineModes, since this is not a well-defined
773 // reduction for MultiVector anyway. See GitHub Issue #4417
774 // (which this fixes).
775 template<class SC>
776 KOKKOS_INLINE_FUNCTION
777 void operator() (atomic_tag, SC& dest, const SC& src) const {
778 dest = src;
779 }
780
781 template<class SC>
782 KOKKOS_INLINE_FUNCTION
783 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
784 dest = src;
785 }
786 };
787
788 struct AbsMaxOp {
789 template <class Scalar>
790 struct AbsMaxHelper{
791 Scalar value;
792
793 KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) {
794 auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
795 auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
796 value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
797 return *this;
798 }
799
800 KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const {
801 AbsMaxHelper ret = *this;
802 ret += rhs;
803 return ret;
804 }
805 };
806
807 template <typename SC>
808 KOKKOS_INLINE_FUNCTION
809 void operator() (atomic_tag, SC& dst, const SC& src) const {
810 Kokkos::atomic_add(reinterpret_cast<AbsMaxHelper<SC>*>(&dst), AbsMaxHelper<SC>{src});
811 }
812
813 template <typename SC>
814 KOKKOS_INLINE_FUNCTION
815 void operator() (nonatomic_tag, SC& dst, const SC& src) const {
816 auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
817 auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
818 dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
819 }
820 };
821
822 template <typename ExecutionSpace,
823 typename DstView,
824 typename SrcView,
825 typename IdxView,
826 typename Op,
827 typename Enabled = void>
828 class UnpackArrayMultiColumn {
829 private:
830 static_assert (Kokkos::is_view<DstView>::value,
831 "DstView must be a Kokkos::View.");
832 static_assert (Kokkos::is_view<SrcView>::value,
833 "SrcView must be a Kokkos::View.");
834 static_assert (Kokkos::is_view<IdxView>::value,
835 "IdxView must be a Kokkos::View.");
836 static_assert (static_cast<int> (DstView::rank) == 2,
837 "DstView must be a rank-2 Kokkos::View.");
838 static_assert (static_cast<int> (SrcView::rank) == 1,
839 "SrcView must be a rank-1 Kokkos::View.");
840 static_assert (static_cast<int> (IdxView::rank) == 1,
841 "IdxView must be a rank-1 Kokkos::View.");
842
843 public:
844 typedef typename ExecutionSpace::execution_space execution_space;
845 typedef typename execution_space::size_type size_type;
846
847 private:
848 DstView dst;
849 SrcView src;
850 IdxView idx;
851 Op op;
852 size_t numCols;
853
854 public:
855 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
856 const DstView& dst_,
857 const SrcView& src_,
858 const IdxView& idx_,
859 const Op& op_,
860 const size_t numCols_) :
861 dst (dst_),
862 src (src_),
863 idx (idx_),
864 op (op_),
865 numCols (numCols_)
866 {}
867
868 template<class TagType>
869 KOKKOS_INLINE_FUNCTION void
870 operator() (TagType tag, const size_type k) const
871 {
872 static_assert
873 (std::is_same<TagType, atomic_tag>::value ||
874 std::is_same<TagType, nonatomic_tag>::value,
875 "TagType must be atomic_tag or nonatomic_tag.");
876
877 const typename IdxView::value_type localRow = idx(k);
878 const size_t offset = k*numCols;
879 for (size_t j = 0; j < numCols; ++j) {
880 op (tag, dst(localRow, j), src(offset+j));
881 }
882 }
883
884 static void
885 unpack (const ExecutionSpace& execSpace,
886 const DstView& dst,
887 const SrcView& src,
888 const IdxView& idx,
889 const Op& op,
890 const size_t numCols,
891 const bool use_atomic_updates)
892 {
893 if (use_atomic_updates) {
894 using range_type =
895 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
896 Kokkos::parallel_for
897 ("Tpetra::MultiVector unpack const stride atomic",
898 range_type (0, idx.size ()),
899 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
900 }
901 else {
902 using range_type =
903 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
904 Kokkos::parallel_for
905 ("Tpetra::MultiVector unpack const stride nonatomic",
906 range_type (0, idx.size ()),
907 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
908 }
909 }
910 };
911
912 template <typename ExecutionSpace,
913 typename DstView,
914 typename SrcView,
915 typename IdxView,
916 typename Op,
917 typename SizeType = typename ExecutionSpace::execution_space::size_type,
918 typename Enabled = void>
919 class UnpackArrayMultiColumnWithBoundsCheck {
920 private:
921 static_assert (Kokkos::is_view<DstView>::value,
922 "DstView must be a Kokkos::View.");
923 static_assert (Kokkos::is_view<SrcView>::value,
924 "SrcView must be a Kokkos::View.");
925 static_assert (Kokkos::is_view<IdxView>::value,
926 "IdxView must be a Kokkos::View.");
927 static_assert (static_cast<int> (DstView::rank) == 2,
928 "DstView must be a rank-2 Kokkos::View.");
929 static_assert (static_cast<int> (SrcView::rank) == 1,
930 "SrcView must be a rank-1 Kokkos::View.");
931 static_assert (static_cast<int> (IdxView::rank) == 1,
932 "IdxView must be a rank-1 Kokkos::View.");
933 static_assert (std::is_integral<SizeType>::value,
934 "SizeType must be a built-in integer type.");
935
936 public:
937 using execution_space = typename ExecutionSpace::execution_space;
938 using size_type = SizeType;
939 using value_type = size_t;
940
941 private:
942 DstView dst;
943 SrcView src;
944 IdxView idx;
945 Op op;
946 size_type numCols;
947
948 public:
949 UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
950 const DstView& dst_,
951 const SrcView& src_,
952 const IdxView& idx_,
953 const Op& op_,
954 const size_type numCols_) :
955 dst (dst_),
956 src (src_),
957 idx (idx_),
958 op (op_),
959 numCols (numCols_)
960 {}
961
962 template<class TagType>
963 KOKKOS_INLINE_FUNCTION void
964 operator() (TagType tag,
965 const size_type k,
966 size_t& lclErrCount) const
967 {
968 static_assert
969 (std::is_same<TagType, atomic_tag>::value ||
970 std::is_same<TagType, nonatomic_tag>::value,
971 "TagType must be atomic_tag or nonatomic_tag.");
972 using index_type = typename IdxView::non_const_value_type;
973
974 const index_type lclRow = idx(k);
975 if (lclRow < static_cast<index_type> (0) ||
976 lclRow >= static_cast<index_type> (dst.extent (0))) {
977 ++lclErrCount;
978 }
979 else {
980 const size_type offset = k*numCols;
981 for (size_type j = 0; j < numCols; ++j) {
982 op (tag, dst(lclRow,j), src(offset+j));
983 }
984 }
985 }
986
987 template<class TagType>
988 KOKKOS_INLINE_FUNCTION void
989 init (TagType, size_t& initialErrorCount) const {
990 initialErrorCount = 0;
991 }
992
993 template<class TagType>
994 KOKKOS_INLINE_FUNCTION void
995 join (TagType,
996 size_t& dstErrorCount,
997 const size_t& srcErrorCount) const
998 {
999 dstErrorCount += srcErrorCount;
1000 }
1001
1002 static void
1003 unpack (const ExecutionSpace& execSpace,
1004 const DstView& dst,
1005 const SrcView& src,
1006 const IdxView& idx,
1007 const Op& op,
1008 const size_type numCols,
1009 const bool use_atomic_updates)
1010 {
1011 using index_type = typename IdxView::non_const_value_type;
1012
1013 size_t errorCount = 0;
1014 if (use_atomic_updates) {
1015 using range_type =
1016 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1017 Kokkos::parallel_reduce
1018 ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1019 range_type (0, idx.size ()),
1020 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1021 idx, op, numCols),
1022 errorCount);
1023 }
1024 else {
1025 using range_type =
1026 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1027 Kokkos::parallel_reduce
1028 ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1029 range_type (0, idx.size ()),
1030 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1031 idx, op, numCols),
1032 errorCount);
1033 }
1034
1035 if (errorCount != 0) {
1036 // Go back and find the out-of-bounds entries in the index
1037 // array. Performance doesn't matter since we are already in
1038 // an error state, so we can do this sequentially, on host.
1039 auto idx_h = Kokkos::create_mirror_view (idx);
1040
1041 // DEEP_COPY REVIEW - NOT TESTED
1042 Kokkos::deep_copy (idx_h, idx);
1043
1044 std::vector<index_type> badIndices;
1045 const size_type numInds = idx_h.extent (0);
1046 for (size_type k = 0; k < numInds; ++k) {
1047 if (idx_h(k) < static_cast<index_type> (0) ||
1048 idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1049 badIndices.push_back (idx_h(k));
1050 }
1051 }
1052
1053 if (errorCount != badIndices.size ()) {
1054 std::ostringstream os;
1055 os << "MultiVector unpack kernel: errorCount = " << errorCount
1056 << " != badIndices.size() = " << badIndices.size ()
1057 << ". This should never happen. "
1058 "Please report this to the Tpetra developers.";
1059 throw std::logic_error (os.str ());
1060 }
1061
1062 std::ostringstream os;
1063 os << "MultiVector unpack kernel had " << badIndices.size ()
1064 << " out-of bounds index/ices. Here they are: [";
1065 for (size_t k = 0; k < badIndices.size (); ++k) {
1066 os << badIndices[k];
1067 if (k + 1 < badIndices.size ()) {
1068 os << ", ";
1069 }
1070 }
1071 os << "].";
1072 throw std::runtime_error (os.str ());
1073 }
1074 }
1075 };
1076
1077 template <typename ExecutionSpace,
1078 typename DstView,
1079 typename SrcView,
1080 typename IdxView,
1081 typename Op>
1082 void
1083 unpack_array_multi_column (const ExecutionSpace& execSpace,
1084 const DstView& dst,
1085 const SrcView& src,
1086 const IdxView& idx,
1087 const Op& op,
1088 const size_t numCols,
1089 const bool use_atomic_updates,
1090 const bool debug)
1091 {
1092 static_assert (Kokkos::is_view<DstView>::value,
1093 "DstView must be a Kokkos::View.");
1094 static_assert (Kokkos::is_view<SrcView>::value,
1095 "SrcView must be a Kokkos::View.");
1096 static_assert (Kokkos::is_view<IdxView>::value,
1097 "IdxView must be a Kokkos::View.");
1098 static_assert (static_cast<int> (DstView::rank) == 2,
1099 "DstView must be a rank-2 Kokkos::View.");
1100 static_assert (static_cast<int> (SrcView::rank) == 1,
1101 "SrcView must be a rank-1 Kokkos::View.");
1102 static_assert (static_cast<int> (IdxView::rank) == 1,
1103 "IdxView must be a rank-1 Kokkos::View.");
1104
1105 if (debug) {
1106 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1107 DstView, SrcView, IdxView, Op> impl_type;
1108 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1109 use_atomic_updates);
1110 }
1111 else {
1112 typedef UnpackArrayMultiColumn<ExecutionSpace,
1113 DstView, SrcView, IdxView, Op> impl_type;
1114 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1115 use_atomic_updates);
1116 }
1117 }
1118
1119 template <typename ExecutionSpace,
1120 typename DstView,
1121 typename SrcView,
1122 typename IdxView,
1123 typename ColView,
1124 typename Op,
1125 typename Enabled = void>
1126 class UnpackArrayMultiColumnVariableStride {
1127 private:
1128 static_assert (Kokkos::is_view<DstView>::value,
1129 "DstView must be a Kokkos::View.");
1130 static_assert (Kokkos::is_view<SrcView>::value,
1131 "SrcView must be a Kokkos::View.");
1132 static_assert (Kokkos::is_view<IdxView>::value,
1133 "IdxView must be a Kokkos::View.");
1134 static_assert (Kokkos::is_view<ColView>::value,
1135 "ColView must be a Kokkos::View.");
1136 static_assert (static_cast<int> (DstView::rank) == 2,
1137 "DstView must be a rank-2 Kokkos::View.");
1138 static_assert (static_cast<int> (SrcView::rank) == 1,
1139 "SrcView must be a rank-1 Kokkos::View.");
1140 static_assert (static_cast<int> (IdxView::rank) == 1,
1141 "IdxView must be a rank-1 Kokkos::View.");
1142 static_assert (static_cast<int> (ColView::rank) == 1,
1143 "ColView must be a rank-1 Kokkos::View.");
1144
1145 public:
1146 using execution_space = typename ExecutionSpace::execution_space;
1147 using size_type = typename execution_space::size_type;
1148
1149 private:
1150 DstView dst;
1151 SrcView src;
1152 IdxView idx;
1153 ColView col;
1154 Op op;
1155 size_t numCols;
1156
1157 public:
1158 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1159 const DstView& dst_,
1160 const SrcView& src_,
1161 const IdxView& idx_,
1162 const ColView& col_,
1163 const Op& op_,
1164 const size_t numCols_) :
1165 dst (dst_),
1166 src (src_),
1167 idx (idx_),
1168 col (col_),
1169 op (op_),
1170 numCols (numCols_)
1171 {}
1172
1173 template<class TagType>
1174 KOKKOS_INLINE_FUNCTION void
1175 operator() (TagType tag, const size_type k) const
1176 {
1177 static_assert
1178 (std::is_same<TagType, atomic_tag>::value ||
1179 std::is_same<TagType, nonatomic_tag>::value,
1180 "TagType must be atomic_tag or nonatomic_tag.");
1181
1182 const typename IdxView::value_type localRow = idx(k);
1183 const size_t offset = k*numCols;
1184 for (size_t j = 0; j < numCols; ++j) {
1185 op (tag, dst(localRow, col(j)), src(offset+j));
1186 }
1187 }
1188
1189 static void
1190 unpack (const ExecutionSpace& execSpace,
1191 const DstView& dst,
1192 const SrcView& src,
1193 const IdxView& idx,
1194 const ColView& col,
1195 const Op& op,
1196 const size_t numCols,
1197 const bool use_atomic_updates)
1198 {
1199 if (use_atomic_updates) {
1200 using range_type =
1201 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1202 Kokkos::parallel_for
1203 ("Tpetra::MultiVector unpack var stride atomic",
1204 range_type (0, idx.size ()),
1205 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1206 idx, col, op, numCols));
1207 }
1208 else {
1209 using range_type =
1210 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1211 Kokkos::parallel_for
1212 ("Tpetra::MultiVector unpack var stride nonatomic",
1213 range_type (0, idx.size ()),
1214 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1215 idx, col, op, numCols));
1216 }
1217 }
1218 };
1219
1220 template <typename ExecutionSpace,
1221 typename DstView,
1222 typename SrcView,
1223 typename IdxView,
1224 typename ColView,
1225 typename Op,
1226 typename SizeType = typename ExecutionSpace::execution_space::size_type,
1227 typename Enabled = void>
1228 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1229 private:
1230 static_assert (Kokkos::is_view<DstView>::value,
1231 "DstView must be a Kokkos::View.");
1232 static_assert (Kokkos::is_view<SrcView>::value,
1233 "SrcView must be a Kokkos::View.");
1234 static_assert (Kokkos::is_view<IdxView>::value,
1235 "IdxView must be a Kokkos::View.");
1236 static_assert (Kokkos::is_view<ColView>::value,
1237 "ColView must be a Kokkos::View.");
1238 static_assert (static_cast<int> (DstView::rank) == 2,
1239 "DstView must be a rank-2 Kokkos::View.");
1240 static_assert (static_cast<int> (SrcView::rank) == 1,
1241 "SrcView must be a rank-1 Kokkos::View.");
1242 static_assert (static_cast<int> (IdxView::rank) == 1,
1243 "IdxView must be a rank-1 Kokkos::View.");
1244 static_assert (static_cast<int> (ColView::rank) == 1,
1245 "ColView must be a rank-1 Kokkos::View.");
1246 static_assert (std::is_integral<SizeType>::value,
1247 "SizeType must be a built-in integer type.");
1248
1249 public:
1250 using execution_space = typename ExecutionSpace::execution_space;
1251 using size_type = SizeType;
1252 using value_type = size_t;
1253
1254 private:
1255 DstView dst;
1256 SrcView src;
1257 IdxView idx;
1258 ColView col;
1259 Op op;
1260 size_type numCols;
1261
1262 public:
1263 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1264 (const ExecutionSpace& /* execSpace */,
1265 const DstView& dst_,
1266 const SrcView& src_,
1267 const IdxView& idx_,
1268 const ColView& col_,
1269 const Op& op_,
1270 const size_t numCols_) :
1271 dst (dst_),
1272 src (src_),
1273 idx (idx_),
1274 col (col_),
1275 op (op_),
1276 numCols (numCols_)
1277 {}
1278
1279 template<class TagType>
1280 KOKKOS_INLINE_FUNCTION void
1281 operator() (TagType tag,
1282 const size_type k,
1283 value_type& lclErrorCount) const
1284 {
1285 static_assert
1286 (std::is_same<TagType, atomic_tag>::value ||
1287 std::is_same<TagType, nonatomic_tag>::value,
1288 "TagType must be atomic_tag or nonatomic_tag.");
1289 using row_index_type = typename IdxView::non_const_value_type;
1290 using col_index_type = typename ColView::non_const_value_type;
1291
1292 const row_index_type lclRow = idx(k);
1293 if (lclRow < static_cast<row_index_type> (0) ||
1294 lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1295 ++lclErrorCount;
1296 }
1297 else {
1298 const size_type offset = k * numCols;
1299 for (size_type j = 0; j < numCols; ++j) {
1300 const col_index_type lclCol = col(j);
1301 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1302 ++lclErrorCount;
1303 }
1304 else { // all indices are valid; apply the op
1305 op (tag, dst(lclRow, col(j)), src(offset+j));
1306 }
1307 }
1308 }
1309 }
1310
1311 KOKKOS_INLINE_FUNCTION void
1312 init (value_type& initialErrorCount) const {
1313 initialErrorCount = 0;
1314 }
1315
1316 KOKKOS_INLINE_FUNCTION void
1317 join (value_type& dstErrorCount,
1318 const value_type& srcErrorCount) const
1319 {
1320 dstErrorCount += srcErrorCount;
1321 }
1322
1323 static void
1324 unpack (const ExecutionSpace& execSpace,
1325 const DstView& dst,
1326 const SrcView& src,
1327 const IdxView& idx,
1328 const ColView& col,
1329 const Op& op,
1330 const size_type numCols,
1331 const bool use_atomic_updates)
1332 {
1333 using row_index_type = typename IdxView::non_const_value_type;
1334 using col_index_type = typename ColView::non_const_value_type;
1335
1336 size_t errorCount = 0;
1337 if (use_atomic_updates) {
1338 using range_type =
1339 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1340 Kokkos::parallel_reduce
1341 ("Tpetra::MultiVector unpack var stride atomic debug only",
1342 range_type (0, idx.size ()),
1343 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1344 (execSpace, dst, src, idx, col, op, numCols),
1345 errorCount);
1346 }
1347 else {
1348 using range_type =
1349 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1350 Kokkos::parallel_reduce
1351 ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1352 range_type (0, idx.size ()),
1353 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1354 (execSpace, dst, src, idx, col, op, numCols),
1355 errorCount);
1356 }
1357
1358 if (errorCount != 0) {
1359 constexpr size_t maxNumBadIndicesToPrint = 100;
1360
1361 std::ostringstream os; // for error reporting
1362 os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1363 "found " << errorCount
1364 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1365
1366 // Go back and find any out-of-bounds entries in the array of
1367 // row indices. Performance doesn't matter since we are
1368 // already in an error state, so we can do this sequentially,
1369 // on host.
1370 auto idx_h = Kokkos::create_mirror_view (idx);
1371
1372 // DEEP_COPY REVIEW - NOT TESTED
1373 Kokkos::deep_copy (idx_h, idx);
1374
1375 std::vector<row_index_type> badRows;
1376 const size_type numRowInds = idx_h.extent (0);
1377 for (size_type k = 0; k < numRowInds; ++k) {
1378 if (idx_h(k) < static_cast<row_index_type> (0) ||
1379 idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1380 badRows.push_back (idx_h(k));
1381 }
1382 }
1383
1384 if (badRows.size () != 0) {
1385 os << badRows.size () << " out-of-bounds row ind"
1386 << (badRows.size () != size_t (1) ? "ices" : "ex");
1387 if (badRows.size () <= maxNumBadIndicesToPrint) {
1388 os << ": [";
1389 for (size_t k = 0; k < badRows.size (); ++k) {
1390 os << badRows[k];
1391 if (k + 1 < badRows.size ()) {
1392 os << ", ";
1393 }
1394 }
1395 os << "]. ";
1396 }
1397 else {
1398 os << ". ";
1399 }
1400 }
1401 else {
1402 os << "No out-of-bounds row indices. ";
1403 }
1404
1405 // Go back and find any out-of-bounds entries in the array
1406 // of column indices.
1407 auto col_h = Kokkos::create_mirror_view (col);
1408
1409 // DEEP_COPY REVIEW - NOT TESTED
1410 Kokkos::deep_copy (col_h, col);
1411
1412 std::vector<col_index_type> badCols;
1413 const size_type numColInds = col_h.extent (0);
1414 for (size_type k = 0; k < numColInds; ++k) {
1415 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1416 badCols.push_back (col_h(k));
1417 }
1418 }
1419
1420 if (badCols.size () != 0) {
1421 os << badCols.size () << " out-of-bounds column ind"
1422 << (badCols.size () != size_t (1) ? "ices" : "ex");
1423 if (badCols.size () <= maxNumBadIndicesToPrint) {
1424 for (size_t k = 0; k < badCols.size (); ++k) {
1425 os << ": [";
1426 os << badCols[k];
1427 if (k + 1 < badCols.size ()) {
1428 os << ", ";
1429 }
1430 }
1431 os << "]. ";
1432 }
1433 else {
1434 os << ". ";
1435 }
1436 }
1437 else {
1438 os << "No out-of-bounds column indices. ";
1439 }
1440
1441 TEUCHOS_TEST_FOR_EXCEPTION
1442 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1443 std::logic_error, "Tpetra::MultiVector variable stride unpack "
1444 "kernel reports errorCount=" << errorCount << ", but we failed "
1445 "to find any bad rows or columns. This should never happen. "
1446 "Please report this to the Tpetra developers.");
1447
1448 throw std::runtime_error (os.str ());
1449 } // hasErr
1450 }
1451 };
1452
1453 template <typename ExecutionSpace,
1454 typename DstView,
1455 typename SrcView,
1456 typename IdxView,
1457 typename ColView,
1458 typename Op>
1459 void
1460 unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1461 const DstView& dst,
1462 const SrcView& src,
1463 const IdxView& idx,
1464 const ColView& col,
1465 const Op& op,
1466 const size_t numCols,
1467 const bool use_atomic_updates,
1468 const bool debug)
1469 {
1470 static_assert (Kokkos::is_view<DstView>::value,
1471 "DstView must be a Kokkos::View.");
1472 static_assert (Kokkos::is_view<SrcView>::value,
1473 "SrcView must be a Kokkos::View.");
1474 static_assert (Kokkos::is_view<IdxView>::value,
1475 "IdxView must be a Kokkos::View.");
1476 static_assert (Kokkos::is_view<ColView>::value,
1477 "ColView must be a Kokkos::View.");
1478 static_assert (static_cast<int> (DstView::rank) == 2,
1479 "DstView must be a rank-2 Kokkos::View.");
1480 static_assert (static_cast<int> (SrcView::rank) == 1,
1481 "SrcView must be a rank-1 Kokkos::View.");
1482 static_assert (static_cast<int> (IdxView::rank) == 1,
1483 "IdxView must be a rank-1 Kokkos::View.");
1484 static_assert (static_cast<int> (ColView::rank) == 1,
1485 "ColView must be a rank-1 Kokkos::View.");
1486
1487 if (debug) {
1488 using impl_type =
1489 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1490 DstView, SrcView, IdxView, ColView, Op>;
1491 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1492 use_atomic_updates);
1493 }
1494 else {
1495 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1496 DstView, SrcView, IdxView, ColView, Op>;
1497 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1498 use_atomic_updates);
1499 }
1500 }
1501
1502 template <typename DstView, typename SrcView,
1503 typename DstIdxView, typename SrcIdxView, typename Op,
1504 typename Enabled = void>
1505 struct PermuteArrayMultiColumn {
1506 typedef typename DstView::execution_space execution_space;
1507 typedef typename execution_space::size_type size_type;
1508
1509 DstView dst;
1510 SrcView src;
1511 DstIdxView dst_idx;
1512 SrcIdxView src_idx;
1513 size_t numCols;
1514 Op op;
1515
1516 PermuteArrayMultiColumn (const DstView& dst_,
1517 const SrcView& src_,
1518 const DstIdxView& dst_idx_,
1519 const SrcIdxView& src_idx_,
1520 const size_t numCols_,
1521 const Op& op_) :
1522 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1523 numCols(numCols_), op(op_) {}
1524
1525 KOKKOS_INLINE_FUNCTION void
1526 operator() (const size_type k) const {
1527 const typename DstIdxView::value_type toRow = dst_idx(k);
1528 const typename SrcIdxView::value_type fromRow = src_idx(k);
1529 nonatomic_tag tag; // permute does not need atomics
1530 for (size_t j = 0; j < numCols; ++j) {
1531 op(tag, dst(toRow, j), src(fromRow, j));
1532 }
1533 }
1534
1535 static void
1536 permute (const DstView& dst,
1537 const SrcView& src,
1538 const DstIdxView& dst_idx,
1539 const SrcIdxView& src_idx,
1540 const size_t numCols,
1541 const Op& op)
1542 {
1543 using range_type =
1544 Kokkos::RangePolicy<execution_space, size_type>;
1545 // permute does not need atomics for Op
1546 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1547 Kokkos::parallel_for
1548 ("Tpetra::MultiVector permute multicol const stride",
1549 range_type (0, n),
1550 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1551 }
1552 };
1553
1554 // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1555 // SrcView::Rank == 2
1556 template <typename DstView, typename SrcView,
1557 typename DstIdxView, typename SrcIdxView, typename Op>
1558 void permute_array_multi_column(const DstView& dst,
1559 const SrcView& src,
1560 const DstIdxView& dst_idx,
1561 const SrcIdxView& src_idx,
1562 size_t numCols,
1563 const Op& op) {
1564 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1565 dst, src, dst_idx, src_idx, numCols, op);
1566 }
1567
1568 template <typename DstView, typename SrcView,
1569 typename DstIdxView, typename SrcIdxView,
1570 typename DstColView, typename SrcColView, typename Op,
1571 typename Enabled = void>
1572 struct PermuteArrayMultiColumnVariableStride {
1573 typedef typename DstView::execution_space execution_space;
1574 typedef typename execution_space::size_type size_type;
1575
1576 DstView dst;
1577 SrcView src;
1578 DstIdxView dst_idx;
1579 SrcIdxView src_idx;
1580 DstColView dst_col;
1581 SrcColView src_col;
1582 size_t numCols;
1583 Op op;
1584
1585 PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1586 const SrcView& src_,
1587 const DstIdxView& dst_idx_,
1588 const SrcIdxView& src_idx_,
1589 const DstColView& dst_col_,
1590 const SrcColView& src_col_,
1591 const size_t numCols_,
1592 const Op& op_) :
1593 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1594 dst_col(dst_col_), src_col(src_col_),
1595 numCols(numCols_), op(op_) {}
1596
1597 KOKKOS_INLINE_FUNCTION void
1598 operator() (const size_type k) const {
1599 const typename DstIdxView::value_type toRow = dst_idx(k);
1600 const typename SrcIdxView::value_type fromRow = src_idx(k);
1601 const nonatomic_tag tag; // permute does not need atomics
1602 for (size_t j = 0; j < numCols; ++j) {
1603 op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1604 }
1605 }
1606
1607 static void
1608 permute (const DstView& dst,
1609 const SrcView& src,
1610 const DstIdxView& dst_idx,
1611 const SrcIdxView& src_idx,
1612 const DstColView& dst_col,
1613 const SrcColView& src_col,
1614 const size_t numCols,
1615 const Op& op)
1616 {
1617 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1618 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1619 Kokkos::parallel_for
1620 ("Tpetra::MultiVector permute multicol var stride",
1621 range_type (0, n),
1622 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1623 dst_col, src_col, numCols, op));
1624 }
1625 };
1626
1627 // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1628 // SrcView::Rank == 2
1629 template <typename DstView, typename SrcView,
1630 typename DstIdxView, typename SrcIdxView,
1631 typename DstColView, typename SrcColView, typename Op>
1632 void permute_array_multi_column_variable_stride(const DstView& dst,
1633 const SrcView& src,
1634 const DstIdxView& dst_idx,
1635 const SrcIdxView& src_idx,
1636 const DstColView& dst_col,
1637 const SrcColView& src_col,
1638 size_t numCols,
1639 const Op& op) {
1640 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1641 DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1642 dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1643 }
1644
1645} // Details namespace
1646} // KokkosRefactor namespace
1647} // Tpetra namespace
1648
1649#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...