Tpetra_BlockMultiVector_def.hpp
// @HEADER
// ***********************************************************************
//
// Tpetra: Templated Linear Algebra Services Package
// Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ************************************************************************
// @HEADER

#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP

#include "Tpetra_BlockMultiVector_decl.hpp"
#include "Tpetra_BlockView.hpp"
#include "Teuchos_OrdinalTraits.hpp"


namespace Tpetra {

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
BlockMultiVector<Scalar, LO, GO, Node>::
getMultiVectorView () const
{
  return mv_;
}

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
BlockMultiVector<Scalar, LO, GO, Node>::
getBlockMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
{
  typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
  TEUCHOS_TEST_FOR_EXCEPTION(
    src_bmv == nullptr, std::invalid_argument, "Tpetra::"
    "BlockMultiVector: The source object of an Import or Export to a "
    "BlockMultiVector must also be a BlockMultiVector.");
  return Teuchos::rcp (src_bmv, false); // nonowning RCP
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& in,
                  const Teuchos::DataAccess copyOrView) :
  dist_object_type (in),
  meshMap_ (in.meshMap_),
  pointMap_ (in.pointMap_),
  mv_ (in.mv_, copyOrView),
  blockSize_ (in.blockSize_)
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const map_type& meshMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (makePointMap (meshMap, blockSize)),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
  blockSize_ (blockSize)
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const map_type& meshMap,
                  const map_type& pointMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (pointMap),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
  blockSize_ (blockSize)
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const mv_type& X_mv,
                  const map_type& meshMap,
                  const LO blockSize) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  blockSize_ (blockSize)
{
  using Teuchos::RCP;

  if (X_mv.getCopyOrView () == Teuchos::View) {
    // The input MultiVector has view semantics, so assignment just
    // does a shallow copy.
    mv_ = X_mv;
  }
  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
    // The input MultiVector has copy semantics.  We can't change
    // that, but we can make a view of the input MultiVector and
    // change the view to have view semantics.  (That sounds silly;
    // shouldn't views always have view semantics? but remember that
    // "view semantics" just governs the default behavior of the copy
    // constructor and assignment operator.)
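    // (Concretely: with view semantics, the copy constructor
    // mv_type Y (X_mv) would alias X_mv's data; with copy semantics,
    // the same expression would deep-copy it.)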
    RCP<const mv_type> X_view_const;
    const size_t numCols = X_mv.getNumVectors ();
    if (numCols == 0) {
      Teuchos::Array<size_t> cols (0); // view will have zero columns
      X_view_const = X_mv.subView (cols ());
    } else { // Range1D is an inclusive range
      X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
    }
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view_const.is_null (), std::logic_error, "Tpetra::"
      "BlockMultiVector constructor: X_mv.subView(...) returned null.  This "
      "should never happen.  Please report this bug to the Tpetra developers.");

    // It's perfectly OK to cast away const here.  Those view methods
    // should be marked const anyway, because views can't change the
    // allocation (just the entries themselves).
    RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
      "BlockMultiVector constructor: We just set a MultiVector "
      "to have view semantics, but it claims that it doesn't have view "
      "semantics.  This should never happen.  "
      "Please report this bug to the Tpetra developers.");
    mv_ = *X_view; // MultiVector::operator= does a shallow copy here
  }

  // At this point, mv_ has been assigned, so we can ignore X_mv.
  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
  if (! pointMap.is_null ()) {
    pointMap_ = *pointMap; // Map::operator= also does a shallow copy
  }
}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& X,
                  const map_type& newMeshMap,
                  const map_type& newPointMap,
                  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (newPointMap),
  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
  blockSize_ (X.getBlockSize ())
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector (const BlockMultiVector<Scalar, LO, GO, Node>& X,
                  const map_type& newMeshMap,
                  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
  blockSize_ (X.getBlockSize ())
{}

template<class Scalar, class LO, class GO, class Node>
BlockMultiVector<Scalar, LO, GO, Node>::
BlockMultiVector () :
  dist_object_type (Teuchos::null),
  blockSize_ (0)
{}

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::map_type
BlockMultiVector<Scalar, LO, GO, Node>::
makePointMap (const map_type& meshMap, const LO blockSize)
{
  typedef Tpetra::global_size_t GST;
  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;

  const GST gblNumMeshMapInds =
    static_cast<GST> (meshMap.getGlobalNumElements ());
  const size_t lclNumMeshMapIndices =
    static_cast<size_t> (meshMap.getLocalNumElements ());
  const GST gblNumPointMapInds =
    gblNumMeshMapInds * static_cast<GST> (blockSize);
  const size_t lclNumPointMapInds =
    lclNumMeshMapIndices * static_cast<size_t> (blockSize);
  const GO indexBase = meshMap.getIndexBase ();

  if (meshMap.isContiguous ()) {
    return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
                     meshMap.getComm ());
  }
  else {
    // "Hilbert's Hotel" trick: multiply each process' GIDs by
    // blockSize, and fill in.  That ensures correctness even if the
    // mesh Map is overlapping.
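    // Example (hypothetical): with blockSize = 2 and indexBase = 0,
    // mesh GIDs {5, 7} on a process map to point GIDs {10, 11, 14, 15},
    // since mesh GID g owns the point GIDs [g*blockSize, (g+1)*blockSize).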
    Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
    const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
    Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
    for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
      const GO meshGid = lclMeshGblInds[g];
      const GO pointGidStart = indexBase +
        (meshGid - indexBase) * static_cast<GO> (blockSize);
      const size_type offset = g * static_cast<size_type> (blockSize);
      for (LO k = 0; k < blockSize; ++k) {
        const GO pointGid = pointGidStart + static_cast<GO> (k);
        lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
      }
    }
    return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
                     meshMap.getComm ());
  }
}


template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
replaceLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[])
{
  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
                                                                getBlockSize ());
  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
  using exec_space = typename device_type::execution_space;
  Kokkos::deep_copy (exec_space(), X_dst, X_src);
}


template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
replaceLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[])
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
replaceGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[])
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
void
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[])
{
  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
                                                                getBlockSize ());
  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[])
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}

template<class Scalar, class LO, class GO, class Node>
bool
BlockMultiVector<Scalar, LO, GO, Node>::
sumIntoGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[])
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}


template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
BlockMultiVector<Scalar, LO, GO, Node>::
getLocalBlockHost (const LO localRowIndex,
                   const LO colIndex,
                   const Access::ReadOnlyStruct) const
{
  if (!isValidLocalMeshIndex(localRowIndex)) {
    return const_little_host_vec_type();
  } else {
    const size_t blockSize = getBlockSize();
    auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
    LO startRow = localRowIndex*blockSize;
    LO endRow = startRow + blockSize;
    return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
                           colIndex);
  }
}

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
BlockMultiVector<Scalar, LO, GO, Node>::
getLocalBlockHost (const LO localRowIndex,
                   const LO colIndex,
                   const Access::OverwriteAllStruct)
{
  if (!isValidLocalMeshIndex(localRowIndex)) {
    return little_host_vec_type();
  } else {
    const size_t blockSize = getBlockSize();
    auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
    LO startRow = localRowIndex*blockSize;
    LO endRow = startRow + blockSize;
    return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
                           colIndex);
  }
}

template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
BlockMultiVector<Scalar, LO, GO, Node>::
getLocalBlockHost (const LO localRowIndex,
                   const LO colIndex,
                   const Access::ReadWriteStruct)
{
  if (!isValidLocalMeshIndex(localRowIndex)) {
    return little_host_vec_type();
  } else {
    const size_t blockSize = getBlockSize();
    auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
    LO startRow = localRowIndex*blockSize;
    LO endRow = startRow + blockSize;
    return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
                           colIndex);
  }
}

template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
BlockMultiVector<Scalar, LO, GO, Node>::
getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
{
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;

  // The source object of an Import or Export must be a
  // BlockMultiVector or MultiVector (a Vector is a MultiVector).  Try
  // them in that order; one must succeed.  Note that the target of
  // the Import or Export calls checkSizes in DistObject's doTransfer.
  typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
  const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*> (&src);
  if (srcBlkVec == nullptr) {
    const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
    if (srcMultiVec == nullptr) {
      // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
      // currently does a shallow copy by default.  This is why we
      // return by RCP, rather than by value.
      return rcp (new mv_type ());
    } else { // src is a MultiVector
      return rcp (srcMultiVec, false); // nonowning RCP
    }
  } else { // src is a BlockMultiVector
    return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
  }
}

template<class Scalar, class LO, class GO, class Node>
bool BlockMultiVector<Scalar, LO, GO, Node>::
checkSizes (const Tpetra::SrcDistObject& src)
{
  return ! getMultiVectorFromSrcDistObject (src).is_null ();
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
copyAndPermute
(const SrcDistObject& src,
 const size_t numSameIDs,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& permuteToLIDs,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& permuteFromLIDs,
 const CombineMode CM)
{
  TEUCHOS_TEST_FOR_EXCEPTION
    (true, std::logic_error,
     "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
     "instead, create a point importer using the makePointMap function.");
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
packAndPrepare
(const SrcDistObject& src,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& exportLIDs,
 Kokkos::DualView<packet_type*,
   buffer_device_type>& exports,
 Kokkos::DualView<size_t*,
   buffer_device_type> numPacketsPerLID,
 size_t& constantNumPackets)
{
  TEUCHOS_TEST_FOR_EXCEPTION
    (true, std::logic_error,
     "Tpetra::BlockMultiVector::packAndPrepare: Do NOT use this; "
     "instead, create a point importer using the makePointMap function.");
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
unpackAndCombine
(const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& importLIDs,
 Kokkos::DualView<packet_type*,
   buffer_device_type> imports,
 Kokkos::DualView<size_t*,
   buffer_device_type> numPacketsPerLID,
 const size_t constantNumPackets,
 const CombineMode combineMode)
{
  TEUCHOS_TEST_FOR_EXCEPTION
    (true, std::logic_error,
     "Tpetra::BlockMultiVector::unpackAndCombine: Do NOT use this; "
     "instead, create a point importer using the makePointMap function.");
}

template<class Scalar, class LO, class GO, class Node>
bool BlockMultiVector<Scalar, LO, GO, Node>::
isValidLocalMeshIndex (const LO meshLocalIndex) const
{
  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
    meshMap_.isNodeLocalElement (meshLocalIndex);
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
putScalar (const Scalar& val)
{
  mv_.putScalar (val);
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
scale (const Scalar& val)
{
  mv_.scale (val);
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
update (const Scalar& alpha,
        const BlockMultiVector<Scalar, LO, GO, Node>& X,
        const Scalar& beta)
{
  mv_.update (alpha, X.mv_, beta);
}

namespace Impl {
// Y := alpha * D * X
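// Here D is a rank-3 view of the local block diagonal (block index,
// row within block, column within block), while X and Y are rank-2
// point views (point row, column), as the subview calls below assume.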
template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
struct BlockWiseMultiply {
  typedef typename ViewD::size_type Size;

private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  template <typename View>
  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
                                     typename View::device_type, Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewX> UnMViewX;

  const Size block_size_;
  Scalar alpha_;
  UnMViewY Y_;
  UnMViewD D_;
  UnMViewX X_;

public:
  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
                     const ViewY& Y, const ViewD& D, const ViewX& X)
    : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator() (const Size k) const {
    const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
    auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
    const auto num_vecs = X_.extent(1);
    for (Size i = 0; i < num_vecs; ++i) {
      Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
      auto X_curBlk = Kokkos::subview(X_, kslice, i);
      auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
      // Y_curBlk := alpha * D_curBlk * X_curBlk.
      // Recall that GEMV does an update (+=) of the last argument.
      Tpetra::FILL(Y_curBlk, zero);
      Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
    }
  }
};

template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
createBlockWiseMultiply (const int block_size, const Scalar& alpha,
                         const ViewY& Y, const ViewD& D, const ViewX& X) {
  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
}

template <typename ViewY,
          typename Scalar,
          typename ViewD,
          typename ViewZ,
          typename LO = typename ViewY::size_type>
class BlockJacobiUpdate {
private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  template <typename ViewType>
  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
                                     typename ViewType::array_layout,
                                     typename ViewType::device_type,
                                     Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewZ> UnMViewZ;

  const LO blockSize_;
  UnMViewY Y_;
  const Scalar alpha_;
  UnMViewD D_;
  UnMViewZ Z_;
  const Scalar beta_;

public:
  BlockJacobiUpdate (const ViewY& Y,
                     const Scalar& alpha,
                     const ViewD& D,
                     const ViewZ& Z,
                     const Scalar& beta) :
    blockSize_ (D.extent (1)),
    // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
    Y_ (Y),
    alpha_ (alpha),
    D_ (D),
    Z_ (Z),
    beta_ (beta)
  {
    static_assert (static_cast<int> (ViewY::rank) == 1,
                   "Y must have rank 1.");
    static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
    static_assert (static_cast<int> (ViewZ::rank) == 1,
                   "Z must have rank 1.");
    // static_assert (static_cast<int> (ViewZ::rank) ==
    //                static_cast<int> (ViewY::rank),
    //                "Z must have the same rank as Y.");
  }

  KOKKOS_INLINE_FUNCTION void
  operator() (const LO& k) const
  {
    using Kokkos::ALL;
    using Kokkos::subview;
    typedef Kokkos::pair<LO, LO> range_type;
    typedef Kokkos::Details::ArithTraits<Scalar> KAT;

    // We only have to implement the alpha != 0 case.

    auto D_curBlk = subview (D_, k, ALL (), ALL ());
    const range_type kslice (k*blockSize_, (k+1)*blockSize_);

    // Z.update (STS::one (), X, -STS::one ()); // assume this is done

    auto Z_curBlk = subview (Z_, kslice);
    auto Y_curBlk = subview (Y_, kslice);
    // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
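    // Handle beta BLAS-style: beta == 0 overwrites Y_curBlk without
    // reading it (so uninitialized entries are harmless), and
    // beta == 1 skips the scaling pass entirely.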
    if (beta_ == KAT::zero ()) {
      Tpetra::FILL (Y_curBlk, KAT::zero ());
    }
    else if (beta_ != KAT::one ()) {
      Tpetra::SCAL (beta_, Y_curBlk);
    }
    Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
  }
};

template<class ViewY,
         class Scalar,
         class ViewD,
         class ViewZ,
         class LO = typename ViewD::size_type>
void
blockJacobiUpdate (const ViewY& Y,
                   const Scalar& alpha,
                   const ViewD& D,
                   const ViewZ& Z,
                   const Scalar& beta)
{
  static_assert (Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
  static_assert (Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
  static_assert (Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
                 "Y and Z must have the same rank.");
  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");

  const auto lclNumMeshRows = D.extent (0);

#ifdef HAVE_TPETRA_DEBUG
  // D.extent(0) is the (local) number of mesh rows.
  // D.extent(1) is the block size.  Thus, their product should be
  // the local number of point rows, that is, the number of rows in Y.
  const auto blkSize = D.extent (1);
  const auto lclNumPtRows = lclNumMeshRows * blkSize;
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.extent (0) != lclNumPtRows, std::invalid_argument,
     "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
     "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
     << " = " << lclNumPtRows << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.extent (0) != Z.extent (0), std::invalid_argument,
     "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
     "Z.extent(0) = " << Z.extent (0) << ".");
  TEUCHOS_TEST_FOR_EXCEPTION
    (Y.extent (1) != Z.extent (1), std::invalid_argument,
     "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
     "Z.extent(1) = " << Z.extent (1) << ".");
#endif // HAVE_TPETRA_DEBUG

  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
  // lclNumMeshRows must fit in LO, else the Map would not be correct.
  range_type range (0, static_cast<LO> (lclNumMeshRows));
  Kokkos::parallel_for (range, functor);
}

} // namespace Impl

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
blockWiseMultiply (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
                   const BlockMultiVector<Scalar, LO, GO, Node>& X)
{
  using Kokkos::ALL;
  typedef typename device_type::execution_space exec_space;
  const LO lclNumMeshRows = meshMap_.getLocalNumElements ();

  if (alpha == STS::zero ()) {
    this->putScalar (STS::zero ());
  }
  else { // alpha != 0
    const LO blockSize = this->getBlockSize ();
    const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
    auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
    auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
    auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);

    // Use an explicit RangePolicy with the desired execution space.
    // Otherwise, if you just use a number, it runs on the default
    // execution space.  For example, if the default execution space
    // is Cuda but the current execution space is Serial, using just a
    // number would incorrectly run with Cuda.
    Kokkos::RangePolicy<exec_space, LO> range (0, lclNumMeshRows);
    Kokkos::parallel_for (range, bwm);
  }
}

template<class Scalar, class LO, class GO, class Node>
void BlockMultiVector<Scalar, LO, GO, Node>::
blockJacobiUpdate (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
                   const BlockMultiVector<Scalar, LO, GO, Node>& X,
                   BlockMultiVector<Scalar, LO, GO, Node>& Z,
                   const Scalar& beta)
{
  using Kokkos::ALL;
  using Kokkos::subview;
  typedef impl_scalar_type IST;

  const IST alphaImpl = static_cast<IST> (alpha);
  const IST betaImpl = static_cast<IST> (beta);
  const LO numVecs = mv_.getNumVectors ();

  if (alpha == STS::zero ()) { // Y := beta * Y
    this->scale (beta);
  }
  else { // alpha != 0
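    // First compute Z := X - Z.  The per-column block updates below
    // then compute Y := beta * Y + alpha * D * (X - Z).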
    Z.update (STS::one (), X, -STS::one ());
    for (LO j = 0; j < numVecs; ++j) {
      auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
      auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
      auto Y_lcl_j = subview (Y_lcl, ALL (), j);
      auto Z_lcl_j = subview (Z_lcl, ALL (), j);
      Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
    }
  }
}

} // namespace Tpetra

//
// Explicit instantiation macro
//
// Must be expanded from within the Tpetra namespace!
//
#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
  template class BlockMultiVector< S, LO, GO, NODE >;
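
// For example, a call with a hypothetical node type,
//   TPETRA_BLOCKMULTIVECTOR_INSTANT(double, int, long long, my_node_type)
// would expand to
//   template class BlockMultiVector< double, int, long long, my_node_type >;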

#endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP