Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_allReduceView.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_DETAILS_ALLREDUCEVIEW_HPP
43#define TPETRA_DETAILS_ALLREDUCEVIEW_HPP
44
47#include "Kokkos_Core.hpp"
48#include "Teuchos_CommHelpers.hpp"
49#include "Tpetra_Details_temporaryViewUtils.hpp"
50#include <limits>
51#include <type_traits>
52
55
56namespace Tpetra {
57namespace Details {
58
59template<typename InputViewType, typename OutputViewType>
60static void
61allReduceRawContiguous (const OutputViewType& output,
62 const InputViewType& input,
63 const Teuchos::Comm<int>& comm)
64{
65 using Teuchos::outArg;
66 using Teuchos::REDUCE_SUM;
67 using Teuchos::reduceAll;
68 using ValueType = typename InputViewType::non_const_value_type;
69 size_t count = input.span();
70 TEUCHOS_ASSERT( count <= size_t (INT_MAX) );
71 if(isInterComm(comm) && input.data() == output.data())
72 {
73 //Can't do in-place collective on an intercomm,
74 //so use a separate copy as the input.
75 typename InputViewType::array_layout layout(input.extent(0), input.extent(1), input.extent(2), input.extent(3), input.extent(4), input.extent(5), input.extent(6), input.extent(7));
76 Kokkos::View<typename InputViewType::non_const_data_type, typename InputViewType::array_layout, typename InputViewType::device_type>
77 tempInput(Kokkos::ViewAllocateWithoutInitializing("tempInput"), layout);
78 // DEEP_COPY REVIEW - This could be either DEVICE-TO-DEVICE or HOST-TO-HOST
79 // Either way, MPI is called right afterwards, meaning we'd need a sync on device
80 Kokkos::deep_copy(tempInput, input);
81 reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
82 tempInput.data(), output.data());
83 }
84 else
85 reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
86 input.data(), output.data());
87}
88
92template<class InputViewType, class OutputViewType>
93static void
95 const InputViewType& input,
96 const Teuchos::Comm<int>& comm)
97{
98
99 // using execution_space = typename OutputViewType::execution_space;
100 const bool viewsAlias = output.data () == input.data ();
101 if (comm.getSize () == 1) {
102 if (! viewsAlias) {
103 // InputViewType and OutputViewType can't be AnonymousSpace
104 // Views, because deep_copy needs to know their memory spaces.
105 // DEEP_COPY REVIEW - NOT TESTED
106 Kokkos::deep_copy (output, input);
107 }
108 return;
109 }
110
111 // we must ensure MPI can handle the pointers we pass it
112 // if GPUAware, we are done
113 // otherwise, if the views use GPUs, then we should copy them
114 using Layout = typename TempView::UnifiedContiguousLayout<InputViewType, OutputViewType>::type;
115 //if one or both is already in the correct layout, toLayout returns the same view
116 auto inputContig = TempView::toLayout<InputViewType, Layout>(input);
117 auto outputContig = TempView::toLayout<InputViewType, Layout>(output);
119 {
120 allReduceRawContiguous(outputContig, inputContig, comm);
121
122 }
123 else
124 {
125 auto inputMPI = TempView::toMPISafe<decltype(inputContig), false>(inputContig);
126 auto outputMPI = TempView::toMPISafe<decltype(outputContig), false>(outputContig);
127 allReduceRawContiguous(outputMPI, inputMPI, comm);
128 // DEEP_COPY REVIEW - Could be either
129 Kokkos::deep_copy(outputContig, outputMPI);
130 }
131 // DEEP_COPY REVIEW - Could be either
132 Kokkos::deep_copy(output, outputContig);
133}
134
135} // namespace Details
136} // namespace Tpetra
137
138#endif // TPETRA_DETAILS_ALLREDUCEVIEW_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::isInterComm.
Struct that holds views of the contents of a CrsMatrix.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
Implementation details of Tpetra.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
Namespace Tpetra contains the class and methods constituting the Tpetra library.