69 Teuchos::ETransp mode,
73 using impl_scalar_type =
typename Kokkos::ArithTraits<Scalar>::val_type;
74 impl_scalar_type implAlpha = alpha;
80 typename Aggregates_kokkos::aggregates_sizes_type::const_type aggSizes = aggregates_->ComputeAggregateSizes();
82 auto kokkos_view_X = X.getDeviceLocalView(Xpetra::Access::ReadOnly);
83 auto kokkos_view_Y = Y.getDeviceLocalView(Xpetra::Access::ReadWrite);
84 LO numCols = kokkos_view_X.extent(1);
86 if(mode == Teuchos::TRANS) {
87 auto vertex2AggId = aggregates_->GetVertex2AggId();
88 auto vertex2AggIdView = vertex2AggId->getDeviceLocalView(Xpetra::Access::ReadOnly);
89 LO numNodes = kokkos_view_X.extent(0);
93 Kokkos::parallel_for(
"MueLu:MatrixFreeTentativeR_kokkos:apply",
md_range_type({0,0},{numCols,numNodes}),
94 KOKKOS_LAMBDA(
const int colIdx,
const int NodeIdx) {
95 LO aggIdx = vertex2AggIdView(NodeIdx,0);
97 Kokkos::atomic_add(&kokkos_view_Y(aggIdx,colIdx),implAlpha*kokkos_view_X(NodeIdx,colIdx)/Kokkos::sqrt(aggSizes(aggIdx)));
101 const auto vertex2Agg = aggregates_->GetVertex2AggId();
102 auto vertex2AggView = vertex2Agg->getDeviceLocalView(Xpetra::Access::ReadOnly);
103 LO numNodes = kokkos_view_Y.extent(0);
107 Kokkos::parallel_for(
"MueLu:MatrixFreeTentativeP_kokkos:apply",
md_range_type({0,0},{numCols,numNodes}),
108 KOKKOS_LAMBDA(
const int colIdx,
const int fineIdx) {
109 LO aggIdx = vertex2AggView(fineIdx,0);
110 kokkos_view_Y(fineIdx,colIdx) += implAlpha*kokkos_view_X(aggIdx,colIdx)/Kokkos::sqrt(aggSizes(aggIdx));