MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
47#define MUELU_TENTATIVEPFACTORY_DEF_HPP
48
49#include <Xpetra_MapFactory.hpp>
50#include <Xpetra_Map.hpp>
51#include <Xpetra_CrsMatrix.hpp>
52#include <Xpetra_CrsGraphFactory.hpp>
53#include <Xpetra_Matrix.hpp>
54#include <Xpetra_MatrixMatrix.hpp>
55#include <Xpetra_MultiVector.hpp>
56#include <Xpetra_MultiVectorFactory.hpp>
57#include <Xpetra_VectorFactory.hpp>
58#include <Xpetra_Import.hpp>
59#include <Xpetra_ImportFactory.hpp>
60#include <Xpetra_CrsMatrixWrap.hpp>
61#include <Xpetra_StridedMap.hpp>
62#include <Xpetra_StridedMapFactory.hpp>
63#include <Xpetra_IO.hpp>
64
65#ifdef HAVE_MUELU_TPETRA
66#include "Xpetra_TpetraBlockCrsMatrix.hpp"
67//#include "Tpetra_BlockCrsMatrix.hpp"
68#endif
69
71
72#include "MueLu_Aggregates.hpp"
73#include "MueLu_AmalgamationFactory.hpp"
74#include "MueLu_AmalgamationInfo.hpp"
75#include "MueLu_CoarseMapFactory.hpp"
76#include "MueLu_MasterList.hpp"
77#include "MueLu_Monitor.hpp"
78#include "MueLu_NullspaceFactory.hpp"
79#include "MueLu_PerfUtils.hpp"
80#include "MueLu_Utilities.hpp"
81
82
83
84
85namespace MueLu {
86
87 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89 RCP<ParameterList> validParamList = rcp(new ParameterList());
90
91#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
92 SET_VALID_ENTRY("tentative: calculate qr");
93 SET_VALID_ENTRY("tentative: build coarse coordinates");
94 SET_VALID_ENTRY("tentative: constant column sums");
95#undef SET_VALID_ENTRY
96 validParamList->set< std::string >("Nullspace name", "Nullspace", "Name for the input nullspace");
97
98 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
99 validParamList->set< RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
100 validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
101 validParamList->set< RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
102 validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
103 validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
104 validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
105 validParamList->set< RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
106
107 // Make sure we don't recursively validate options for the matrixmatrix kernels
108 ParameterList norecurse;
109 norecurse.disableRecursiveValidation();
110 validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
111
112 return validParamList;
113 }
114
115 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
117
118 const ParameterList& pL = GetParameterList();
119 // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
120 std::string nspName = "Nullspace";
121 if(pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
122
123 Input(fineLevel, "A");
124 Input(fineLevel, "Aggregates");
125 Input(fineLevel, nspName);
126 Input(fineLevel, "UnAmalgamationInfo");
127 Input(fineLevel, "CoarseMap");
128 if( fineLevel.GetLevelID() == 0 &&
129 fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
130 pL.get<bool>("tentative: build coarse coordinates") ) { // and we want coordinates on other levels
131 bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
132 Input(fineLevel, "Coordinates");
133 } else if (bTransferCoordinates_) {
134 Input(fineLevel, "Coordinates");
135 }
136 }
137
138 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
140 return BuildP(fineLevel, coarseLevel);
141 }
142
143 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
145 FactoryMonitor m(*this, "Build", coarseLevel);
146 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
147 typedef Xpetra::MultiVector<coordinate_type,LO,GO,NO> RealValuedMultiVector;
148 typedef Xpetra::MultiVectorFactory<coordinate_type,LO,GO,NO> RealValuedMultiVectorFactory;
149
150 const ParameterList& pL = GetParameterList();
151 std::string nspName = "Nullspace";
152 if(pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
153
154
155 RCP<Matrix> Ptentative;
156 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
157 RCP<Aggregates> aggregates = Get< RCP<Aggregates> > (fineLevel, "Aggregates");
158 // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
159 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
160 if ( aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
161 Ptentative = Teuchos::null;
162 Set(coarseLevel, "P", Ptentative);
163 return;
164 }
165
166 RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
167 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, nspName);
168 RCP<const Map> coarseMap = Get< RCP<const Map> > (fineLevel, "CoarseMap");
169 RCP<RealValuedMultiVector> fineCoords;
170 if(bTransferCoordinates_) {
171 fineCoords = Get< RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
172 }
173
174 // FIXME: We should remove the NodeComm on levels past the threshold
175 if(fineLevel.IsAvailable("Node Comm")) {
176 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel,"Node Comm");
177 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
178 }
179
180 // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
181 TEUCHOS_TEST_FOR_EXCEPTION( A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
182 Exceptions::RuntimeError,"MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
183
184 RCP<MultiVector> coarseNullspace;
185 RCP<RealValuedMultiVector> coarseCoords;
186
187 if(bTransferCoordinates_) {
188 //*** Create the coarse coordinates ***
189 // First create the coarse map and coarse multivector
190 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
191 LO blkSize = 1;
192 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
193 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
194 }
195 GO indexBase = coarseMap->getIndexBase();
196 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
197 Array<GO> nodeList(numCoarseNodes);
198 const int numDimensions = fineCoords->getNumVectors();
199
200 for (LO i = 0; i < numCoarseNodes; i++) {
201 nodeList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;
202 }
203 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
204 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
205 nodeList,
206 indexBase,
207 fineCoords->getMap()->getComm());
208 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
209
210 // Create overlapped fine coordinates to reduce global communication
211 RCP<RealValuedMultiVector> ghostedCoords;
212 if (aggregates->AggregatesCrossProcessors()) {
213 RCP<const Map> aggMap = aggregates->GetMap();
214 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
215
216 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
217 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
218 } else {
219 ghostedCoords = fineCoords;
220 }
221
222 // Get some info about aggregates
223 int myPID = coarseCoordsMap->getComm()->getRank();
224 LO numAggs = aggregates->GetNumAggregates();
225 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
226 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
227 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
228
229 // Fill in coarse coordinates
230 for (int dim = 0; dim < numDimensions; ++dim) {
231 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
232 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
233
234 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
235 if (procWinner[lnode] == myPID &&
236 lnode < fineCoordsData.size() &&
237 vertex2AggID[lnode] < coarseCoordsData.size() &&
238 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
239 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
240 }
241 }
242 for (LO agg = 0; agg < numAggs; agg++) {
243 coarseCoordsData[agg] /= aggSizes[agg];
244 }
245 }
246 }
247
248 if (!aggregates->AggregatesCrossProcessors()) {
249 if(Xpetra::Helpers<SC,LO,GO,NO>::isTpetraBlockCrs(A)) {
250 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.GetLevelID());
251 }
252 else {
253 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.GetLevelID());
254 }
255 }
256 else
257 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
258
259 // If available, use striding information of fine level matrix A for range
260 // map and coarseMap as domain map; otherwise use plain range map of
261 // Ptent = plain range map of A for range map and coarseMap as domain map.
262 // NOTE:
263 // The latter is not really safe, since there is no striding information
264 // for the range map. This is not really a problem, since striding
265 // information is always available on the intermedium levels and the
266 // coarsest levels.
267 if (A->IsView("stridedMaps") == true)
268 Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
269 else
270 Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
271
272 if(bTransferCoordinates_) {
273 Set(coarseLevel, "Coordinates", coarseCoords);
274 }
275 Set(coarseLevel, "Nullspace", coarseNullspace);
276 Set(coarseLevel, "P", Ptentative);
277
278 if (IsPrint(Statistics2)) {
279 RCP<ParameterList> params = rcp(new ParameterList());
280 params->set("printLoadBalancingInfo", true);
281 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
282 }
283 }
284
285 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
287 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
288 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
289#ifdef HAVE_MUELU_TPETRA
290
291 /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
292 be generalized later, if we ever need to do so:
293 1) Null space dimension === block size of matrix: So no elasticity right now
294 2) QR is not supported: Under assumption #1, this shouldn't cause problems.
295 3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
296
297 These assumptions keep our code way simpler and still support the use cases we actually care about.
298 */
299
300 RCP<const Map> rowMap = A->getRowMap();
301 RCP<const Map> rangeMap = A->getRangeMap();
302 RCP<const Map> colMap = A->getColMap();
303 // const size_t numFinePointRows = rangeMap->getLocalNumElements();
304 const size_t numFineBlockRows = rowMap->getLocalNumElements();
305
306 typedef Teuchos::ScalarTraits<SC> STS;
307 // typedef typename STS::magnitudeType Magnitude;
308 const SC zero = STS::zero();
309 const SC one = STS::one();
310 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
311
312 const GO numAggs = aggregates->GetNumAggregates();
313 const size_t NSDim = fineNullspace->getNumVectors();
314 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
315
316 // Need to generate the coarse block map
317 // NOTE: We assume NSDim == block size here
318 // NOTE: We also assume that coarseMap has contiguous GIDs
319 //const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
320 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
321 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
322 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
323 numCoarseBlockRows,
324 coarsePointMap->getIndexBase(),
325 coarsePointMap->getComm());
326 // Sanity checking
327 const ParameterList& pL = GetParameterList();
328 const bool &doQRStep = pL.get<bool>("tentative: calculate qr");
329 const bool &constantColSums = pL.get<bool>("tentative: constant column sums");
330
331 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums,Exceptions::RuntimeError,
332 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
333
334 // The aggregates use the amalgamated column map, which in this case is what we want
335
336 // Aggregates map is based on the amalgamated column map
337 // We can skip global-to-local conversion if LIDs in row map are
338 // same as LIDs in column map
339 bool goodMap = MueLu::Utilities<SC,LO,GO,NO>::MapsAreNested(*rowMap, *colMap);
340
341 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
342 // aggStart is a pointer into aggToRowMapLO
343 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
344 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
345 ArrayRCP<LO> aggStart;
346 ArrayRCP<LO> aggToRowMapLO;
347 ArrayRCP<GO> aggToRowMapGO;
348 if (goodMap) {
349 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
350 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
351 } else {
352 throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
353 }
354
355 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
356
357 // Pull out the nullspace vectors so that we can have random access.
358 ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
359 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
360 for (size_t i = 0; i < NSDim; i++) {
361 fineNS[i] = fineNullspace->getData(i);
362 if (coarsePointMap->getLocalNumElements() > 0)
363 coarseNS[i] = coarseNullspace->getDataNonConst(i);
364 }
365
366 // BlockCrs requires that we build the (block) graph first, so let's do that...
367 // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
368 // block non-zero per row in the matrix;
369 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap,coarseBlockMap,0);
370 ArrayRCP<size_t> iaPtent;
371 ArrayRCP<LO> jaPtent;
372 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
373 ArrayView<size_t> ia = iaPtent();
374 ArrayView<LO> ja = jaPtent();
375
376 for (size_t i = 0; i < numFineBlockRows; i++) {
377 ia[i] = i;
378 ja[i] = INVALID;
379 }
380 ia[numCoarseBlockRows] = numCoarseBlockRows;
381
382
383 for (GO agg = 0; agg < numAggs; agg++) {
384 LO aggSize = aggStart[agg+1] - aggStart[agg];
385 Xpetra::global_size_t offset = agg;
386
387 for (LO j = 0; j < aggSize; j++) {
388 // FIXME: Allow for bad maps
389 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
390 const size_t rowStart = ia[localRow];
391 ja[rowStart] = offset;
392 }
393 }
394
395 // Compress storage (remove all INVALID, which happen when we skip zeros)
396 // We do that in-place
397 size_t ia_tmp = 0, nnz = 0;
398 for (size_t i = 0; i < numFineBlockRows; i++) {
399 for (size_t j = ia_tmp; j < ia[i+1]; j++)
400 if (ja[j] != INVALID) {
401 ja [nnz] = ja [j];
402 nnz++;
403 }
404 ia_tmp = ia[i+1];
405 ia[i+1] = nnz;
406 }
407
408 if (rowMap->lib() == Xpetra::UseTpetra) {
409 // - Cannot resize for Epetra, as it checks for same pointers
410 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
411 // NOTE: these invalidate ja and val views
412 jaPtent .resize(nnz);
413 }
414
415 GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
416 BlockGraph->setAllIndices(iaPtent, jaPtent);
417
418 // Managing labels & constants for ESFC
419 {
420 RCP<ParameterList> FCparams;
421 if(pL.isSublist("matrixmatrix: kernel params"))
422 FCparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
423 else
424 FCparams= rcp(new ParameterList);
425 // By default, we don't need global constants for TentativeP, but we do want it for the graph
426 // if we're printing statistics, so let's leave it on for now.
427 FCparams->set("compute global constants",FCparams->get("compute global constants",true));
428 std::string levelIDs = toString(levelID);
429 FCparams->set("Timer Label",std::string("MueLu::TentativeP-")+levelIDs);
430 RCP<const Export> dummy_e;
431 RCP<const Import> dummy_i;
432 BlockGraph->expertStaticFillComplete(coarseBlockMap,rowMap,dummy_i,dummy_e,FCparams);
433 }
434
435 // Now let's make a BlockCrs Matrix
436 // NOTE: Assumes block size== NSDim
437 RCP<Xpetra::CrsMatrix<SC,LO,GO,NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC,LO,GO,NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap,NSDim);
438 RCP<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> >(P_xpetra);
439 if(P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
440 RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
441
443 // "no-QR" option //
445 // Local Q factor is just the fine nullspace support over the current aggregate.
446 // Local R factor is the identity.
447 // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
448 // NOTE: "goodMap" case only
449 Teuchos::Array<Scalar> block(NSDim*NSDim, zero);
450 Teuchos::Array<LO> bcol(1);
451
452 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
453 for (LO agg = 0; agg < numAggs; agg++) {
454 bcol[0] = agg;
455 const LO aggSize = aggStart[agg+1] - aggStart[agg];
456 Xpetra::global_size_t offset = agg*NSDim;
457
458 // Process each row in the local Q factor
459 // NOTE: Blocks are in row-major order
460 for (LO j = 0; j < aggSize; j++) {
461 const LO localBlockRow = aggToRowMapLO[aggStart[agg]+j];
462
463 for (size_t r = 0; r < NSDim; r++) {
464 LO localPointRow = localBlockRow*NSDim + r;
465 for (size_t c = 0; c < NSDim; c++)
466 block[r*NSDim+c] = fineNS[c][localPointRow];
467 }
468 // NOTE: Assumes columns==aggs and are ordered sequentially
469 P_tpetra->replaceLocalValues(localBlockRow,bcol(),block());
470
471 }//end aggSize
472
473 for (size_t j = 0; j < NSDim; j++)
474 coarseNS[j][offset+j] = one;
475
476 } //for (GO agg = 0; agg < numAggs; agg++)
477
478 Ptentative = P_wrap;
479#else
480 throw std::runtime_error("TentativePFactory::BuildPuncoupledBlockCrs: Requires Tpetra");
481#endif
482 }
483
484 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
486 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
487 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
488 typedef Teuchos::ScalarTraits<SC> STS;
489 typedef typename STS::magnitudeType Magnitude;
490 const SC zero = STS::zero();
491 const SC one = STS::one();
492
493 // number of aggregates
494 GO numAggs = aggregates->GetNumAggregates();
495
496 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
497 // aggStart is a pointer into aggToRowMap
498 // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
499 // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
500 ArrayRCP<LO> aggStart;
501 ArrayRCP< GO > aggToRowMap;
502 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
503
504 // find size of the largest aggregate
505 LO maxAggSize=0;
506 for (GO i=0; i<numAggs; ++i) {
507 LO sizeOfThisAgg = aggStart[i+1] - aggStart[i];
508 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
509 }
510
511 // dimension of fine level nullspace
512 const size_t NSDim = fineNullspace->getNumVectors();
513
514 // index base for coarse Dof map (usually 0)
515 GO indexBase=A->getRowMap()->getIndexBase();
516
517 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
518 const RCP<const Map> uniqueMap = A->getDomainMap();
519 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
520 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap,NSDim);
521 fineNullspaceWithOverlap->doImport(*fineNullspace,*importer,Xpetra::INSERT);
522
523 // Pull out the nullspace vectors so that we can have random access.
524 ArrayRCP< ArrayRCP<const SC> > fineNS(NSDim);
525 for (size_t i=0; i<NSDim; ++i)
526 fineNS[i] = fineNullspaceWithOverlap->getData(i);
527
528 //Allocate storage for the coarse nullspace.
529 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
530
531 ArrayRCP< ArrayRCP<SC> > coarseNS(NSDim);
532 for (size_t i=0; i<NSDim; ++i)
533 if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
534
535 //This makes the rowmap of Ptent the same as that of A->
536 //This requires moving some parts of some local Q's to other processors
537 //because aggregates can span processors.
538 RCP<const Map > rowMapForPtent = A->getRowMap();
539 const Map& rowMapForPtentRef = *rowMapForPtent;
540
541 // Set up storage for the rows of the local Qs that belong to other processors.
542 // FIXME This is inefficient and could be done within the main loop below with std::vector's.
543 RCP<const Map> colMap = A->getColMap();
544
545 RCP<const Map > ghostQMap;
546 RCP<MultiVector> ghostQvalues;
547 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > ghostQcolumns;
548 RCP<Xpetra::Vector<GO,LO,GO,Node> > ghostQrowNums;
549 ArrayRCP< ArrayRCP<SC> > ghostQvals;
550 ArrayRCP< ArrayRCP<GO> > ghostQcols;
551 ArrayRCP< GO > ghostQrows;
552
553 Array<GO> ghostGIDs;
554 for (LO j=0; j<numAggs; ++j) {
555 for (LO k=aggStart[j]; k<aggStart[j+1]; ++k) {
556 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
557 ghostGIDs.push_back(aggToRowMap[k]);
558 }
559 }
560 }
561 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
562 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
563 ghostGIDs,
564 indexBase, A->getRowMap()->getComm()); //JG:Xpetra::global_size_t>?
565 //Vector to hold bits of Q that go to other processors.
566 ghostQvalues = MultiVectorFactory::Build(ghostQMap,NSDim);
567 //Note that Epetra does not support MultiVectors templated on Scalar != double.
568 //So to work around this, we allocate an array of Vectors. This shouldn't be too
569 //expensive, as the number of Vectors is NSDim.
570 ghostQcolumns.resize(NSDim);
571 for (size_t i=0; i<NSDim; ++i)
572 ghostQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
573 ghostQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
574 if (ghostQvalues->getLocalLength() > 0) {
575 ghostQvals.resize(NSDim);
576 ghostQcols.resize(NSDim);
577 for (size_t i=0; i<NSDim; ++i) {
578 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
579 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
580 }
581 ghostQrows = ghostQrowNums->getDataNonConst(0);
582 }
583
584 //importer to handle moving Q
585 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
586
587 // Dense QR solver
588 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
589
590 //Allocate temporary storage for the tentative prolongator.
591 Array<GO> globalColPtr(maxAggSize*NSDim,0);
592 Array<LO> localColPtr(maxAggSize*NSDim,0);
593 Array<SC> valPtr(maxAggSize*NSDim,0.);
594
595 //Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
596 const Map& coarseMapRef = *coarseMap;
597
598 // For the 3-arrays constructor
599 ArrayRCP<size_t> ptent_rowptr;
600 ArrayRCP<LO> ptent_colind;
601 ArrayRCP<Scalar> ptent_values;
602
603 // Because ArrayRCPs are slow...
604 ArrayView<size_t> rowptr_v;
605 ArrayView<LO> colind_v;
606 ArrayView<Scalar> values_v;
607
608 // For temporary usage
609 Array<size_t> rowptr_temp;
610 Array<LO> colind_temp;
611 Array<Scalar> values_temp;
612
613 RCP<CrsMatrix> PtentCrs;
614
615 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
616 PtentCrs = PtentCrsWrap->getCrsMatrix();
617 Ptentative = PtentCrsWrap;
618
619 //*****************************************************************
620 //Loop over all aggregates and calculate local QR decompositions.
621 //*****************************************************************
622 GO qctr=0; //for indexing into Ptent data vectors
623 const Map& nonUniqueMapRef = *nonUniqueMap;
624
625 size_t total_nnz_count=0;
626
627 for (GO agg=0; agg<numAggs; ++agg)
628 {
629 LO myAggSize = aggStart[agg+1]-aggStart[agg];
630 // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
631 // "localQR" (in column major format) for the QR routine.
632 Teuchos::SerialDenseMatrix<LO,SC> localQR(myAggSize, NSDim);
633 for (size_t j=0; j<NSDim; ++j) {
634 bool bIsZeroNSColumn = true;
635 for (LO k=0; k<myAggSize; ++k)
636 {
637 // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
638 // fineNS[j][n] is the nth entry in the jth NS vector
639 try{
640 SC nsVal = fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ]; // extract information from fine level NS
641 localQR(k,j) = nsVal;
642 if (nsVal != zero) bIsZeroNSColumn = false;
643 }
644 catch(...) {
645 GetOStream(Runtime1,-1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
646 GetOStream(Runtime1,-1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
647 GetOStream(Runtime1,-1) << "(local?) aggId=" << agg << std::endl;
648 GetOStream(Runtime1,-1) << "aggSize=" << myAggSize << std::endl;
649 GetOStream(Runtime1,-1) << "agg DOF=" << k << std::endl;
650 GetOStream(Runtime1,-1) << "NS vector j=" << j << std::endl;
651 GetOStream(Runtime1,-1) << "j*myAggSize + k = " << j*myAggSize + k << std::endl;
652 GetOStream(Runtime1,-1) << "aggToRowMap["<<agg<<"][" << k << "] = " << aggToRowMap[aggStart[agg]+k] << std::endl;
653 GetOStream(Runtime1,-1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg]+k] << " is global element in nonUniqueMap = " <<
654 nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
655 GetOStream(Runtime1,-1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
656 GetOStream(Runtime1,-1) << "fineNS...=" << fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ] << std::endl;
657 GetOStream(Errors,-1) << "caught an error!" << std::endl;
658 }
659 } //for (LO k=0 ...
660 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
661 } //for (LO j=0 ...
662
663 Xpetra::global_size_t offset=agg*NSDim;
664
665 if(myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
666 // calculate QR decomposition (standard)
667 // R is stored in localQR (size: myAggSize x NSDim)
668
669 // Householder multiplier
670 SC tau = localQR(0,0);
671
672 if (NSDim == 1) {
673 // Only one nullspace vector, so normalize by hand
674 Magnitude dtemp=0;
675 for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
676 Magnitude tmag = STS::magnitude(localQR(k,0));
677 dtemp += tmag*tmag;
678 }
679 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
680 tau = localQR(0,0);
681 localQR(0,0) = dtemp;
682 } else {
683 qrSolver.setMatrix( Teuchos::rcp(&localQR, false) );
684 qrSolver.factor();
685 }
686
687 // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
688 // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
689 // This stores the (offset+k)th entry only if it is local according to the coarseMap.
690 for (size_t j=0; j<NSDim; ++j) {
691 for (size_t k=0; k<=j; ++k) {
692 try {
693 if (coarseMapRef.isNodeLocalElement(offset+k)) {
694 coarseNS[j][offset+k] = localQR(k, j); //TODO is offset+k the correct local ID?!
695 }
696 }
697 catch(...) {
698 GetOStream(Errors,-1) << "caught error in coarseNS insert, j="<<j<<", offset+k = "<<offset+k<<std::endl;
699 }
700 }
701 }
702
703 // Calculate Q, the tentative prolongator.
704 // The Lapack GEQRF call only works for myAggsize >= NSDim
705
706 if (NSDim == 1) {
707 // Only one nullspace vector, so calculate Q by hand
708 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0,0));
709 localQR(0,0) = tau;
710 dtemp = 1 / dtemp;
711 for (LocalOrdinal i=0; i<myAggSize; ++i) {
712 localQR(i,0) *= dtemp ;
713 }
714 } else {
715 qrSolver.formQ();
716 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
717 for (size_t j=0; j<NSDim; j++) {
718 for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
719 localQR(i,j) = (*qFactor)(i,j);
720 }
721 }
722 }
723
724 // end default case (myAggSize >= NSDim)
725 } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
726 // See comments for the uncoupled case
727
728 // R = extended (by adding identity rows) localQR
729 for (size_t j = 0; j < NSDim; j++)
730 for (size_t k = 0; k < NSDim; k++) {
731 TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset+k), Exceptions::RuntimeError,
732 "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset+k);
733
734 if (k < as<size_t>(myAggSize))
735 coarseNS[j][offset+k] = localQR(k,j);
736 else
737 coarseNS[j][offset+k] = (k == j ? one : zero);
738 }
739
740 // Q = I (rectangular)
741 for (size_t i = 0; i < as<size_t>(myAggSize); i++)
742 for (size_t j = 0; j < NSDim; j++)
743 localQR(i,j) = (j == i ? one : zero);
744 } // end else (special handling for 1pt aggregates)
745
746 //Process each row in the local Q factor. If the row is local to the current processor
747 //according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
748 //to be communicated later to the owning processor.
749 //FIXME -- what happens if maps are blocked?
750 for (GO j=0; j<myAggSize; ++j) {
751 //This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
752 //If it is, the row is inserted. If not, the row number, columns, and values are saved in
753 //MultiVectors that will be sent to other processors.
754 GO globalRow = aggToRowMap[aggStart[agg]+j];
755
756 //TODO is the use of Xpetra::global_size_t below correct?
757 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false ) {
758 ghostQrows[qctr] = globalRow;
759 for (size_t k=0; k<NSDim; ++k) {
760 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg*NSDim+k);
761 ghostQvals[k][qctr] = localQR(j,k);
762 }
763 ++qctr;
764 } else {
765 size_t nnz=0;
766 for (size_t k=0; k<NSDim; ++k) {
767 try{
768 if (localQR(j,k) != Teuchos::ScalarTraits<SC>::zero()) {
769 localColPtr[nnz] = agg * NSDim + k;
770 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
771 valPtr[nnz] = localQR(j,k);
772 ++total_nnz_count;
773 ++nnz;
774 }
775 }
776 catch(...) {
777 GetOStream(Errors,-1) << "caught error in colPtr/valPtr insert, current index="<<nnz<<std::endl;
778 }
779 } //for (size_t k=0; k<NSDim; ++k)
780
781 try{
782 Ptentative->insertGlobalValues(globalRow,globalColPtr.view(0,nnz),valPtr.view(0,nnz));
783 }
784 catch(...) {
785 GetOStream(Errors,-1) << "pid " << A->getRowMap()->getComm()->getRank()
786 << "caught error during Ptent row insertion, global row "
787 << globalRow << std::endl;
788 }
789 }
790 } //for (GO j=0; j<myAggSize; ++j)
791
792 } // for (LO agg=0; agg<numAggs; ++agg)
793
794
795 // ***********************************************************
796 // ************* end of aggregate-wise QR ********************
797 // ***********************************************************
798 GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
799 // Import ghost parts of Q factors and insert into Ptentative.
800 // First import just the global row numbers.
801 RCP<Xpetra::Vector<GO,LO,GO,Node> > targetQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(rowMapForPtent);
802 targetQrowNums->putScalar(-1);
803 targetQrowNums->doImport(*ghostQrowNums,*importer,Xpetra::INSERT);
804 ArrayRCP< GO > targetQrows = targetQrowNums->getDataNonConst(0);
805
806 // Now create map based on just the row numbers imported.
807 Array<GO> gidsToImport;
808 gidsToImport.reserve(targetQrows.size());
809 for (typename ArrayRCP<GO>::iterator r=targetQrows.begin(); r!=targetQrows.end(); ++r) {
810 if (*r > -1) {
811 gidsToImport.push_back(*r);
812 }
813 }
814 RCP<const Map > reducedMap = MapFactory::Build( A->getRowMap()->lib(),
815 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
816 gidsToImport, indexBase, A->getRowMap()->getComm() );
817
818 // Import using the row numbers that this processor will receive.
819 importer = ImportFactory::Build(ghostQMap, reducedMap);
820
821 Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > targetQcolumns(NSDim);
822 for (size_t i=0; i<NSDim; ++i) {
823 targetQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(reducedMap);
824 targetQcolumns[i]->doImport(*(ghostQcolumns[i]),*importer,Xpetra::INSERT);
825 }
826 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap,NSDim);
827 targetQvalues->doImport(*ghostQvalues,*importer,Xpetra::INSERT);
828
829 ArrayRCP< ArrayRCP<SC> > targetQvals;
830 ArrayRCP<ArrayRCP<GO> > targetQcols;
831 if (targetQvalues->getLocalLength() > 0) {
832 targetQvals.resize(NSDim);
833 targetQcols.resize(NSDim);
834 for (size_t i=0; i<NSDim; ++i) {
835 targetQvals[i] = targetQvalues->getDataNonConst(i);
836 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
837 }
838 }
839
840 valPtr = Array<SC>(NSDim,0.);
841 globalColPtr = Array<GO>(NSDim,0);
842 for (typename Array<GO>::iterator r=gidsToImport.begin(); r!=gidsToImport.end(); ++r) {
843 if (targetQvalues->getLocalLength() > 0) {
844 for (size_t j=0; j<NSDim; ++j) {
845 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
846 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
847 }
848 Ptentative->insertGlobalValues(*r, globalColPtr.view(0,NSDim), valPtr.view(0,NSDim));
849 } //if (targetQvalues->getLocalLength() > 0)
850 }
851
852 Ptentative->fillComplete(coarseMap, A->getDomainMap());
853 }
854
855
856
857 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
859 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
860 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
861 RCP<const Map> rowMap = A->getRowMap();
862 RCP<const Map> colMap = A->getColMap();
863 const size_t numRows = rowMap->getLocalNumElements();
864
865 typedef Teuchos::ScalarTraits<SC> STS;
866 typedef typename STS::magnitudeType Magnitude;
867 const SC zero = STS::zero();
868 const SC one = STS::one();
869 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
870
871 const GO numAggs = aggregates->GetNumAggregates();
872 const size_t NSDim = fineNullspace->getNumVectors();
873 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes();
874
875
876 // Sanity checking
877 const ParameterList& pL = GetParameterList();
878 const bool &doQRStep = pL.get<bool>("tentative: calculate qr");
879 const bool &constantColSums = pL.get<bool>("tentative: constant column sums");
880
881 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums,Exceptions::RuntimeError,
882 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
883
884 // Aggregates map is based on the amalgamated column map
885 // We can skip global-to-local conversion if LIDs in row map are
886 // same as LIDs in column map
887 bool goodMap = MueLu::Utilities<SC,LO,GO,NO>::MapsAreNested(*rowMap, *colMap);
888
889 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
890 // aggStart is a pointer into aggToRowMapLO
891 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
892 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
893 ArrayRCP<LO> aggStart;
894 ArrayRCP<LO> aggToRowMapLO;
895 ArrayRCP<GO> aggToRowMapGO;
896 if (goodMap) {
897 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
898 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
899
900 } else {
901 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
902 GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
903 << "using GO->LO conversion with performance penalty" << std::endl;
904 }
905 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
906
907 // Pull out the nullspace vectors so that we can have random access.
908 ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
909 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
910 for (size_t i = 0; i < NSDim; i++) {
911 fineNS[i] = fineNullspace->getData(i);
912 if (coarseMap->getLocalNumElements() > 0)
913 coarseNS[i] = coarseNullspace->getDataNonConst(i);
914 }
915
916 size_t nnzEstimate = numRows * NSDim;
917
918 // Time to construct the matrix and fill in the values
919 Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
920 RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
921
922 ArrayRCP<size_t> iaPtent;
923 ArrayRCP<LO> jaPtent;
924 ArrayRCP<SC> valPtent;
925
926 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
927
928 ArrayView<size_t> ia = iaPtent();
929 ArrayView<LO> ja = jaPtent();
930 ArrayView<SC> val = valPtent();
931
932 ia[0] = 0;
933 for (size_t i = 1; i <= numRows; i++)
934 ia[i] = ia[i-1] + NSDim;
935
936 for (size_t j = 0; j < nnzEstimate; j++) {
937 ja [j] = INVALID;
938 val[j] = zero;
939 }
940
941
942 if (doQRStep) {
944 // Standard aggregate-wise QR //
946 for (GO agg = 0; agg < numAggs; agg++) {
947 LO aggSize = aggStart[agg+1] - aggStart[agg];
948
949 Xpetra::global_size_t offset = agg*NSDim;
950
951 // Extract the piece of the nullspace corresponding to the aggregate, and
952 // put it in the flat array, "localQR" (in column major format) for the
953 // QR routine.
954 Teuchos::SerialDenseMatrix<LO,SC> localQR(aggSize, NSDim);
955 if (goodMap) {
956 for (size_t j = 0; j < NSDim; j++)
957 for (LO k = 0; k < aggSize; k++)
958 localQR(k,j) = fineNS[j][aggToRowMapLO[aggStart[agg]+k]];
959 } else {
960 for (size_t j = 0; j < NSDim; j++)
961 for (LO k = 0; k < aggSize; k++)
962 localQR(k,j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+k])];
963 }
964
965 // Test for zero columns
966 for (size_t j = 0; j < NSDim; j++) {
967 bool bIsZeroNSColumn = true;
968
969 for (LO k = 0; k < aggSize; k++)
970 if (localQR(k,j) != zero)
971 bIsZeroNSColumn = false;
972
973 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
974 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
975 }
976
977 // Calculate QR decomposition (standard)
978 // NOTE: Q is stored in localQR and R is stored in coarseNS
979 if (aggSize >= Teuchos::as<LO>(NSDim)) {
980
981 if (NSDim == 1) {
982 // Only one nullspace vector, calculate Q and R by hand
983 Magnitude norm = STS::magnitude(zero);
984 for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
985 norm += STS::magnitude(localQR(k,0)*localQR(k,0));
986 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
987
988 // R = norm
989 coarseNS[0][offset] = norm;
990
991 // Q = localQR(:,0)/norm
992 for (LO i = 0; i < aggSize; i++)
993 localQR(i,0) /= norm;
994
995 } else {
996 Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
997 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
998 qrSolver.factor();
999
1000 // R = upper triangular part of localQR
1001 for (size_t j = 0; j < NSDim; j++)
1002 for (size_t k = 0; k <= j; k++)
1003 coarseNS[j][offset+k] = localQR(k,j); //TODO is offset+k the correct local ID?!
1004
1005 // Calculate Q, the tentative prolongator.
1006 // The Lapack GEQRF call only works for myAggsize >= NSDim
1007 qrSolver.formQ();
1008 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
1009 for (size_t j = 0; j < NSDim; j++)
1010 for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
1011 localQR(i,j) = (*qFactor)(i,j);
1012 }
1013
1014 } else {
1015 // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
1016
1017 // The local QR decomposition is not possible in the "overconstrained"
1018 // case (i.e. number of columns in localQR > number of rows), which
1019 // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
1020 // is only possible for single node aggregates in structural mechanics.
1021 // (Similar problems may arise in discontinuous Galerkin problems...)
1022 // We bypass the QR decomposition and use an identity block in the
1023 // tentative prolongator for the single node aggregate and transfer the
1024 // corresponding fine level null space information 1-to-1 to the coarse
1025 // level null space part.
1026
1027 // NOTE: The resulting tentative prolongation operator has
1028 // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
1029 // coarse level operator A. To deal with that one has the following
1030 // options:
1031 // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
1032 // false) to add some identity block to the diagonal of the zero rows
1033 // in the coarse level operator A, such that standard level smoothers
1034 // can be used again.
1035 // - Use special (projection-based) level smoothers, which can deal
1036 // with singular matrices (very application specific)
1037 // - Adapt the code below to avoid zero columns. However, we do not
1038 // support a variable number of DOFs per node in MueLu/Xpetra which
1039 // makes the implementation really hard.
1040
1041 // R = extended (by adding identity rows) localQR
1042 for (size_t j = 0; j < NSDim; j++)
1043 for (size_t k = 0; k < NSDim; k++)
1044 if (k < as<size_t>(aggSize))
1045 coarseNS[j][offset+k] = localQR(k,j);
1046 else
1047 coarseNS[j][offset+k] = (k == j ? one : zero);
1048
1049 // Q = I (rectangular)
1050 for (size_t i = 0; i < as<size_t>(aggSize); i++)
1051 for (size_t j = 0; j < NSDim; j++)
1052 localQR(i,j) = (j == i ? one : zero);
1053 }
1054
1055
1056 // Process each row in the local Q factor
1057 // FIXME: What happens if maps are blocked?
1058 for (LO j = 0; j < aggSize; j++) {
1059 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg]+j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]));
1060
1061 size_t rowStart = ia[localRow];
1062 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1063 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1064 if (localQR(j,k) != zero) {
1065 ja [rowStart+lnnz] = offset + k;
1066 val[rowStart+lnnz] = localQR(j,k);
1067 lnnz++;
1068 }
1069 }
1070 }
1071 }
1072
1073 } else {
1074 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1075 if (NSDim>1)
1076 GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1078 // "no-QR" option //
1080 // Local Q factor is just the fine nullspace support over the current aggregate.
1081 // Local R factor is the identity.
1082 // TODO I have not implemented any special handling for aggregates that are too
1083 // TODO small to locally support the nullspace, as is done in the standard QR
1084 // TODO case above.
1085 if (goodMap) {
1086 for (GO agg = 0; agg < numAggs; agg++) {
1087 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1088 Xpetra::global_size_t offset = agg*NSDim;
1089
1090 // Process each row in the local Q factor
1091 // FIXME: What happens if maps are blocked?
1092 for (LO j = 0; j < aggSize; j++) {
1093
1094 //TODO Here I do not check for a zero nullspace column on the aggregate.
1095 // as is done in the standard QR case.
1096
1097 const LO localRow = aggToRowMapLO[aggStart[agg]+j];
1098
1099 const size_t rowStart = ia[localRow];
1100
1101 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1102 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1103 SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg]+j]];
1104 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1105 if (qr_jk != zero) {
1106 ja [rowStart+lnnz] = offset + k;
1107 val[rowStart+lnnz] = qr_jk;
1108 lnnz++;
1109 }
1110 }
1111 }
1112 for (size_t j = 0; j < NSDim; j++)
1113 coarseNS[j][offset+j] = one;
1114 } //for (GO agg = 0; agg < numAggs; agg++)
1115
1116 } else {
1117 for (GO agg = 0; agg < numAggs; agg++) {
1118 const LO aggSize = aggStart[agg+1] - aggStart[agg];
1119 Xpetra::global_size_t offset = agg*NSDim;
1120 for (LO j = 0; j < aggSize; j++) {
1121
1122 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]);
1123
1124 const size_t rowStart = ia[localRow];
1125
1126 for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1127 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1128 SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j])];
1129 if(constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1130 if (qr_jk != zero) {
1131 ja [rowStart+lnnz] = offset + k;
1132 val[rowStart+lnnz] = qr_jk;
1133 lnnz++;
1134 }
1135 }
1136 }
1137 for (size_t j = 0; j < NSDim; j++)
1138 coarseNS[j][offset+j] = one;
1139 } //for (GO agg = 0; agg < numAggs; agg++)
1140
1141 } //if (goodmap) else ...
1142
1143 } //if doQRStep ... else
1144
1145 // Compress storage (remove all INVALID, which happen when we skip zeros)
1146 // We do that in-place
1147 size_t ia_tmp = 0, nnz = 0;
1148 for (size_t i = 0; i < numRows; i++) {
1149 for (size_t j = ia_tmp; j < ia[i+1]; j++)
1150 if (ja[j] != INVALID) {
1151 ja [nnz] = ja [j];
1152 val[nnz] = val[j];
1153 nnz++;
1154 }
1155 ia_tmp = ia[i+1];
1156 ia[i+1] = nnz;
1157 }
1158 if (rowMap->lib() == Xpetra::UseTpetra) {
1159 // - Cannot resize for Epetra, as it checks for same pointers
1160 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1161 // NOTE: these invalidate ja and val views
1162 jaPtent .resize(nnz);
1163 valPtent.resize(nnz);
1164 }
1165
1166 GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1167
1168 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1169
1170
1171 // Managing labels & constants for ESFC
1172 RCP<ParameterList> FCparams;
1173 if(pL.isSublist("matrixmatrix: kernel params"))
1174 FCparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1175 else
1176 FCparams= rcp(new ParameterList);
1177 // By default, we don't need global constants for TentativeP
1178 FCparams->set("compute global constants",FCparams->get("compute global constants",false));
1179 std::string levelIDs = toString(levelID);
1180 FCparams->set("Timer Label",std::string("MueLu::TentativeP-")+levelIDs);
1181 RCP<const Export> dummy_e;
1182 RCP<const Import> dummy_i;
1183
1184 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(),dummy_i,dummy_e,FCparams);
1185 }
1186
1187
1188
1189} //namespace MueLu
1190
1191// TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1192
1193#define MUELU_TENTATIVEPFACTORY_SHORT
1194#endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
int GetLevelID() const
Return level number.
static const NoFactory * get()
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Runtime1
Description of what is happening (more verbose)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.