732 using Teuchos::rcp_const_cast;
733 using Teuchos::rcp_dynamic_cast;
734 using Teuchos::Array;
735 using Teuchos::ArrayView;
736 const char prefix[] =
"Ifpack2::RILUK::compute: ";
741 TEUCHOS_TEST_FOR_EXCEPTION
742 (A_.is_null (), std::runtime_error, prefix <<
"The matrix is null. Please "
743 "call setMatrix() with a nonnull input before calling this method.");
744 TEUCHOS_TEST_FOR_EXCEPTION
745 (! A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix is not "
746 "fill complete. You may not invoke initialize() or compute() with this "
747 "matrix until the matrix is fill complete. If your matrix is a "
748 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
749 "range Maps, if appropriate) before calling this method.");
751 if (! isInitialized ()) {
755 Teuchos::Time timer (
"RILUK::compute");
758 Teuchos::TimeMonitor timeMon (timer);
759 double startTime = timer.wallTime();
763 if (!this->isKokkosKernelsSpiluk_) {
766 initAllValues (*A_local_);
772 const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
774 size_t NumIn, NumL, NumU;
777 const size_t MaxNumEntries =
778 L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1;
780 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries);
781 Teuchos::Array<scalar_type> InV(MaxNumEntries);
782 size_t num_cols = U_->getColMap()->getLocalNumElements();
783 Teuchos::Array<int> colflag(num_cols);
785 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
789 for (
size_t j = 0; j < num_cols; ++j) {
792 using IST =
typename row_matrix_type::impl_scalar_type;
793 for (
size_t i = 0; i < L_->getLocalNumRows (); ++i) {
797 local_inds_host_view_type UUI;
798 values_host_view_type UUV;
802 NumIn = MaxNumEntries;
803 nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries);
804 nonconst_values_host_view_type InV_v(
reinterpret_cast<IST*
>(InV.data()),MaxNumEntries);
806 L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL);
809 InI[NumL] = local_row;
811 nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
812 nonconst_values_host_view_type InV_sub(
reinterpret_cast<IST*
>(InV.data())+NumL+1,MaxNumEntries-NumL-1);
814 U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
818 for (
size_t j = 0; j < NumIn; ++j) {
824 for (
size_t jj = 0; jj < NumL; ++jj) {
826 IST multiplier = InV[jj];
830 U_->getLocalRowView(j, UUI, UUV);
833 if (RelaxValue_ == STM::zero ()) {
834 for (
size_t k = 0; k < NumUU; ++k) {
835 const int kk = colflag[UUI[k]];
840 InV[kk] -=
static_cast<scalar_type>(multiplier * UUV[k]);
846 for (
size_t k = 0; k < NumUU; ++k) {
850 const int kk = colflag[UUI[k]];
852 InV[kk] -=
static_cast<scalar_type>(multiplier*UUV[k]);
855 diagmod -=
static_cast<scalar_type>(multiplier*UUV[k]);
863 L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
868 if (RelaxValue_ != STM::zero ()) {
869 DV(i) += RelaxValue_*diagmod;
872 if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) {
873 if (STS::real (DV(i)) < STM::zero ()) {
874 DV(i) = -MinDiagonalValue;
877 DV(i) = MinDiagonalValue;
884 for (
size_t j = 0; j < NumU; ++j) {
890 U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
894 for (
size_t j = 0; j < NumIn; ++j) {
895 colflag[InI[j]] = -1;
906 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
907 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
910 L_solver_->setMatrix (L_);
911 L_solver_->compute ();
912 U_solver_->setMatrix (U_);
913 U_solver_->compute ();
917 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
918 if(A_local_crs.is_null()) {
920 Array<size_t> entriesPerRow(numRows);
922 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
924 RCP<crs_matrix_type> A_local_crs_nc =
926 A_local_->getColMap (),
929 nonconst_local_inds_host_view_type indices(
"indices",A_local_->getLocalMaxNumRowEntries());
930 nonconst_values_host_view_type values(
"values",A_local_->getLocalMaxNumRowEntries());
932 size_t numEntries = 0;
933 A_local_->getLocalRowCopy(i, indices, values, numEntries);
934 A_local_crs_nc->insertLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()),indices.data());
936 A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
937 A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
939 auto lclMtx = A_local_crs->getLocalMatrixDevice();
940 A_local_rowmap_ = lclMtx.graph.row_map;
941 A_local_entries_ = lclMtx.graph.entries;
942 A_local_values_ = lclMtx.values;
948 if (L_->isStaticGraph () || L_->isLocallyIndexed ()) {
949 L_->setAllToScalar (STS::zero ());
950 U_->setAllToScalar (STS::zero ());
953 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
956 auto lclL = L_->getLocalMatrixDevice();
957 row_map_type L_rowmap = lclL.graph.row_map;
958 auto L_entries = lclL.graph.entries;
959 auto L_values = lclL.values;
961 auto lclU = U_->getLocalMatrixDevice();
962 row_map_type U_rowmap = lclU.graph.row_map;
963 auto U_entries = lclU.graph.entries;
964 auto U_values = lclU.values;
966 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
967 A_local_rowmap_, A_local_entries_, A_local_values_,
968 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
971 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
972 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
974 L_solver_->setMatrix (L_);
975 L_solver_->compute ();
976 U_solver_->setMatrix (U_);
977 U_solver_->compute ();
982 computeTime_ += (timer.wallTime() - startTime);
989apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
990 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
991 Teuchos::ETransp mode,
996 using Teuchos::rcpFromRef;
998 TEUCHOS_TEST_FOR_EXCEPTION(
999 A_.is_null (), std::runtime_error,
"Ifpack2::RILUK::apply: The matrix is "
1000 "null. Please call setMatrix() with a nonnull input, then initialize() "
1001 "and compute(), before calling this method.");
1002 TEUCHOS_TEST_FOR_EXCEPTION(
1003 ! isComputed (), std::runtime_error,
1004 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1005 "you must call compute() before calling this method.");
1006 TEUCHOS_TEST_FOR_EXCEPTION(
1007 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1008 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1009 "X.getNumVectors() = " << X.getNumVectors ()
1010 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
1011 TEUCHOS_TEST_FOR_EXCEPTION(
1012 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1013 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1014 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1015 "fixed. There is a FIXME in this file about this very issue.");
1016#ifdef HAVE_IFPACK2_DEBUG
1019 TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1020 Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
1023 for (
size_t j = 0; j < X.getNumVectors (); ++j) {
1024 if (STM::isnaninf (norms[j])) {
1029 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1036 Teuchos::Time timer (
"RILUK::apply");
1037 double startTime = timer.wallTime();
1039 Teuchos::TimeMonitor timeMon (timer);
1040 if (alpha == one && beta == zero) {
1041 if (mode == Teuchos::NO_TRANS) {
1042#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1046 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1049 L_solver_->apply (X, Y_tmp, mode);
1051 if (!this->isKokkosKernelsSpiluk_) {
1054 Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1057 U_solver_->apply (Y_tmp, Y, mode);
1060 L_solver_->apply (X, Y, mode);
1062 if (!this->isKokkosKernelsSpiluk_) {
1065 Y.elementWiseMultiply (one, *D_, Y, zero);
1068 U_solver_->apply (Y, Y, mode);
1072#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1076 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1079 U_solver_->apply (X, Y_tmp, mode);
1081 if (!this->isKokkosKernelsSpiluk_) {
1087 Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1090 L_solver_->apply (Y_tmp, Y, mode);
1093 U_solver_->apply (X, Y, mode);
1095 if (!this->isKokkosKernelsSpiluk_) {
1101 Y.elementWiseMultiply (one, *D_, Y, zero);
1104 L_solver_->apply (Y, Y, mode);
1109 if (alpha == zero) {
1119 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1120 apply (X, Y_tmp, mode);
1121 Y.update (alpha, Y_tmp, beta);
1126#ifdef HAVE_IFPACK2_DEBUG
1128 Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
1131 for (
size_t j = 0; j < Y.getNumVectors (); ++j) {
1132 if (STM::isnaninf (norms[j])) {
1137 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1142 applyTime_ += (timer.wallTime() - startTime);