Ifpack Package Browser (Single Doxygen Collection) Development
Loading...
Searching...
No Matches
Euclid_dh.c
Go to the documentation of this file.
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack: Object-Oriented Algebraic Preconditioner Package
5// Copyright (2002) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#include "Euclid_dh.h"
44#include "Mem_dh.h"
45#include "Mat_dh.h"
46#include "Vec_dh.h"
47#include "Factor_dh.h"
48#include "getRow_dh.h"
49#include "ilu_dh.h"
50#include "Parser_dh.h"
51#include "SortedList_dh.h"
52#include "SubdomainGraph_dh.h"
53#include "ExternalRows_dh.h"
54#include "krylov_dh.h"
55
57static void invert_diagonals_private (Euclid_dh ctx);
58static void compute_rho_private (Euclid_dh ctx);
59static void factor_private (Euclid_dh ctx);
60/* static void discard_indices_private(Euclid_dh ctx); */
61static void reduce_timings_private (Euclid_dh ctx);
62
63#undef __FUNC__
64#define __FUNC__ "Euclid_dhCreate"
65void
67{
69 struct _mpi_interface_dh *ctx =
70 (struct _mpi_interface_dh *)
71 MALLOC_DH (sizeof (struct _mpi_interface_dh));
73 *ctxOUT = ctx;
74
75 ctx->isSetup = false;
76
77 ctx->rho_init = 2.0;
78 ctx->rho_final = 0.0;
79
80 ctx->m = 0;
81 ctx->n = 0;
82 ctx->rhs = NULL;
83 ctx->A = NULL;
84 ctx->F = NULL;
85 ctx->sg = NULL;
86
87 ctx->scale = NULL;
88 ctx->isScaled = false;
89 ctx->work = NULL;
90 ctx->work2 = NULL;
91 ctx->from = 0;
92 ctx->to = 0;
93
94 strcpy (ctx->algo_par, "pilu");
95 strcpy (ctx->algo_ilu, "iluk");
96 ctx->level = 1;
97 ctx->droptol = DEFAULT_DROP_TOL;
98 ctx->sparseTolA = 0.0;
99 ctx->sparseTolF = 0.0;
100 ctx->pivotMin = 0.0;
101 ctx->pivotFix = PIVOT_FIX_DEFAULT;
102 ctx->maxVal = 0.0;
103
104 ctx->slist = NULL;
105 ctx->extRows = NULL;
106
107 strcpy (ctx->krylovMethod, "bicgstab");
108 ctx->maxIts = 200;
109 ctx->rtol = 1e-5;
110 ctx->atol = 1e-50;
111 ctx->its = 0;
112 ctx->itsTotal = 0;
113 ctx->setupCount = 0;
114 ctx->logging = 0;
115 ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats"));
116
117 {
118 int i;
119 for (i = 0; i < TIMING_BINS; ++i)
120 ctx->timing[i] = 0.0;
121 for (i = 0; i < STATS_BINS; ++i)
122 ctx->stats[i] = 0.0;
123 }
124 ctx->timingsWereReduced = false;
125
126 ++ref_counter;
128
129
130#undef __FUNC__
131#define __FUNC__ "Euclid_dhDestroy"
132void
134{
136 if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging)
137 {
138 /* insert switch so memory report will also be printed */
139 Parser_dhInsert (parser_dh, "-eu_mem", "1");
141 Euclid_dhPrintHypreReport (ctx, stdout);
143 }
144
145 if (ctx->setupCount > 1 && ctx->printStats)
146 {
147 Euclid_dhPrintStatsShorter (ctx, stdout);
149 }
150
151 if (ctx->F != NULL)
152 {
153 Factor_dhDestroy (ctx->F);
155 }
156 if (ctx->sg != NULL)
157 {
158 SubdomainGraph_dhDestroy (ctx->sg);
160 }
161 if (ctx->scale != NULL)
162 {
163 FREE_DH (ctx->scale);
165 }
166 if (ctx->work != NULL)
167 {
168 FREE_DH (ctx->work);
170 }
171 if (ctx->work2 != NULL)
172 {
173 FREE_DH (ctx->work2);
175 }
176 if (ctx->slist != NULL)
177 {
178 SortedList_dhDestroy (ctx->slist);
180 }
181 if (ctx->extRows != NULL)
182 {
183 ExternalRows_dhDestroy (ctx->extRows);
185 }
186 FREE_DH (ctx);
188
189 --ref_counter;
191
192
193/* on entry, "A" must have been set. If this context is being
194 reused, user must ensure ???
195*/
196#undef __FUNC__
197#define __FUNC__ "Euclid_dhSetup"
198void
200{
201 START_FUNC_DH int m, n, beg_row;
202 double t1;
203 bool isSetup = ctx->isSetup;
204 bool bj = false;
205
206 /*----------------------------------------------------
207 * If Euclid was previously setup, print summary of
208 * what happened during previous setup/solve
209 *----------------------------------------------------*/
210 if (ctx->setupCount && ctx->printStats)
211 {
212 Euclid_dhPrintStatsShorter (ctx, stdout);
214 ctx->its = 0;
215 }
216
217 /*----------------------------------------------------
218 * zero array for statistical reporting
219 *----------------------------------------------------*/
220 {
221 int i;
222 for (i = 0; i < STATS_BINS; ++i)
223 ctx->stats[i] = 0.0;
224 }
225
226 /*----------------------------------------------------
227 * internal timing
228 *----------------------------------------------------*/
229 ctx->timing[SOLVE_START_T] = MPI_Wtime ();
230 /* sum timing from last linear solve cycle, if any */
231 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
232 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
233
234 if (ctx->F != NULL)
235 {
236 Factor_dhDestroy (ctx->F);
238 ctx->F = NULL;
239 }
240
241 if (ctx->A == NULL)
242 {
243 SET_V_ERROR ("must set ctx->A before calling init");
244 }
245 EuclidGetDimensions (ctx->A, &beg_row, &m, &n);
247 ctx->m = m;
248 ctx->n = n;
249
250 if (Parser_dhHasSwitch (parser_dh, "-print_size"))
251 {
253 ("setting up linear system; global rows: %i local rows: %i (on P_0)\n",
254 n, m);
255 }
256
257 sprintf (msgBuf_dh, "localRow= %i; globalRows= %i; beg_row= %i", m, n,
258 beg_row);
260
261 bj = Parser_dhHasSwitch (parser_dh, "-bj");
262
263 /*------------------------------------------------------------------------
264 * Setup the SubdomainGraph, which contains connectivity and
265 * and permutation information. If this context is being reused,
266 * this may already have been done; if being resused, the underlying
267 * subdomain graph cannot change (user's responsibility?)
268 *------------------------------------------------------------------------*/
269 if (ctx->sg == NULL)
270 {
271 int blocks = np_dh;
272 t1 = MPI_Wtime ();
273 if (np_dh == 1)
274 {
275 Parser_dhReadInt (parser_dh, "-blocks", &blocks);
277 SubdomainGraph_dhCreate (&(ctx->sg));
279 SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A);
281 }
282 else
283 {
284 SubdomainGraph_dhCreate (&(ctx->sg));
286 SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A);
288 }
289 ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1);
290 }
291
292
293/* SubdomainGraph_dhDump(ctx->sg, "SG.dump"); CHECK_V_ERROR; */
294
295 /*----------------------------------------------------
296 * for debugging
297 *----------------------------------------------------*/
298 if (Parser_dhHasSwitch (parser_dh, "-doNotFactor"))
299 {
300 goto END_OF_FUNCTION;
301 }
302
303
304 /*----------------------------------------------------
305 * query parser for runtime parameters
306 *----------------------------------------------------*/
307 if (!isSetup)
308 {
311 }
312 if (!strcmp (ctx->algo_par, "bj"))
313 bj = false;
314
315 /*---------------------------------------------------------
316 * allocate and initialize storage for row-scaling
317 * (ctx->isScaled is set in get_runtime_params_private(); )
318 *---------------------------------------------------------*/
319 if (ctx->scale == NULL)
320 {
321 ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
323 }
324 {
325 int i;
326 for (i = 0; i < m; ++i)
327 ctx->scale[i] = 1.0;
328 }
329
330 /*------------------------------------------------------------------
331 * allocate work vectors; used in factorization and triangular solves;
332 *------------------------------------------------------------------*/
333 if (ctx->work == NULL)
334 {
335 ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
337 }
338 if (ctx->work2 == NULL)
339 {
340 ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH));
342 }
343
344 /*-----------------------------------------------------------------
345 * perform the incomplete factorization (this should be, at least
346 * for higher level ILUK, the most time-intensive portion of setup)
347 *-----------------------------------------------------------------*/
348 t1 = MPI_Wtime ();
349 factor_private (ctx);
351 ctx->timing[FACTOR_T] += (MPI_Wtime () - t1);
352
353 /*--------------------------------------------------------------
354 * invert diagonals, for faster triangular solves
355 *--------------------------------------------------------------*/
356 if (strcmp (ctx->algo_par, "none"))
357 {
360 }
361
362 /*--------------------------------------------------------------
363 * compute rho_final: global ratio of nzF/nzA
364 * also, if -sparseA > 0, compute ratio of nzA
365 * used in factorization
366 *--------------------------------------------------------------*/
367 /* for some reason compute_rho_private() was expensive, so now it's
368 an option, unless there's only one mpi task.
369 */
370 if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1)
371 {
372 if (strcmp (ctx->algo_par, "none"))
373 {
374 t1 = MPI_Wtime ();
377 ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1);
378 }
379 }
380
381 /*--------------------------------------------------------------
382 * if using PILU, set up persistent comms and global-to-local
383 * number scheme, for efficient triangular solves.
384 * (Thanks to Edmond Chow for these algorithmic ideas.)
385 *--------------------------------------------------------------*/
386
387 if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1)
388 {
389 t1 = MPI_Wtime ();
390 Factor_dhSolveSetup (ctx->F, ctx->sg);
392 ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1);
393 }
394
395END_OF_FUNCTION:;
396
397 /*-------------------------------------------------------
398 * internal timing
399 *-------------------------------------------------------*/
400 ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]);
401 ctx->setupCount += 1;
402
403 ctx->isSetup = true;
404
406
407
408#undef __FUNC__
409#define __FUNC__ "get_runtime_params_private"
410void
412{
413 START_FUNC_DH char *tmp;
414
415 /* params for use of internal solvers */
416 Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts));
417 Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol));
418 Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol));
419
420 /* parallelization strategy (bj, pilu, none) */
421 tmp = NULL;
422 Parser_dhReadString (parser_dh, "-par", &tmp);
423 if (tmp != NULL)
424 {
425 strcpy (ctx->algo_par, tmp);
426 }
427 if (Parser_dhHasSwitch (parser_dh, "-bj"))
428 {
429 strcpy (ctx->algo_par, "bj");
430 }
431
432
433 /* factorization parameters */
434 Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init));
435 /* inital storage allocation for factor */
436 Parser_dhReadInt (parser_dh, "-level", &ctx->level);
437 Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level);
438
439 if (Parser_dhHasSwitch (parser_dh, "-ilut"))
440 {
441 Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol);
442 ctx->isScaled = true;
443 strcpy (ctx->algo_ilu, "ilut");
444 }
445
446 /* make sure both algo_par and algo_ilu are set to "none,"
447 if at least one is.
448 */
449 if (!strcmp (ctx->algo_par, "none"))
450 {
451 strcpy (ctx->algo_ilu, "none");
452 }
453 else if (!strcmp (ctx->algo_ilu, "none"))
454 {
455 strcpy (ctx->algo_par, "none");
456 }
457
458
459 Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA));
460 /* sparsify A before factoring */
461 Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF));
462 /* sparsify after factoring */
463 Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin));
464 /* adjust pivots if smaller than this */
465 Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix));
466 /* how to adjust pivots */
467
468 /* set row scaling for mandatory cases */
469 if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut"))
470 {
471 ctx->isScaled = true;
472 }
473
474 /* solve method */
475 tmp = NULL;
476 Parser_dhReadString (parser_dh, "-ksp_type", &tmp);
477 if (tmp != NULL)
478 {
479 strcpy (ctx->krylovMethod, tmp);
480
481 if (!strcmp (ctx->krylovMethod, "bcgs"))
482 {
483 strcpy (ctx->krylovMethod, "bicgstab");
484 }
485 }
487
488#undef __FUNC__
489#define __FUNC__ "invert_diagonals_private"
490void
492{
493 START_FUNC_DH REAL_DH * aval = ctx->F->aval;
494 int *diag = ctx->F->diag;
495 if (aval == NULL || diag == NULL)
496 {
497 SET_INFO ("can't invert diags; either F->aval or F->diag is NULL");
498 }
499 else
500 {
501 int i, m = ctx->F->m;
502 for (i = 0; i < m; ++i)
503 {
504 aval[diag[i]] = 1.0 / aval[diag[i]];
505 }
506 }
508
509
510/* records rho_final (max for all processors) */
511#undef __FUNC__
512#define __FUNC__ "compute_rho_private"
513void
515{
516 START_FUNC_DH if (ctx->F != NULL)
517 {
518 double bufLocal[3], bufGlobal[3];
519 int m = ctx->m;
520
521 ctx->stats[NZF_STATS] = (double) ctx->F->rp[m];
522 bufLocal[0] = ctx->stats[NZA_STATS]; /* nzA */
523 bufLocal[1] = ctx->stats[NZF_STATS]; /* nzF */
524 bufLocal[2] = ctx->stats[NZA_USED_STATS]; /* nzA used */
525
526 if (np_dh == 1)
527 {
528 bufGlobal[0] = bufLocal[0];
529 bufGlobal[1] = bufLocal[1];
530 bufGlobal[2] = bufLocal[2];
531 }
532 else
533 {
534 MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0,
535 comm_dh);
536 }
537
538 if (myid_dh == 0)
539 {
540
541 /* compute rho */
542 if (bufGlobal[0] && bufGlobal[1])
543 {
544 ctx->rho_final = bufGlobal[1] / bufGlobal[0];
545 }
546 else
547 {
548 ctx->rho_final = -1;
549 }
550
551 /* compute ratio of nonzeros in A that were used */
552 if (bufGlobal[0] && bufGlobal[2])
553 {
554 ctx->stats[NZA_RATIO_STATS] =
555 100.0 * bufGlobal[2] / bufGlobal[0];
556 }
557 else
558 {
559 ctx->stats[NZA_RATIO_STATS] = 100.0;
560 }
561 }
562 }
564
565#undef __FUNC__
566#define __FUNC__ "factor_private"
567void
569{
571 /*-------------------------------------------------------------
572 * special case, for testing/debugging: no preconditioning
573 *-------------------------------------------------------------*/
574 if (!strcmp (ctx->algo_par, "none"))
575 {
576 goto DO_NOTHING;
577 }
578
579 /*-------------------------------------------------------------
580 * Initialize object to hold factor.
581 *-------------------------------------------------------------*/
582 {
583 int br = 0;
584 int id = np_dh;
585 if (ctx->sg != NULL)
586 {
587 br = ctx->sg->beg_rowP[myid_dh];
588 id = ctx->sg->o2n_sub[myid_dh];
589 }
590 Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F));
592 ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh];
593 ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count;
594 if (!strcmp (ctx->algo_par, "bj"))
595 ctx->F->blockJacobi = true;
596 if (Parser_dhHasSwitch (parser_dh, "-bj"))
597 ctx->F->blockJacobi = true;
598 }
599
600 /*-------------------------------------------------------------
601 * single mpi task with single or multiple subdomains
602 *-------------------------------------------------------------*/
603 if (np_dh == 1)
604 {
605
606 /* ILU(k) factorization */
607 if (!strcmp (ctx->algo_ilu, "iluk"))
608 {
609 ctx->from = 0;
610 ctx->to = ctx->m;
611
612 /* only for debugging: use ilu_mpi_pilu */
613 if (Parser_dhHasSwitch (parser_dh, "-mpi"))
614 {
615 if (ctx->sg != NULL && ctx->sg->blocks > 1)
616 {
618 ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1");
619 }
620 iluk_mpi_pilu (ctx);
622 }
623
624 /* "normal" operation */
625 else
626 {
627 iluk_seq_block (ctx);
629 /* note: iluk_seq_block() performs block jacobi iluk if ctx->algo_par == bj. */
630 }
631 }
632
633 /* ILUT factorization */
634 else if (!strcmp (ctx->algo_ilu, "ilut"))
635 {
636 ctx->from = 0;
637 ctx->to = ctx->m;
638 ilut_seq (ctx);
640 }
641
642 /* all other factorization methods */
643 else
644 {
645 sprintf (msgBuf_dh, "factorization method: %s is not implemented",
646 ctx->algo_ilu);
648 }
649 }
650
651 /*-------------------------------------------------------------
652 * multiple mpi tasks with multiple subdomains
653 *-------------------------------------------------------------*/
654 else
655 {
656 /* block jacobi */
657 if (!strcmp (ctx->algo_par, "bj"))
658 {
659 ctx->from = 0;
660 ctx->to = ctx->m;
661 iluk_mpi_bj (ctx);
663 }
664
665 /* iluk */
666 else if (!strcmp (ctx->algo_ilu, "iluk"))
667 {
668 bool bj = ctx->F->blockJacobi; /* for debugging */
669
670 /* printf_dh("\n@@@ starting ilu_mpi_pilu @@@\n"); */
671
672 SortedList_dhCreate (&(ctx->slist));
674 SortedList_dhInit (ctx->slist, ctx->sg);
676 ExternalRows_dhCreate (&(ctx->extRows));
678 ExternalRows_dhInit (ctx->extRows, ctx);
680
681 /* factor interior rows */
682 ctx->from = 0;
683 ctx->to = ctx->F->first_bdry;
684
685/*
686if (Parser_dhHasSwitch(parser_dh, "-test")) {
687 printf("[%i] Euclid_dh :: TESTING ilu_seq\n", myid_dh);
688 iluk_seq(ctx); CHECK_V_ERROR;
689} else {
690 iluk_mpi_pilu(ctx); CHECK_V_ERROR;
691}
692*/
693
694 iluk_seq (ctx);
696
697 /* get external rows from lower ordered neighbors in the
698 subdomain graph; these rows are needed for factoring
699 this subdomain's boundary rows.
700 */
701 if (!bj)
702 {
703 ExternalRows_dhRecvRows (ctx->extRows);
705 }
706
707 /* factor boundary rows */
708 ctx->from = ctx->F->first_bdry;
709 ctx->to = ctx->F->m;
710 iluk_mpi_pilu (ctx);
712
713 /* send this processor's boundary rows to higher ordered
714 neighbors in the subdomain graph.
715 */
716 if (!bj)
717 {
718 ExternalRows_dhSendRows (ctx->extRows);
720 }
721
722 /* discard column indices in factor if they would alter
723 the subdomain graph (any such elements are in upper
724 triangular portion of the row)
725 */
726
727
728 SortedList_dhDestroy (ctx->slist);
730 ctx->slist = NULL;
731 ExternalRows_dhDestroy (ctx->extRows);
733 ctx->extRows = NULL;
734 }
735
736 /* all other factorization methods */
737 else
738 {
739 sprintf (msgBuf_dh, "factorization method: %s is not implemented",
740 ctx->algo_ilu);
742 }
743 }
744
745DO_NOTHING:;
746
748
749#if 0
750
751#undef __FUNC__
752#define __FUNC__ "discard_indices_private"
753void
754discard_indices_private (Euclid_dh ctx)
755{
757#if 0
758 int *rp = ctx->F->rp, *cval = ctx->F->cval;
759 double *aval = ctx->F->aval;
760 int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount;
761 int i, j, k, idx, count = 0, start_of_row;
762 int beg_row = ctx->beg_row, end_row = beg_row + m;
763 int *diag = ctx->F->diag;
764
765 /* if col is not locally owned, and doesn't belong to a
766 * nabor in the (original) subdomain graph, we need to discard
767 * the column index and associated value. First, we'll flag all
768 * such indices for deletion.
769 */
770 for (i = 0; i < m; ++i)
771 {
772 for (j = rp[i]; j < rp[i + 1]; ++j)
773 {
774 int col = cval[j];
775 if (col < beg_row || col >= end_row)
776 {
777 bool flag = true;
778 int owner = find_owner_private_mpi (ctx, col);
780
781 for (k = 0; k < nc; ++k)
782 {
783 if (nabors[k] == owner)
784 {
785 flag = false;
786 break;
787 }
788 }
789
790 if (flag)
791 {
792 cval[j] = -1;
793 ++count;
794 }
795 }
796 }
797 }
798
799 sprintf (msgBuf_dh,
800 "deleting %i indices that would alter the subdomain graph", count);
802
803 /* Second, perform the actual deletion */
804 idx = 0;
805 start_of_row = 0;
806 for (i = 0; i < m; ++i)
807 {
808 for (j = start_of_row; j < rp[i + 1]; ++j)
809 {
810 int col = cval[j];
811 double val = aval[j];
812 if (col != -1)
813 {
814 cval[idx] = col;
815 aval[idx] = val;
816 ++idx;
817 }
818 }
819 start_of_row = rp[i + 1];
820 rp[i + 1] = idx;
821 }
822
823 /* rebuild diagonal pointers */
824 for (i = 0; i < m; ++i)
825 {
826 for (j = rp[i]; j < rp[i + 1]; ++j)
827 {
828 if (cval[j] == i + beg_row)
829 {
830 diag[i] = j;
831 break;
832 }
833 }
834 }
835#endif
837#endif
838
839#undef __FUNC__
840#define __FUNC__ "Euclid_dhSolve"
841void
843{
844 START_FUNC_DH int itsOUT;
845 Mat_dh A = (Mat_dh) ctx->A;
846
847 if (!strcmp (ctx->krylovMethod, "cg"))
848 {
849 cg_euclid (A, ctx, x->vals, b->vals, &itsOUT);
850 ERRCHKA;
851 }
852 else if (!strcmp (ctx->krylovMethod, "bicgstab"))
853 {
854 bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT);
855 ERRCHKA;
856 }
857 else
858 {
859 sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod);
861 }
862 *its = itsOUT;
864
865#undef __FUNC__
866#define __FUNC__ "Euclid_dhPrintStats"
867void
869{
870 START_FUNC_DH double *timing;
871 int nz;
872
873 nz = Factor_dhReadNz (ctx->F);
875 timing = ctx->timing;
876
877 /* add in timing from lasst setup (if any) */
878 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
879 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
880
883
884 fprintf_dh (fp,
885 "\n==================== Euclid report (start) ====================\n");
886 fprintf_dh (fp, "\nruntime parameters\n");
887 fprintf_dh (fp, "------------------\n");
888 fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
889 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
890 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
891 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
892 fprintf_dh (fp, " matrix was row scaled: %i\n", ctx->isScaled);
893
894 fprintf_dh (fp, " matrix row count: %i\n", ctx->n);
895 fprintf_dh (fp, " nzF: %i\n", nz);
896 fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
897 fprintf_dh (fp, " level: %i\n", ctx->level);
898 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
899
900 fprintf_dh (fp, "\nEuclid timing report\n");
901 fprintf_dh (fp, "--------------------\n");
902 fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
904 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
905 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
906 fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
908 fprintf_dh (fp, " factorization: %0.2f\n", timing[FACTOR_T]);
909 fprintf_dh (fp, " solve setup: %0.2f\n",
911 fprintf_dh (fp, " rho: %0.2f\n",
912 ctx->timing[COMPUTE_RHO_T]);
913 fprintf_dh (fp, " misc (should be small): %0.2f\n",
917
918 if (ctx->sg != NULL)
919 {
920 SubdomainGraph_dhPrintStats (ctx->sg, fp);
922 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
924 }
925
926
927 fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n");
928 fprintf_dh (fp, "---------------------------------------------------\n");
929 fprintf_dh (fp, " solve method: %s\n", ctx->krylovMethod);
930 fprintf_dh (fp, " maxIts: %i\n", ctx->maxIts);
931 fprintf_dh (fp, " rtol: %g\n", ctx->rtol);
932 fprintf_dh (fp, " atol: %g\n", ctx->atol);
933 fprintf_dh (fp,
934 "\n==================== Euclid report (end) ======================\n");
936
937
938/* nzA ratio and rho refer to most recent solve, if more than
939 one solve (call to Setup) was performed. Other stats
940 are cumulative.
941*/
942#undef __FUNC__
943#define __FUNC__ "Euclid_dhPrintStatsShort"
944void
945Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve,
946 FILE * fp)
947{
948 START_FUNC_DH double *timing = ctx->timing;
949 /* double *stats = ctx->stats; */
950 /* double setup_factor; */
951 /* double setup_other; */
952 double apply_total;
953 double apply_per_it;
954 /* double nzUsedRatio; */
955 double perIt;
956 int blocks = np_dh;
957
958 if (np_dh == 1)
959 blocks = ctx->sg->blocks;
960
963
964 /* setup_factor = timing[FACTOR_T]; */
965 /* setup_other = timing[SETUP_T] - setup_factor; */
966 apply_total = timing[TRI_SOLVE_T];
967 apply_per_it = apply_total / (double) ctx->its;
968 /* nzUsedRatio = stats[NZA_RATIO_STATS]; */
969 perIt = solve / (double) ctx->its;
970
971 fprintf_dh (fp, "\n");
972 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
973 "method", "subdms", "level", "its", "setup", "solve", "total",
974 "perIt", "perIt", "rows");
975 fprintf_dh (fp,
976 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
977 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g XXX\n", ctx->algo_par, /* parallelization strategy [pilu, bj] */
978 blocks, /* number of subdomains */
979 ctx->level, /* level, for ILU(k) */
980 ctx->its, /* iterations */
981 setup, /* total setup time, from caller */
982 solve, /* total setup time, from caller */
983 setup + solve, /* total time, from caller */
984 perIt, /* time per iteration, solver+precond. */
985 apply_per_it, /* time per iteration, solver+precond. */
986 (double) ctx->n /* global unknnowns */
987 );
988
989
990
991
992#if 0
993 fprintf_dh (fp, "\n");
994 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
995 "", "", "", "", "", "setup", "setup", "", "", "", "", "", "");
996
997 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n",
998 "method", "subdms", "level", "its", "total", "factor",
999 "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows");
1000 fprintf_dh (fp,
1001 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n");
1002
1003
1004 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g XXX\n", ctx->algo_par, /* parallelization strategy [pilu, bj] */
1005 blocks, /* number of subdomains */
1006 ctx->level, /* level, for ILU(k) */
1007 ctx->its, /* iterations */
1008 setup, /* total setup time, from caller */
1009 solve, /* total setup time, from caller */
1010 setup_factor, /* pc solve: factorization */
1011 setup_other, /* pc setup: other */
1012 apply_total, /* triangular solve time */
1013 apply_per_it, /* time for one triangular solve */
1014 ctx->rho_final, /* rho */
1015 ctx->sparseTolA, /* sparseA tolerance */
1016 nzUsedRatio, /* percent of A that was used */
1017 (double) ctx->n /* global unknnowns */
1018 );
1019#endif
1020
1021#if 0
1022 /* special: for scalability studies */
1023 fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level",
1024 "subGph", "factor", "solveS", "perIt");
1025 fprintf_dh (fp, "------ ----- ----- ----- ----- ----- WW\n");
1026 fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n",
1027 ctx->algo_par,
1028 ctx->level,
1030 timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it);
1031#endif
1033
1034
1035/* its during last solve; rho; nzaUsed */
1036#undef __FUNC__
1037#define __FUNC__ "Euclid_dhPrintStatsShorter"
1038void
1040{
1041 START_FUNC_DH double *stats = ctx->stats;
1042
1043 int its = ctx->its;
1044 double rho = ctx->rho_final;
1045 double nzUsedRatio = stats[NZA_RATIO_STATS];
1046
1047
1048 fprintf_dh (fp, "\nStats from last linear solve: YY\n");
1049 fprintf_dh (fp, "%6s %6s %6s YY\n", "its", "rho", "A_%");
1050 fprintf_dh (fp, " ----- ----- ----- YY\n");
1051 fprintf_dh (fp, "%6i %6.2f %6.2f YYY\n", its, rho, nzUsedRatio);
1052
1054
1055#undef __FUNC__
1056#define __FUNC__ "Euclid_dhPrintScaling"
1057void
1059{
1060 START_FUNC_DH int i, m = ctx->m;
1061
1062 if (m > 10)
1063 m = 10;
1064
1065 if (ctx->scale == NULL)
1066 {
1067 SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?");
1068 }
1069
1070 fprintf (fp, "\n---------- 1st %i row scaling values:\n", m);
1071 for (i = 0; i < m; ++i)
1072 {
1073 fprintf (fp, " %i %g \n", i + 1, ctx->scale[i]);
1074 }
1076
1077
1078#undef __FUNC__
1079#define __FUNC__ "reduce_timings_private"
1080void
1082{
1083 START_FUNC_DH if (np_dh > 1)
1084 {
1085 double bufOUT[TIMING_BINS];
1086
1087 memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double));
1088 MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0,
1089 comm_dh);
1090 }
1091
1092 ctx->timingsWereReduced = true;
1094
1095#undef __FUNC__
1096#define __FUNC__ "Euclid_dhPrintHypreReport"
1097void
1099{
1100 START_FUNC_DH double *timing;
1101 int nz;
1102
1103 nz = Factor_dhReadNz (ctx->F);
1105 timing = ctx->timing;
1106
1107 /* add in timing from lasst setup (if any) */
1108 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T];
1109 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0;
1110
1113
1114 if (myid_dh == 0)
1115 {
1116
1117 fprintf (fp,
1118 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n");
1119 fprintf_dh (fp, "\nruntime parameters\n");
1120 fprintf_dh (fp, "------------------\n");
1121 fprintf_dh (fp, " setups: %i\n", ctx->setupCount);
1122 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal);
1123 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par);
1124 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu);
1125 if (!strcmp (ctx->algo_ilu, "iluk"))
1126 {
1127 fprintf_dh (fp, " level: %i\n", ctx->level);
1128 }
1129
1130 if (ctx->isScaled)
1131 {
1132 fprintf_dh (fp, " matrix was row scaled\n");
1133 }
1134
1135 fprintf_dh (fp, " global matrix row count: %i\n", ctx->n);
1136 fprintf_dh (fp, " nzF: %i\n", nz);
1137 fprintf_dh (fp, " rho: %g\n", ctx->rho_final);
1138 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA);
1139
1140 fprintf_dh (fp, "\nEuclid timing report\n");
1141 fprintf_dh (fp, "--------------------\n");
1142 fprintf_dh (fp, " solves total: %0.2f (see docs)\n",
1144 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]);
1145 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]);
1146 fprintf_dh (fp, " subdomain graph setup: %0.2f\n",
1148 fprintf_dh (fp, " factorization: %0.2f\n",
1149 timing[FACTOR_T]);
1150 fprintf_dh (fp, " solve setup: %0.2f\n",
1152 fprintf_dh (fp, " rho: %0.2f\n",
1153 ctx->timing[COMPUTE_RHO_T]);
1154 fprintf_dh (fp, " misc (should be small): %0.2f\n",
1158
1159 if (ctx->sg != NULL)
1160 {
1161 SubdomainGraph_dhPrintStats (ctx->sg, fp);
1163 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
1165 }
1166
1167 fprintf (fp,
1168 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n");
1169
1170 }
1171
1173
1174#undef __FUNC__
1175#define __FUNC__ "Euclid_dhPrintTestData"
1176void
1178{
1180 /* Print data that should remain that will hopefully
1181 remain the same for any platform.
1182 Possibly "tri solves" may change . . .
1183 */
1184 if (myid_dh == 0)
1185 {
1186 fprintf (fp, " setups: %i\n", ctx->setupCount);
1187 fprintf (fp, " tri solves: %i\n", ctx->its);
1188 fprintf (fp, " parallelization method: %s\n", ctx->algo_par);
1189 fprintf (fp, " factorization method: %s\n", ctx->algo_ilu);
1190 fprintf (fp, " level: %i\n", ctx->level);
1191 fprintf (fp, " row scaling: %i\n", ctx->isScaled);
1192 }
1193 SubdomainGraph_dhPrintRatios (ctx->sg, fp);
void Euclid_dhPrintStats(Euclid_dh ctx, FILE *fp)
Definition Euclid_dh.c:868
void Euclid_dhPrintTestData(Euclid_dh ctx, FILE *fp)
Definition Euclid_dh.c:1177
static void reduce_timings_private(Euclid_dh ctx)
Definition Euclid_dh.c:1081
void Euclid_dhSolve(Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its)
Definition Euclid_dh.c:842
void Euclid_dhDestroy(Euclid_dh ctx)
Definition Euclid_dh.c:133
static void factor_private(Euclid_dh ctx)
Definition Euclid_dh.c:568
void Euclid_dhSetup(Euclid_dh ctx)
Definition Euclid_dh.c:199
void Euclid_dhCreate(Euclid_dh *ctxOUT)
Definition Euclid_dh.c:66
void Euclid_dhPrintStatsShort(Euclid_dh ctx, double setup, double solve, FILE *fp)
Definition Euclid_dh.c:945
void Euclid_dhPrintHypreReport(Euclid_dh ctx, FILE *fp)
Definition Euclid_dh.c:1098
static void invert_diagonals_private(Euclid_dh ctx)
Definition Euclid_dh.c:491
void Euclid_dhPrintScaling(Euclid_dh ctx, FILE *fp)
Definition Euclid_dh.c:1058
void Euclid_dhPrintStatsShorter(Euclid_dh ctx, FILE *fp)
Definition Euclid_dh.c:1039
static void compute_rho_private(Euclid_dh ctx)
Definition Euclid_dh.c:514
static void get_runtime_params_private(Euclid_dh ctx)
Definition Euclid_dh.c:411
#define DEFAULT_DROP_TOL
Definition Euclid_dh.h:46
@ COMPUTE_RHO_T
Definition Euclid_dh.h:116
@ SUB_GRAPH_T
Definition Euclid_dh.h:113
@ TRI_SOLVE_T
Definition Euclid_dh.h:111
@ SOLVE_START_T
Definition Euclid_dh.h:110
@ TOTAL_SOLVE_T
Definition Euclid_dh.h:119
@ FACTOR_T
Definition Euclid_dh.h:114
@ TOTAL_SOLVE_TEMP_T
Definition Euclid_dh.h:118
@ SETUP_T
Definition Euclid_dh.h:112
@ SOLVE_SETUP_T
Definition Euclid_dh.h:115
#define STATS_BINS
Definition Euclid_dh.h:123
#define TIMING_BINS
Definition Euclid_dh.h:108
@ NZA_STATS
Definition Euclid_dh.h:125
@ NZF_STATS
Definition Euclid_dh.h:126
@ NZA_USED_STATS
Definition Euclid_dh.h:127
@ NZA_RATIO_STATS
Definition Euclid_dh.h:128
void ExternalRows_dhInit(ExternalRows_dh er, Euclid_dh ctx)
void ExternalRows_dhCreate(ExternalRows_dh *er)
void ExternalRows_dhSendRows(ExternalRows_dh er)
void ExternalRows_dhDestroy(ExternalRows_dh er)
void ExternalRows_dhRecvRows(ExternalRows_dh er)
void Factor_dhInit(void *A, bool fillFlag, bool avalFlag, double rho, int id, int beg_rowP, Factor_dh *Fout)
Definition Factor_dh.c:1144
void Factor_dhSolveSetup(Factor_dh mat, SubdomainGraph_dh sg)
Definition Factor_dh.c:649
void Factor_dhDestroy(Factor_dh mat)
Definition Factor_dh.c:116
int Factor_dhReadNz(Factor_dh mat)
Definition Factor_dh.c:219
void Parser_dhInsert(Parser_dh p, char *option, char *value)
Definition Parser_dh.c:335
bool Parser_dhReadInt(Parser_dh p, char *in, int *out)
Definition Parser_dh.c:249
bool Parser_dhHasSwitch(Parser_dh p, char *s)
Definition Parser_dh.c:213
bool Parser_dhReadString(Parser_dh p, char *in, char **out)
Definition Parser_dh.c:287
bool Parser_dhReadDouble(Parser_dh p, char *in, double *out)
Definition Parser_dh.c:272
void SortedList_dhInit(SortedList_dh sList, SubdomainGraph_dh sg)
void SortedList_dhCreate(SortedList_dh *sList)
void SortedList_dhDestroy(SortedList_dh sList)
void SubdomainGraph_dhDestroy(SubdomainGraph_dh s)
void SubdomainGraph_dhInit(SubdomainGraph_dh s, int blocks, bool bj, void *A)
void SubdomainGraph_dhCreate(SubdomainGraph_dh *s)
void SubdomainGraph_dhPrintRatios(SubdomainGraph_dh s, FILE *fp)
void SubdomainGraph_dhPrintStats(SubdomainGraph_dh sg, FILE *fp)
int myid_dh
int np_dh
Parser_dh parser_dh
void printf_dh(char *fmt,...)
int ref_counter
void fprintf_dh(FILE *fp, char *fmt,...)
char msgBuf_dh[MSG_BUF_SIZE_DH]
#define REAL_DH
MPI_Comm comm_dh
struct _mat_dh * Mat_dh
#define MALLOC_DH(s)
#define FREE_DH(p)
#define PIVOT_FIX_DEFAULT
#define ERRCHKA
void EuclidGetDimensions(void *A, int *beg_row, int *rowsLocal, int *rowsGlobal)
Definition getRow_dh.c:89
void iluk_seq(Euclid_dh ctx)
Definition ilu_seq.c:109
void iluk_seq_block(Euclid_dh ctx)
Definition ilu_seq.c:299
void iluk_mpi_bj(Euclid_dh ctx)
Definition ilu_mpi_bj.c:68
void ilut_seq(Euclid_dh ctx)
Definition ilu_seq.c:788
void iluk_mpi_pilu(Euclid_dh ctx)
void bicgstab_euclid(Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
Definition krylov_dh.c:53
void cg_euclid(Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
Definition krylov_dh.c:227
#define SET_V_ERROR(msg)
Definition macros_dh.h:126
#define START_FUNC_DH
Definition macros_dh.h:181
#define CHECK_V_ERROR
Definition macros_dh.h:138
#define SET_INFO(msg)
Definition macros_dh.h:156
#define END_FUNC_DH
Definition macros_dh.h:187
double timing[TIMING_BINS]
Definition Euclid_dh.h:189
double stats[STATS_BINS]
Definition Euclid_dh.h:190
double * vals
Definition Vec_dh.h:55