56 int *end_rows,
int reqlen,
57 int *reqind,
int *outlist);
63 not used anyplace, I think;
65 expansion ?[mar 21, 2 K + 1]
66 static
void Mat_dhAllocate_getRow_private (
Mat_dh A);
72#define __FUNC__ "Mat_dhCreate"
101 tmp->len_private = 0;
102 tmp->rowCheckedOut = -1;
103 tmp->cval_private = NULL;
104 tmp->aval_private = NULL;
106 tmp->row_perm = NULL;
110 tmp->recv_req = NULL;
111 tmp->send_req = NULL;
119 tmp->matvecIsSetup =
false;
123 tmp->matvec_timing =
true;
129#define __FUNC__ "Mat_dhDestroy"
142 if (mat->len != NULL)
147 if (mat->cval != NULL)
152 if (mat->aval != NULL)
157 if (mat->diag != NULL)
162 if (mat->fill != NULL)
167 if (mat->cval_private != NULL)
172 if (mat->aval_private != NULL)
177 if (mat->row_perm != NULL)
184 for (i = 0; i < mat->num_recv; i++)
185 MPI_Request_free (&mat->recv_req[i]);
186 for (i = 0; i < mat->num_send; i++)
187 MPI_Request_free (&mat->send_req[i]);
188 if (mat->recv_req != NULL)
193 if (mat->send_req != NULL)
198 if (mat->status != NULL)
203 if (mat->recvbuf != NULL)
208 if (mat->sendbuf != NULL)
213 if (mat->sendind != NULL)
219 if (mat->matvecIsSetup)
224 if (mat->numb != NULL)
236#define __FUNC__ "Mat_dhMatVecSetDown"
247#define __FUNC__ "Mat_dhMatVecSetup"
258 int *outlist, *inlist;
259 int ierr, i, row, *
rp = mat->rp, *
cval = mat->cval;
262 int firstLocal = mat->beg_row;
263 int lastLocal = firstLocal +
m;
264 int *beg_rows, *end_rows;
272 mat->status = (MPI_Status *)
MALLOC_DH (
np_dh *
sizeof (MPI_Status));
287 MPI_Allgather (&firstLocal, 1, MPI_INT, beg_rows, 1, MPI_INT,
293 MPI_Allgather (&lastLocal, 1, MPI_INT, end_rows, 1, MPI_INT,
302 for (i = 0; i <
np_dh; ++i)
321 inlist[0] = outlist[0];
326 MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT,
comm_dh);
334 for (row = 0; row <
m; row++)
336 int len =
rp[row + 1] -
rp[row];
337 int *ind =
cval +
rp[row];
358#define __FUNC__ "setup_matvec_receives_private"
361 int reqlen,
int *reqind,
int *outlist)
371 mat->recvbuf = (
double *)
MALLOC_DH ((reqlen +
m) *
sizeof (double));
373 for (i = 0; i < reqlen; i = j)
380 for (j = i + 1; j < reqlen; j++)
383 if (reqind[j] < beg_rows[this_pe] || reqind[j] > end_rows[this_pe])
389 MPI_Isend (&reqind[i], j - i, MPI_INT, this_pe, 444,
comm_dh,
392 ierr = MPI_Request_free (&request);
396 outlist[this_pe] = j - i;
399 MPI_Recv_init (&mat->recvbuf[i +
m], j - i, MPI_DOUBLE, this_pe, 555,
400 comm_dh, &mat->recv_req[mat->num_recv]);
404 mat->recvlen += j - i;
411#define __FUNC__ "setup_matvec_sends_private"
416 MPI_Request *requests;
417 MPI_Status *statuses;
419 requests = (MPI_Request *)
MALLOC_DH (
np_dh *
sizeof (MPI_Request));
421 statuses = (MPI_Status *)
MALLOC_DH (
np_dh *
sizeof (MPI_Status));
426 for (i = 0; i <
np_dh; i++)
436 for (i = 0; i <
np_dh; i++)
442 MPI_Irecv (&mat->sendind[j], inlist[i], MPI_INT, i, 444,
comm_dh,
443 &requests[mat->num_send]);
447 MPI_Send_init (&mat->sendbuf[j], inlist[i], MPI_DOUBLE, i, 555,
448 comm_dh, &mat->send_req[mat->num_send]);
460 ierr = MPI_Waitall (mat->num_send, requests, statuses);
464 for (i = 0; i < mat->sendlen; i++)
465 mat->sendind[i] -= first;
474#define __FUNC__ "Mat_dhMatVec"
486 int ierr, i, row,
m = mat->m;
487 int *
rp = mat->rp, *
cval = mat->cval;
488 double *
aval = mat->aval;
491 double *
sendbuf = mat->sendbuf;
492 double *
recvbuf = mat->recvbuf;
493 double t1 = 0, t2 = 0, t3 = 0, t4 = 0;
494 bool timeFlag = mat->matvec_timing;
514 ierr = MPI_Startall (mat->num_recv, mat->recv_req);
516 ierr = MPI_Startall (mat->num_send, mat->send_req);
518 ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status);
520 ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status);
533 for (i = 0; i <
m; i++)
537 for (row = 0; row <
m; row++)
539 int len =
rp[row + 1] -
rp[row];
540 int *ind =
cval +
rp[row];
541 double *val =
aval +
rp[row];
543 for (i = 0; i <
len; i++)
545 temp += (val[i] *
recvbuf[ind[i]]);
562#define __FUNC__ "Mat_dhMatVec_omp"
567 int *
rp = mat->rp, *
cval = mat->cval;
568 double *
aval = mat->aval;
571 double *
sendbuf = mat->sendbuf;
572 double *
recvbuf = mat->recvbuf;
573 double t1 = 0, t2 = 0, t3 = 0, t4 = 0, tx = 0;
576 bool timeFlag = mat->matvec_timing;
591 ierr = MPI_Startall (mat->num_recv, mat->recv_req);
593 ierr = MPI_Startall (mat->num_send, mat->send_req);
595 ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status);
597 ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status);
607 for (i = 0; i <
m; i++)
618 for (row = 0; row <
m; row++)
624 for (i = 0; i <
len; i++)
626 temp += (val[i] *
recvbuf[ind[i]]);
643#define __FUNC__ "Mat_dhMatVec_uni_omp"
648 int *
rp = mat->rp, *
cval = mat->cval;
649 double *
aval = mat->aval;
650 double t1 = 0, t2 = 0;
651 bool timeFlag = mat->matvec_timing;
659 for (row = 0; row <
m; row++)
661 int len =
rp[row + 1] -
rp[row];
662 int *ind =
cval +
rp[row];
663 double *val =
aval +
rp[row];
665 for (i = 0; i <
len; i++)
667 temp += (val[i] * x[ind[i]]);
684#define __FUNC__ "Mat_dhMatVec_uni"
689 int *
rp = mat->rp, *
cval = mat->cval;
690 double *
aval = mat->aval;
691 double t1 = 0, t2 = 0;
692 bool timeFlag = mat->matvec_timing;
697 for (row = 0; row <
m; row++)
699 int len =
rp[row + 1] -
rp[row];
700 int *ind =
cval +
rp[row];
701 double *val =
aval +
rp[row];
703 for (i = 0; i <
len; i++)
705 temp += (val[i] * x[ind[i]]);
721#define __FUNC__ "Mat_dhReadNz"
727 ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM,
comm_dh);
736#define __FUNC__ "Mat_dhAllocate_getRow_private"
738Mat_dhAllocate_getRow_private (
Mat_dh A)
744 for (i = 0; i <
m; ++i)
774#define __FUNC__ "Mat_dhZeroTiming"
783 mat->time_max[i] = 0;
784 mat->time_min[i] = 0;
789#define __FUNC__ "Mat_dhReduceTiming"
798 MPI_Allreduce (mat->time, mat->time_min,
MAT_DH_BINS, MPI_DOUBLE, MPI_MIN,
800 MPI_Allreduce (mat->time, mat->time_max,
MAT_DH_BINS, MPI_DOUBLE, MPI_MAX,
805#define __FUNC__ "Mat_dhPermute"
810 int i, j, *RP = A->
rp, *CVAL = A->
cval;
811 int *o2n, *
rp, *
cval,
m = A->
m, nz = RP[
m];
822 for (i = 0; i <
m; ++i)
826 rp = B->rp = (
int *)
MALLOC_DH ((
m + 1) *
sizeof (int));
830 aval = B->aval = (
double *)
MALLOC_DH (nz *
sizeof (
double));
835 for (i = 0; i <
m; ++i)
838 rp[i + 1] = RP[oldRow + 1] - RP[oldRow];
840 for (i = 1; i <=
m; ++i)
841 rp[i] =
rp[i] +
rp[i - 1];
843 for (i = 0; i <
m; ++i)
847 for (j = RP[oldRow]; j < RP[oldRow + 1]; ++j)
849 cval[idx] = o2n[CVAL[j]];
866#define __FUNC__ "Mat_dhPrintGraph"
878 for (pe = 0; pe <
np_dh; ++pe)
887 A->
aval, NULL, NULL, NULL, fp);
904#define __FUNC__ "Mat_dhPrintRows"
925 "\n----- A, unpermuted ------------------------------------\n");
926 for (i = 0; i <
m; ++i)
928 fprintf (fp,
"%i :: ", 1 + i +
beg_row);
929 for (j =
rp[i]; j <
rp[i + 1]; ++j)
933 fprintf (fp,
"%i ", 1 +
cval[j]);
937 fprintf (fp,
"%i,%g ; ", 1 +
cval[j],
aval[j]);
952 for (i = 0; i < sg->
blocks; ++i)
964 "\n----- A, permuted, single mpi task ------------------\n");
965 fprintf (fp,
"---- new subdomain: %i; old subdomain: %i\n", i,
967 fprintf (fp,
" old beg_row: %i; new beg_row: %i\n",
969 fprintf (fp,
" local rows in this block: %i\n",
971 fprintf (fp,
" bdry rows in this block: %i\n",
973 fprintf (fp,
" 1st bdry row= %i \n",
976 for (oldRow =
beg_row; oldRow < end_row; ++oldRow)
981 fprintf (fp,
"%3i (old= %3i) :: ", idx, 1 + oldRow);
986 for (k = 0; k <
len; ++k)
1017 for (i = 0; i <
m; ++i)
1019 int row = n2o_row[i];
1020 fprintf (fp,
"%3i (old= %3i) :: ", 1 + i + beg_rowP,
1022 for (j =
rp[row]; j <
rp[row + 1]; ++j)
1030 col = o2n_col[col -
beg_row] + beg_rowP;
1042 "nonlocal column= %i not in hash table",
1054 fprintf (fp,
"%i ", 1 + col);
1058 fprintf (fp,
"%i,%g ; ", 1 + col,
aval[j]);
1069#define __FUNC__ "Mat_dhPrintTriples"
1093 for (pe = 0; pe <
np_dh; ++pe)
1109 for (i = 0; i <
m; ++i)
1111 for (j =
rp[i]; j <
rp[i + 1]; ++j)
1115 fprintf (fp,
"%i %i\n", 1 + i +
beg_row,
1121 if (val == 0.0 && matlab)
1137 else if (
np_dh == 1)
1139 int i, j, k, idx = 1;
1144 for (i = 0; i < sg->
blocks; ++i)
1146 int oldBlock = sg->
n2o_sub[i];
1150 for (j =
beg_row; j < end_row; ++j)
1161 for (k = 0; k <
len; ++k)
1163 fprintf (fp,
"%i %i\n", idx, 1 + sg->
o2n_col[
cval[k]]);
1170 for (k = 0; k <
len; ++k)
1172 double val =
aval[k];
1173 if (val == 0.0 && matlab)
1198 for (pe = 0; pe <
np_dh; ++pe)
1214 for (i = 0; i <
m; ++i)
1216 int row = n2o_row[i];
1217 for (j =
rp[row]; j <
rp[row + 1]; ++j)
1224 if (val == 0.0 && matlab)
1231 col = o2n_col[col -
beg_row] + beg_rowP;
1243 "nonlocal column= %i not in hash table",
1255 fprintf (fp,
"%i %i\n", 1 + i + beg_rowP, 1 + col);
1274#define __FUNC__ "Mat_dhPrintCSR"
1282 SET_V_ERROR (
"only implemented for a single mpi task");
1287 (
"not implemented for reordered matrix (SubdomainGraph_dh should be NULL)");
1310#define __FUNC__ "Mat_dhPrintBIN"
1316 SET_V_ERROR (
"only implemented for a single MPI task");
1322 SET_V_ERROR (
"not implemented for reordering; ensure sg=NULL");
1326 NULL, NULL, NULL, filename);
1336#define __FUNC__ "Mat_dhReadCSR"
1345 SET_V_ERROR (
"only implemented for a single MPI task");
1364#define __FUNC__ "Mat_dhReadTriples"
1373 SET_V_ERROR (
"only implemented for a single MPI task");
1395#define __FUNC__ "Mat_dhReadBIN"
1403 SET_V_ERROR (
"only implemented for a single MPI task");
1415#define __FUNC__ "Mat_dhTranspose"
1436#define __FUNC__ "Mat_dhMakeStructurallySymmetric"
1455#define __FUNC__ "Mat_dhFixDiags"
1465 for (i = 0; i <
m; ++i)
1468 for (j =
rp[i]; j <
rp[i + 1]; ++j)
1485 (
"\nMat_dhFixDiags:: %i diags not explicitly present; inserting!\n",
1495 for (i = 0; i <
m; ++i)
1498 for (j =
rp[i]; j <
rp[i + 1]; ++j)
1500 sum += fabs (
aval[j]);
1502 for (j =
rp[i]; j <
rp[i + 1]; ++j)
1514#define __FUNC__ "insert_diags_private"
1521 int nz = RP[
m] + ct;
1532 for (i = 0; i <
m; ++i)
1535 for (j = RP[i]; j < RP[i + 1]; ++j)
1537 cval[idx] = CVAL[j];
1538 aval[idx] = AVAL[j];
1563#define __FUNC__ "Mat_dhPrintDiags"
1572 "=================== diagonal elements ====================\n");
1573 for (i = 0; i <
m; ++i)
1576 for (j =
rp[i]; j <
rp[i + 1]; ++j)
1580 fprintf (fp,
"%i %g\n", i + 1,
aval[j]);
1587 fprintf (fp,
"%i ---------- missing\n", i + 1);
1594#define __FUNC__ "Mat_dhGetRow"
1602 "requested globalRow= %i, which is local row= %i, but only have %i rows!",
1603 globalRow, row, B->
m);
1606 *
len = B->
rp[row + 1] - B->
rp[row];
1608 *ind = B->
cval + B->
rp[row];
1610 *val = B->
aval + B->
rp[row];
1614#define __FUNC__ "Mat_dhRestoreRow"
1621#define __FUNC__ "Mat_dhRowPermute"
1626 SET_V_ERROR (
"turned off; compilation problem on blue");
1629 int i, j,
m = mat->m, nz = mat->rp[
m];
1633 bool debug = mat->debug;
1638* = 1:Compute a row permutation of the matrix so that the
1639 * permuted matrix has as many entries on its diagonal as
1640 * possible.The values on the diagonal are of arbitrary size.
1641 * HSL subroutine MC21A / AD is used
for this
1642 . * = 2: Compute a row permutation of the matrix so that the smallest * value on the diagonal of the permuted matrix is maximized. * = 3:Compute a row permutation of the matrix so that the smallest
1643 * value on the diagonal of the permuted matrix is maximized.
1644 * The algorithm differs from the one used
for JOB
1645 = 2 and may * have quite a different performance. * = 4: Compute a row permutation of the matrix so that the sum * of the diagonal entries of the permuted matrix is maximized. * = 5:Compute a row permutation of the matrix so that the product
1646 * of the diagonal entries of the permuted matrix is maximized
1647 * and vectors to scale the matrix so that the nonzero diagonal
1648 * entries of the permuted matrix are one in absolute value and
1649 * all the off - diagonal entries are less than or equal to one in
1658 sprintf (
msgBuf_dh,
"calling row permutation with algo= %i", algo);
1661 r1 = (
double *)
MALLOC_DH (
m *
sizeof (
double));
1663 c1 = (
double *)
MALLOC_DH (
m *
sizeof (
double));
1665 if (mat->row_perm == NULL)
1667 mat->row_perm = o2n = (
int *)
MALLOC_DH (
m *
sizeof (
int));
1672 o2n = mat->row_perm;
1683 for (i = 0; i < nz; ++i)
1689 fprintf (
logFile,
"\n-------- row permutation vector --------\n");
1690 for (i = 0; i <
m; ++i)
1691 fprintf (
logFile,
"%i ", 1 + o2n[i]);
1696 printf (
"\n-------- row permutation vector --------\n");
1697 for (i = 0; i <
m; ++i)
1698 printf (
"%i ", 1 + o2n[i]);
1705 for (i = 0; i <
m; ++i)
1716 printf (
"@@@ [%i] Mat_dhRowPermute :: got natural ordering!\n",
1727 (
"@@@ [%i] Mat_dhRowPermute :: scaling matrix rows and columns!\n",
1731 for (i = 0; i <
m; i++)
1733 r1[i] = exp (r1[i]);
1734 c1[i] = exp (c1[i]);
1736 for (i = 0; i <
m; i++)
1737 for (j =
rp[i]; j <
rp[i + 1]; j++)
1742 mat->rp, mat->cval, mat->aval);
1760#define __FUNC__ "Mat_dhPartition"
1765 int *RP = mat->rp, *CVAL = mat->cval;
1767 int i, j, *
rp, *
cval, idx = 0;
1769 rp = *rpOUT = (
int *)
MALLOC_DH ((
m + 1) *
sizeof (int));
1776 for (i = 0; i <
m; ++i)
1778 for (j = RP[i]; j < RP[i + 1]; ++j)
1792#define __FUNC__ "Mat_dhPartition"
1795 int **beg_rowOUT,
int **row_countOUT,
int **n2oOUT,
1799#ifndef HAVE_METIS_DH
1804 int *
beg_row, *row_count, *n2o, *o2n, bk,
new, *part;
1806 int i, cutEdgeCount;
1808 int metisOpts[5] = { 0, 0, 0, 0, 0 };
1814 row_count = *row_countOUT = (
int *)
MALLOC_DH (blocks *
sizeof (
int));
1816 *n2oOUT = n2o = (
int *)
MALLOC_DH (
m *
sizeof (
int));
1818 *o2nOUT = o2n = (
int *)
MALLOC_DH (
m *
sizeof (
int));
1822== == == == == == == == == == == == == == == == == == == == == == == == == == == == == == = Metis arguments:
1824 n - number of nodes
rp[],
cval[]NULL, NULL, 0
1826 blocksIN, options[5] = 0::0 / 1 use defauls;
1831 == == == == == == == == == == == == == == == == == == == == == == == == ==
1841 METIS_PartGraphKway (&
m,
rp,
cval, NULL, NULL,
1842 &zero, &zero, &blocks, metisOpts, &cutEdgeCount, part);
1851 printf_dh (
"\nmetis partitioning vector; blocks= %i\n", blocks);
1852 for (i = 0; i <
m; ++i)
1857 for (i = 0; i < blocks; ++i)
1859 for (i = 0; i <
m; ++i)
1865 for (i = 1; i < blocks; ++i)
1871 for (i = 0; i < blocks; ++i)
1874 for (i = 0; i < blocks; ++i)
1881 int *tmp = (
int *)
MALLOC_DH (blocks *
sizeof (
int));
1883 memcpy (tmp,
beg_row, blocks *
sizeof (
int));
1884 for (i = 0; i <
m; ++i)
int Hash_i_dhLookup(Hash_i_dh h, int key)
void Mat_dhRowPermute(Mat_dh mat)
void Mat_dhPrintDiags(Mat_dh A, FILE *fp)
void Mat_dhDestroy(Mat_dh mat)
void Mat_dhPrintGraph(Mat_dh A, SubdomainGraph_dh sg, FILE *fp)
void Mat_dhReadBIN(Mat_dh *mat, char *filename)
void Mat_dhPartition(Mat_dh mat, int blocks, int **beg_rowOUT, int **row_countOUT, int **n2oOUT, int **o2nOUT)
int Mat_dhReadNz(Mat_dh mat)
void Mat_dhPrintCSR(Mat_dh A, SubdomainGraph_dh sg, char *filename)
void Mat_dhTranspose(Mat_dh A, Mat_dh *Bout)
void Mat_dhPermute(Mat_dh A, int *n2o, Mat_dh *Bout)
void Mat_dhCreate(Mat_dh *mat)
void Mat_dhPrintBIN(Mat_dh A, SubdomainGraph_dh sg, char *filename)
void Mat_dhPrintRows(Mat_dh A, SubdomainGraph_dh sg, FILE *fp)
void Mat_dhMatVec(Mat_dh mat, double *x, double *b)
void Mat_dhReduceTiming(Mat_dh mat)
static void setup_matvec_receives_private(Mat_dh mat, int *beg_rows, int *end_rows, int reqlen, int *reqind, int *outlist)
void Mat_dhMatVecSetup(Mat_dh mat)
void insert_diags_private(Mat_dh A, int ct)
void Mat_dhPrintTriples(Mat_dh A, SubdomainGraph_dh sg, char *filename)
void Mat_dhMatVec_uni(Mat_dh mat, double *x, double *b)
void Mat_dhRestoreRow(Mat_dh B, int row, int *len, int **ind, double **val)
void Mat_dhFixDiags(Mat_dh A)
void Mat_dhZeroTiming(Mat_dh mat)
void Mat_dhMakeStructurallySymmetric(Mat_dh A)
void Mat_dhReadTriples(Mat_dh *mat, int ignore, char *filename)
void Mat_dhMatVec_uni_omp(Mat_dh mat, double *x, double *b)
static void setup_matvec_sends_private(Mat_dh mat, int *inlist)
void Mat_dhMatVec_omp(Mat_dh mat, double *x, double *b)
void Mat_dhGetRow(Mat_dh B, int globalRow, int *len, int **ind, double **val)
void build_adj_lists_private(Mat_dh mat, int **rpOUT, int **cvalOUT)
void Mat_dhMatVecSetdown(Mat_dh mat)
void Mat_dhReadCSR(Mat_dh *mat, char *filename)
#define MATVEC_TOTAL_TIME
void dldperm(int job, int n, int nnz, int colptr[], int adjncy[], double nzval[], int *perm, double u[], double v[])
void Numbering_dhDestroy(Numbering_dh numb)
void Numbering_dhCreate(Numbering_dh *numb)
void Numbering_dhGlobalToLocal(Numbering_dh numb, int len, int *global, int *local)
void Numbering_dhSetup(Numbering_dh numb, Mat_dh mat)
bool Parser_dhReadInt(Parser_dh p, char *in, int *out)
bool Parser_dhHasSwitch(Parser_dh p, char *s)
void printf_dh(char *fmt,...)
char msgBuf_dh[MSG_BUF_SIZE_DH]
FILE * openFile_dh(const char *filenameIN, const char *modeIN)
void closeFile_dh(FILE *fpIN)
void io_dh_print_ebin_mat_private(int m, int beg_row, int *rp, int *cval, double *aval, int *n2o, int *o2n, Hash_i_dh hash, char *filename)
void io_dh_read_ebin_mat_private(int *m, int **rp, int **cval, double **aval, char *filename)
#define CHECK_MPI_ERROR(errCode)
#define CHECK_MPI_V_ERROR(errCode)
#define END_FUNC_VAL(retval)
void mat_dh_transpose_private(int m, int *RP, int **rpOUT, int *CVAL, int **cvalOUT, double *AVAL, double **avalOUT)
void mat_dh_transpose_reuse_private(int m, int *rpIN, int *cvalIN, double *avalIN, int *rpOUT, int *cvalOUT, double *avalOUT)
int mat_find_owner(int *beg_rows, int *end_rows, int index)
void mat_dh_read_triples_private(int ignore, int *mOUT, int **rpOUT, int **cvalOUT, double **avalOUT, FILE *fp)
void make_symmetric_private(int m, int **rpIN, int **cvalIN, double **avalIN)
void mat_dh_print_graph_private(int m, int beg_row, int *rp, int *cval, double *aval, int *n2o, int *o2n, Hash_i_dh hash, FILE *fp)
void mat_dh_print_csr_private(int m, int *rp, int *cval, double *aval, FILE *fp)
void mat_dh_read_csr_private(int *mOUT, int **rpOUT, int **cvalOUT, double **avalOUT, FILE *fp)