59 const cusp::csr_matrix<IndexType, ValueType, MemorySpace>& A,
60 IndexType nrhs, IndexType max_its, ValueType tol) {
63 cusp::array2d<ValueType, MemorySpace, Orientation> x(A.num_rows, nrhs, 0);
64 cusp::array2d<ValueType, MemorySpace, Orientation> b(A.num_rows, nrhs, 1);
75 TEUCHOS_FUNC_TIME_MONITOR(
"Total Block-CG Solve Time");
// Index, scalar, and memory-space types used to instantiate the CUSP matrix
// built later in this routine.
82 typedef int IndexType;
83 typedef double ValueType;
84 typedef cusp::device_memory MemorySpace;
// Command-line options controlling the benchmark sweep. The strings passed as
// the last argument of each setOption() call are the help texts for that option.
// NOTE(review): the declarations of n, tol and device_id are not visible in
// this excerpt; they are assumed to precede the setOption() calls that use them.
92 Teuchos::CommandLineProcessor
CLP;
93 CLP.setDocString(
"This tests the performance of block multiply routines.\n");
95 CLP.setOption(
"n", &n,
"Number of mesh points in each direction");
// Range of right-hand-side counts to sweep over: first value, last value, increment.
96 IndexType nrhs_begin = 32;
97 CLP.setOption(
"begin", &nrhs_begin,
98 "Starting number of right-hand-sides");
99 IndexType nrhs_end = 512;
100 CLP.setOption(
"end", &nrhs_end,
101 "Ending number of right-hand-sides");
102 IndexType nrhs_step = 32;
103 CLP.setOption(
"step", &nrhs_step,
104 "Increment in number of right-hand-sides");
// Solver controls forwarded to cusp_sa_block_cg().
105 IndexType max_its = 100;
106 CLP.setOption(
"max_iterations", &max_its,
107 "Maximum number of CG iterations");
109 CLP.setOption(
"tolerance", &tol,
"Convergence tolerance");
111 CLP.setOption(
"device", &device_id,
"CUDA device ID");
// Select the requested CUDA device and request eight-byte shared-memory banks.
// NOTE(review): the return codes of both CUDA runtime calls are ignored here.
115 cudaSetDevice(device_id);
116 cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
// Build the test matrix with the CUSP gallery's 27-point Poisson generator,
// using n for all three grid dimensions.
119 cusp::csr_matrix<IndexType, ValueType, MemorySpace> A;
120 cusp::gallery::poisson27pt(A, n, n, n);
// Timer handles, looked up by name, whose totals are printed after each run.
// "Total Block-CG Solve Time" is the label passed to TEUCHOS_FUNC_TIME_MONITOR
// in the solve routine of this file.
// NOTE(review): the remaining names presumably match timers started inside the
// CUSP block kernels -- confirm, those call sites are not visible here.
123 Teuchos::RCP<Teuchos::Time> tm_cg =
124 Teuchos::TimeMonitor::getNewTimer(
"Total Block-CG Solve Time");
125 Teuchos::RCP<Teuchos::Time> tm_prec =
126 Teuchos::TimeMonitor::getNewTimer(
"CUSP Block Multilevel Solve");
127 Teuchos::RCP<Teuchos::Time> tm_coarse =
128 Teuchos::TimeMonitor::getNewTimer(
"CUSP Coarse-grid Solve");
129 Teuchos::RCP<Teuchos::Time> tm_op =
130 Teuchos::TimeMonitor::getNewTimer(
"CUSP Operator block-apply");
131 Teuchos::RCP<Teuchos::Time> tm_prec_op =
132 Teuchos::TimeMonitor::getNewTimer(
"CUSP Matrix block-apply");
// Column header for the comma-separated results: problem size first, then the
// five timer totals for the row-major run, then the same five for column-major.
// NOTE(review): the end of this output statement is not visible in this excerpt.
134 std::cout <<
"nrhs , num_rows , num_entries , "
135 <<
"row_cg , row_op , row_prec , row_prec_op , row_coarse , "
136 <<
"col_cg , col_op , col_prec , col_prec_op , col_coarse"
// Sweep the number of right-hand sides from nrhs_begin to nrhs_end (inclusive).
// NOTE(review): with nrhs_begin <= nrhs_end, an nrhs_step of zero never terminates.
139 for (IndexType nrhs = nrhs_begin; nrhs <= nrhs_end; nrhs += nrhs_step) {
// Leading columns: right-hand-side count and matrix dimensions.
141 std::cout << nrhs <<
" , "
142 << A.num_rows <<
" , " << A.num_entries <<
" , ";
// Row-major run: reset all timers so the totals below cover this solve only.
145 Teuchos::TimeMonitor::zeroOutTimers();
146 cusp_sa_block_cg<cusp::row_major>(A, nrhs, max_its, tol);
// Printed in the same order as the row_* header columns.
148 std::cout << tm_cg->totalElapsedTime() <<
" , "
149 << tm_op->totalElapsedTime() <<
" , "
150 << tm_prec->totalElapsedTime() <<
" , "
151 << tm_prec_op->totalElapsedTime() <<
" , "
152 << tm_coarse->totalElapsedTime() <<
" , ";
// Column-major run: reset the timers again, repeat the solve, and end the row.
155 Teuchos::TimeMonitor::zeroOutTimers();
156 cusp_sa_block_cg<cusp::column_major>(A, nrhs, max_its, tol);
158 std::cout << tm_cg->totalElapsedTime() <<
" , "
159 << tm_op->totalElapsedTime() <<
" , "
160 << tm_prec->totalElapsedTime() <<
" , "
161 << tm_prec_op->totalElapsedTime() <<
" , "
162 << tm_coarse->totalElapsedTime() << std::endl;
// Teuchos catch-clause macro, given the verbose flag, std::cerr and the success flag.
// NOTE(review): the matching try block and the declarations of verbose and
// success are not visible in this excerpt -- confirm against the full file.
167 TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
void cusp_sa_block_cg(const cusp::csr_matrix< IndexType, ValueType, MemorySpace > &A, IndexType nrhs, IndexType max_its, ValueType tol)