76 const int use_trials ,
77 const int use_nodes[] ,
78 Kokkos::View< Scalar* , Device >& residual,
83 using Teuchos::rcpFromRef;
84 using Teuchos::arrayView;
89 typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
95 typedef typename ElementComputationType::vector_type VectorType ;
101 const double bubble_x = 1.0 ;
102 const double bubble_y = 1.0 ;
103 const double bubble_z = 1.0 ;
107 use_nodes[0] , use_nodes[1] , use_nodes[2] ,
108 bubble_x , bubble_y , bubble_z );
112 Kokkos::Timer wall_clock ;
116 for (
int itrial = 0 ; itrial < use_trials ; ++itrial ) {
128 typename NodeNodeGraphType::Times graph_times;
129 const NodeNodeGraphType
130 mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
134 jacobian = LocalMatrixType( mesh_to_graph.graph );
139 VectorType solution(
"solution" , fixture.node_count() );
140 residual = VectorType(
"residual" , fixture.node_count_owned() );
143 const ElementComputationType elemcomp( fixture , solution ,
144 mesh_to_graph.elem_graph ,
145 jacobian , residual );
147 Kokkos::deep_copy( solution , Scalar(1.2345) );
152 Kokkos::deep_copy( residual , Scalar(0) );
153 Kokkos::deep_copy( jacobian.
coeff , Scalar(0) );
195 const MatrixType& analytic_jacobian,
196 const VectorType& fad_residual,
197 const MatrixType& fad_jacobian,
198 const std::string& test_name)
200 const double tol = 1e-14;
202 Teuchos::RCP<Teuchos::FancyOStream> out =
203 Teuchos::VerboseObjectBase::getDefaultOStream();
204 std::stringstream buf;
205 Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,
false));
207 typename VectorType::HostMirror host_analytic_residual =
208 Kokkos::create_mirror_view(analytic_residual);
209 typename VectorType::HostMirror host_fad_residual =
210 Kokkos::create_mirror_view(fad_residual);
211 Kokkos::deep_copy( host_analytic_residual, analytic_residual );
212 Kokkos::deep_copy( host_fad_residual, fad_residual );
214 fbuf << test_name <<
":" << std::endl;
216 if (host_analytic_residual.extent(0) != host_fad_residual.extent(0)) {
217 fbuf <<
"Analytic residual dimension "
218 << host_analytic_residual.extent(0)
219 <<
" does not match Fad residual dimension "
220 << host_fad_residual.extent(0) << std::endl;
224 const size_t num_node = host_analytic_residual.extent(0);
225 for (
size_t i=0;
i<num_node; ++
i) {
227 host_analytic_residual(
i),
"analytic residual",
228 host_fad_residual(
i),
"Fad residual",
233 typename MatrixType::HostMirror host_analytic_jacobian =
234 Kokkos::create_mirror_view(analytic_jacobian);
235 typename MatrixType::HostMirror host_fad_jacobian =
236 Kokkos::create_mirror_view(fad_jacobian);
237 Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
238 Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
240 if (host_analytic_jacobian.extent(0) != host_fad_jacobian.extent(0)) {
241 fbuf <<
"Analytic Jacobian dimension "
242 << host_analytic_jacobian.extent(0)
243 <<
" does not match Fad Jacobian dimension "
244 << host_fad_jacobian.extent(0) << std::endl;
248 const size_t num_entry = host_analytic_jacobian.extent(0);
249 for (
size_t i=0;
i<num_entry; ++
i) {
251 host_analytic_jacobian(
i),
"analytic Jacobian",
252 host_fad_jacobian(
i),
"Fad Jacobian",
265 const int use_print ,
266 const int use_trials ,
270 const bool quadratic ,
279 std::cout.precision(8);
280 std::cout << std::endl
281 <<
"\"Grid Size\" , "
283 <<
"\"Analytic Fill Time\" , "
284 <<
"\"Fad Element Fill Slowdown\" , "
285 <<
"\"Fad Optimized Element Fill Slowdown\" , "
286 <<
"\"Fad QP Fill Slowdown\" , "
289 typedef Kokkos::View< double* , Device > vector_type ;
291 vector_type analytic_residual, fad_residual, fad_opt_residual,
293 matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
296 for (
int n=n_begin; n<=n_end; n+=n_step) {
297 const int use_nodes[] = { n, n, n };
298 Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
302 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
303 use_print, use_trials, use_nodes,
304 analytic_residual, analytic_jacobian );
307 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
308 use_print, use_trials, use_nodes,
309 fad_residual, fad_jacobian);
312 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
313 use_print, use_trials, use_nodes,
314 fad_opt_residual, fad_opt_jacobian);
317 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
318 use_print, use_trials, use_nodes,
319 fad_qp_residual, fad_qp_jacobian);
323 fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
324 use_print, use_trials, use_nodes,
325 analytic_residual, analytic_jacobian );
328 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
329 use_print, use_trials, use_nodes,
330 fad_residual, fad_jacobian);
333 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
334 use_print, use_trials, use_nodes,
335 fad_opt_residual, fad_opt_jacobian);
338 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
339 use_print, use_trials, use_nodes,
340 fad_qp_residual, fad_qp_jacobian);
344 fad_residual, fad_jacobian.coeff,
347 fad_opt_residual, fad_opt_jacobian.coeff,
350 fad_qp_residual, fad_qp_jacobian.coeff,
356 perf_analytic.
scale(s);
358 perf_fad_opt.
scale(s);
359 perf_fad_qp.
scale(s);
361 std::cout.precision(3);
362 std::cout << n <<
" , "
367 << std::fixed << std::setw(6)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)