Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
TestMeanMultiply.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#include <iostream>
43
44// Devices
45#include "Kokkos_Core.hpp"
46
47// Utilities
48#include "Teuchos_CommandLineProcessor.hpp"
49#include "Teuchos_StandardCatchMacros.hpp"
50#ifdef KOKKOS_ENABLE_CUDA
51#include "cuda_runtime_api.h"
52#endif
53
// Forward declaration of the templated benchmark entry point; the definition
// is not part of this translation unit's visible source.
//
// main() below invokes it once per selected Kokkos device, passing the values
// of the "n", "ni", "order", "dmin" and "dmax" command-line options as
// nGrid, nIter, order, min_var and max_var respectively.
template <class Scalar, class Ordinal, class Device>
void performance_test_driver(
  const Ordinal nGrid,
  const Ordinal nIter,
  const Ordinal order,
  const Ordinal min_var,
  const Ordinal max_var );
60
61int main(int argc, char *argv[])
62{
63 bool success = true;
64 bool verbose = false;
65 try {
66
67 const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
68 const size_t num_cores_per_socket =
69 Kokkos::hwloc::get_available_cores_per_numa();
70 // const size_t num_threads_per_core =
71 // Kokkos::hwloc::get_available_threads_per_core();
72 // const size_t num_threads =
73 // num_sockets * num_cores_per_socket * num_threads_per_core;
74
75 // Setup command line options
76 Teuchos::CommandLineProcessor CLP;
77 CLP.setDocString(
78 "This test performance of mean-based UQ::PCE multiply routines.\n");
79 int nGrid = 32;
80 CLP.setOption("n", &nGrid, "Number of mesh points in the each direction");
81 int nIter = 10;
82 CLP.setOption("ni", &nIter, "Number of multiply iterations");
83 int order = 3;
84 CLP.setOption("order", &order, "Polynomial order");
85 int dim_min = 1;
86 CLP.setOption("dmin", &dim_min, "Starting stochastic dimension");
87 int dim_max = 12;
88 CLP.setOption("dmax", &dim_max, "Stopping stochastic dimension");
89 int numa = num_sockets;
90 CLP.setOption("numa", &numa, "Number of numa nodes");
91 int cores = num_cores_per_socket;
92 CLP.setOption("cores", &cores, "Cores per numa node");
93#ifdef KOKKOS_ENABLE_THREADS
94 int threads = 0;
95 CLP.setOption("threads", &threads, "Number of threads for Threads device");
96#endif
97#ifdef KOKKOS_ENABLE_OPENMP
98 int openmp = 0;
99 CLP.setOption("openmp", &openmp, "Number of threads for OpenMP device");
100#endif
101#ifdef KOKKOS_ENABLE_CUDA
102 bool cuda = false;
103 CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
104 int device_id = 0;
105 CLP.setOption("device", &device_id, "CUDA device ID");
106#endif
107 CLP.parse( argc, argv );
108
109 typedef int Ordinal;
110 typedef double Scalar;
111
112#ifdef KOKKOS_ENABLE_THREADS
113 if (threads > 0) {
114 typedef Kokkos::Threads Device;
115
116 Kokkos::InitializationSettings init_args;
117 init_args.set_num_threads(threads);
118 Kokkos::initialize( init_args );
119
120 std::cout << std::endl
121 << "Threads performance with " << threads
122 << " threads, " << numa << " numas, " << cores
123 << " cores/numa:" << std::endl;
124
125 performance_test_driver<Scalar,Ordinal,Device>(
126 nGrid, nIter, order, dim_min, dim_max);
127
128 Kokkos::finalize();
129 }
130#endif
131
132#ifdef KOKKOS_ENABLE_OPENMP
133 if (openmp > 0) {
134 typedef Kokkos::OpenMP Device;
135
136 Kokkos::InitializationSettings init_args;
137 init_args.set_num_threads(openmp);
138 Kokkos::initialize( init_args );
139
140 std::cout << std::endl
141 << "OpenMP performance with " << openmp
142 << " threads, " << numa << " numas, " << cores
143 << " cores/numa:" << std::endl;
144
145 performance_test_driver<Scalar,Ordinal,Device>(
146 nGrid, nIter, order, dim_min, dim_max);
147
148 Kokkos::finalize();
149 }
150#endif
151
152#ifdef KOKKOS_ENABLE_CUDA
153 if (cuda) {
154 typedef Kokkos::Cuda Device;
155
156 Kokkos::InitializationSettings init_args;
157 init_args.set_device_id(device_id);
158 Kokkos::initialize( init_args );
159
160 cudaDeviceProp deviceProp;
161 cudaGetDeviceProperties(&deviceProp, device_id);
162 std::cout << std::endl
163 << "CUDA performance for device " << device_id << " ("
164 << deviceProp.name << "):"
165 << std::endl;
166
167 performance_test_driver<Scalar,Ordinal,Device>(
168 nGrid, nIter, order, dim_min, dim_max);
169
170 Kokkos::finalize();
171 }
172#endif
173
174 }
175 TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);
176
177 if (success)
178 return 0;
179 return -1;
180}
int main(int argc, char *argv[])
void performance_test_driver(const Ordinal nGrid, const Ordinal nIter, const Ordinal order, const Ordinal min_var, const Ordinal max_var)
Definition TestSpMM.hpp:242