42#include "Teuchos_UnitTestHarness.hpp"
43#include "Teuchos_UnitTestRepository.hpp"
44#include "Teuchos_GlobalMPISession.hpp"
57 Teuchos::FancyOStream& out) {
// Body fragment of the CUDA test driver (presumably test_cuda_embedded_vector,
// the name used by the callers below -- confirm against the full file).
// NOTE(review): the template header, the start of the signature, and the
// definitions of Vector and nGrid are not visible in this excerpt.
58 typedef Kokkos::Cuda Device;
// Storage type obtained by rebinding Storage through its nested
// apply_N<VectorSize> template.
61 typedef typename Storage::template apply_N<VectorSize>::type
storage_type;
// Bundle the three caller-supplied counts into a single DeviceConfig object.
65 KokkosSparse::DeviceConfig dev_config(num_blocks, num_vec_threads, num_row_threads);
// Delegate to test_embedded_vector with a default-constructed MultiplyOp;
// the outcome is captured in `success`.
67 bool success = test_embedded_vector<Vector>(
68 nGrid,
VectorSize, dev_config, MultiplyOp(), out);
75 Kokkos_CrsMatrix_MP, Multiply_Default,
Storage, MultiplyOp )
// Multiply_Default case of the Kokkos_CrsMatrix_MP test group, templated on
// Storage and MultiplyOp.
// NOTE(review): the macro name opening this declaration, the braces, and the
// definitions of NumPerThread and num_blocks are not visible in this excerpt.
// Ordinal is taken from the storage's nested ordinal_type.
77 typedef typename Storage::ordinal_type
Ordinal;
79 const Ordinal ThreadsPerVector = 16;
// Both thread counts are passed as 0 here; presumably this lets the
// implementation choose its own values -- confirm against DeviceConfig.
82 const Ordinal num_vec_threads = 0;
83 const Ordinal num_row_threads = 0;
// Forward everything to the CUDA driver; the returned bool is not used on
// this line.
86 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
90 Kokkos_CrsMatrix_MP, Multiply_1,
Storage, MultiplyOp )
// Multiply_1 case of the Kokkos_CrsMatrix_MP test group, templated on
// Storage and MultiplyOp.
// NOTE(review): the macro name opening this declaration, the braces, and the
// definitions of NumPerThread and num_blocks are not visible in this excerpt.
// Ordinal is taken from the storage's nested ordinal_type.
92 typedef typename Storage::ordinal_type
Ordinal;
94 const Ordinal ThreadsPerVector = 16;
// Explicit configuration: the vector-thread count equals ThreadsPerVector
// (16) and the row-thread count is 4.
97 const Ordinal num_vec_threads = ThreadsPerVector;
98 const Ordinal num_row_threads = 4;
// Forward everything to the CUDA driver; the returned bool is not used on
// this line.
101 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
105 Kokkos_CrsMatrix_MP, Multiply_2,
Storage, MultiplyOp )
// Multiply_2 case of the Kokkos_CrsMatrix_MP test group, templated on
// Storage and MultiplyOp.
// NOTE(review): the macro name opening this declaration, the braces, and the
// definition of num_blocks are not visible in this excerpt.
// Ordinal is taken from the storage's nested ordinal_type.
107 typedef typename Storage::ordinal_type
Ordinal;
// Compile-time parameters passed to the driver: 2 per thread, 16 threads
// per vector.
108 const Ordinal NumPerThread = 2;
109 const Ordinal ThreadsPerVector = 16;
// Explicit configuration: the vector-thread count equals ThreadsPerVector
// (16) and the row-thread count is 4.
112 const Ordinal num_vec_threads = ThreadsPerVector;
113 const Ordinal num_row_threads = 4;
// Forward everything to the CUDA driver; the returned bool is not used on
// this line.
116 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
120 Kokkos_CrsMatrix_MP, Multiply_3,
Storage, MultiplyOp )
// Multiply_3 case of the Kokkos_CrsMatrix_MP test group, templated on
// Storage and MultiplyOp.
// NOTE(review): the macro name opening this declaration, the braces, and the
// definition of num_blocks are not visible in this excerpt.
// Ordinal is taken from the storage's nested ordinal_type.
122 typedef typename Storage::ordinal_type
Ordinal;
// Compile-time parameters passed to the driver: 3 per thread, 16 threads
// per vector.
123 const Ordinal NumPerThread = 3;
124 const Ordinal ThreadsPerVector = 16;
// Explicit configuration: the vector-thread count equals ThreadsPerVector
// (16) and the row-thread count is 4.
127 const Ordinal num_vec_threads = ThreadsPerVector;
128 const Ordinal num_row_threads = 4;
// Forward everything to the CUDA driver; the returned bool is not used on
// this line.
131 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
135 Kokkos_CrsMatrix_MP, Multiply_4,
Storage, MultiplyOp )
// Multiply_4 case of the Kokkos_CrsMatrix_MP test group, templated on
// Storage and MultiplyOp.
// NOTE(review): the macro name opening this declaration, the braces, and the
// definition of num_blocks are not visible in this excerpt.
// Ordinal is taken from the storage's nested ordinal_type.
137 typedef typename Storage::ordinal_type
Ordinal;
// Compile-time parameters passed to the driver: 4 per thread, 16 threads
// per vector.
138 const Ordinal NumPerThread = 4;
139 const Ordinal ThreadsPerVector = 16;
// Explicit configuration: the vector-thread count equals ThreadsPerVector
// (16) and the row-thread count is 4.
142 const Ordinal num_vec_threads = ThreadsPerVector;
143 const Ordinal num_row_threads = 4;
// Forward everything to the CUDA driver; the returned bool is not used on
// this line.
146 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
// Instantiates the five Kokkos_CrsMatrix_MP multiply tests (Multiply_Default
// and Multiply_1 through Multiply_4) for a single STORAGE / OP pair by
// expanding TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT once per test name.
//   STORAGE - storage type forwarded as the first template argument.
//   OP      - multiply-operator type forwarded as the second template argument.
149#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( STORAGE, OP ) \
150 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
151 Kokkos_CrsMatrix_MP, Multiply_Default, STORAGE, OP ) \
152 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
153 Kokkos_CrsMatrix_MP, Multiply_1, STORAGE, OP ) \
154 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
155 Kokkos_CrsMatrix_MP, Multiply_2, STORAGE, OP ) \
156 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
157 Kokkos_CrsMatrix_MP, Multiply_3, STORAGE, OP ) \
158 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
159 Kokkos_CrsMatrix_MP, Multiply_4, STORAGE, OP )
// Expands the per-storage test macro for every combination of the storage
// names SFS and DS with the operators DefaultMultiply and KokkosMultiply
// (four expansions in total).
// NOTE(review): ORDINAL, SCALAR and DEVICE are not referenced in the
// replacement text visible here; SFS and DS are presumably typedefs built
// from them on lines missing from this excerpt -- confirm against the full file.
164#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE( ORDINAL, SCALAR, DEVICE ) \
165 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, DefaultMultiply ) \
166 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, KokkosMultiply ) \
167 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, DefaultMultiply ) \
168 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, KokkosMultiply )
// Body fragment of the test executable's entry point (presumably main).
// NOTE(review): the function header, any Kokkos shutdown call and the return
// statement are not visible in this excerpt.
// Construct the MPI session object from the command-line arguments.
173 Teuchos::GlobalMPISession mpiSession(&argc, &
argv);
// Initialize Kokkos with device id 0 selected explicitly, then print the
// resulting configuration to standard output.
176 Kokkos::InitializationSettings init_args;
177 init_args.set_device_id(0);
178 Kokkos::initialize( init_args );
179 Kokkos::print_configuration(std::cout);
// Run every registered unit test; the runner's result is kept in `ret`.
182 int ret = Teuchos::UnitTestRepository::runUnitTestsFromMain(argc,
argv);
const unsigned VectorSize
#define CRSMATRIX_MP_VECTOR_TESTS_DEVICE(DEVICE)
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_CrsMatrix_MP, Multiply_Default, Storage, MultiplyOp)
int main(int argc, char *argv[])
bool test_cuda_embedded_vector(Ordinal num_blocks, Ordinal num_vec_threads, Ordinal num_row_threads, Teuchos::FancyOStream &out)
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE(ORDINAL, SCALAR, DEVICE)
Stokhos::StandardStorage< int, double > storage_type
Stokhos::StandardStorage< int, double > Storage