Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_OpenMP_MKL_CrsMatrix.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef STOKHOS_OPENMP_MKL_CRSMATRIX_HPP
43#define STOKHOS_OPENMP_MKL_CRSMATRIX_HPP
44
45#include "Kokkos_Macros.hpp"
46#include "Stokhos_ConfigDefs.h"
47#if defined(KOKKOS_ENABLE_OPENMP) && defined(HAVE_STOKHOS_MKL)
48
49#include "Kokkos_Core.hpp"
50#include "Stokhos_CrsMatrix.hpp"
51#include "mkl.h"
52
53// Implementation of CrsMatrix multiply using MKL and OpenMP
54
55namespace Stokhos {
56
57namespace Impl {
58
59 template <typename ValueType, typename OrdinalType, typename ExecutionSpace>
60 struct GatherTranspose {
61 typedef ValueType value_type;
62 typedef OrdinalType ordinal_type;
63 typedef ExecutionSpace execution_space;
64 typedef typename execution_space::size_type size_type;
65 typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > multi_vector_type ;
66 typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
67
68 const multi_vector_type m_x;
69 const trans_multi_vector_type m_xt;
70 const std::vector<ordinal_type> m_indices;
71 const size_t ncol;
72 GatherTranspose(const multi_vector_type & x,
73 const trans_multi_vector_type& xt,
74 const std::vector<ordinal_type> & indices) :
75 m_x(x), m_xt(xt), m_indices(indices), ncol(indices.size()) {}
76
77 inline void operator()( const size_type row ) const {
78 for (size_t col=0; col<ncol; ++col)
79 m_xt(col,row) = m_x(row,m_indices[col]);
80 }
81
82 static void apply(const multi_vector_type & x,
83 const trans_multi_vector_type& xt,
84 const std::vector<ordinal_type> & indices) {
85 const size_t n = xt.extent(1);
86 Kokkos::parallel_for( n, GatherTranspose(x,xt,indices) );
87 }
88 };
89
90 template <typename ValueType, typename OrdinalType, typename ExecutionSpace>
91 struct ScatterTranspose {
92 typedef ValueType value_type;
93 typedef OrdinalType ordinal_type;
94 typedef ExecutionSpace execution_space;
95 typedef typename execution_space::size_type size_type;
96 typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > multi_vector_type ;
97 typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
98
99 const multi_vector_type m_x;
100 const trans_multi_vector_type m_xt;
101 const std::vector<ordinal_type> m_indices;
102 const size_t ncol;
103 ScatterTranspose(const multi_vector_type & x,
104 const trans_multi_vector_type& xt,
105 const std::vector<ordinal_type> & indices) :
106 m_x(x), m_xt(xt), m_indices(indices), ncol(indices.size()) {}
107
108 inline void operator()( const size_type row ) const {
109 for (size_t col=0; col<ncol; ++col)
110 m_x(row,m_indices[col]) = m_xt(col,row);
111 }
112
113 static void apply(const multi_vector_type & x,
114 const trans_multi_vector_type& xt,
115 const std::vector<ordinal_type> & indices) {
116 const size_t n = xt.extent(1);
117 Kokkos::parallel_for( n, ScatterTranspose(x,xt,indices) );
118 }
119 };
120
121 template <typename ValueType, typename ExecutionSpace>
122 struct GatherVecTranspose {
123 typedef ValueType value_type;
124 typedef ExecutionSpace execution_space;
125 typedef typename execution_space::size_type size_type;
126 typedef Kokkos::View< value_type* , execution_space > vector_type ;
127 typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
128
129 const std::vector<vector_type> m_x;
130 const trans_multi_vector_type m_xt;
131 const size_t ncol;
132 GatherVecTranspose(const std::vector<vector_type> & x,
133 const trans_multi_vector_type& xt) :
134 m_x(x), m_xt(xt), ncol(x.size()) {}
135
136 inline void operator()( const size_type row ) const {
137 for (size_t col=0; col<ncol; ++col)
138 m_xt(col,row) = m_x[col](row);
139 }
140
141 static void apply(const std::vector<vector_type> & x,
142 const trans_multi_vector_type& xt) {
143 const size_t n = xt.extent(1);
144 Kokkos::parallel_for( n, GatherVecTranspose(x,xt) );
145 }
146 };
147
148 template <typename ValueType, typename ExecutionSpace>
149 struct ScatterVecTranspose {
150 typedef ValueType value_type;
151 typedef ExecutionSpace execution_space;
152 typedef typename execution_space::size_type size_type;
153 typedef Kokkos::View< value_type* , execution_space > vector_type ;
154 typedef Kokkos::View< value_type** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
155
156 const std::vector<vector_type> m_x;
157 const trans_multi_vector_type m_xt;
158 const size_t ncol;
159 ScatterVecTranspose(const std::vector<vector_type> & x,
160 const trans_multi_vector_type& xt) :
161 m_x(x), m_xt(xt), ncol(x.size()) {}
162
163 inline void operator()( const size_type row ) const {
164 for (size_t col=0; col<ncol; ++col)
165 m_x[col](row) = m_xt(col,row);
166 }
167
168 static void apply(const std::vector<vector_type> & x,
169 const trans_multi_vector_type& xt) {
170 const size_t n = xt.extent(1);
171 Kokkos::parallel_for( n, ScatterVecTranspose(x,xt) );
172 }
173 };
174
175} // namespace Impl
176
177// Specializations of multiply using Intel MKL
178class MKLMultiply {};
179
180void multiply(const CrsMatrix< double , Kokkos::OpenMP >& A,
181 const Kokkos::View< double* , Kokkos::OpenMP >& x,
182 Kokkos::View< double* , Kokkos::OpenMP >& y,
183 MKLMultiply tag)
184{
185 MKL_INT n = A.graph.row_map.extent(0) - 1 ;
186 double *A_values = A.values.data() ;
187 MKL_INT *col_indices = A.graph.entries.data() ;
188 MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
189 MKL_INT *row_end = row_beg+1;
190 char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
191 char trans = 'N';
192 double alpha = 1.0;
193 double beta = 0.0;
194
195 double *x_values = x.data() ;
196 double *y_values = y.data() ;
197
198 mkl_dcsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
199 row_beg, row_end, x_values, &beta, y_values);
200}
201
202void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
203 const Kokkos::View< float* , Kokkos::OpenMP >& x,
204 Kokkos::View< float* , Kokkos::OpenMP >& y,
205 MKLMultiply tag)
206{
207 MKL_INT n = A.graph.row_map.extent(0) - 1 ;
208 float *A_values = A.values.data() ;
209 MKL_INT *col_indices = A.graph.entries.data() ;
210 MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
211 MKL_INT *row_end = row_beg+1;
212 char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
213 char trans = 'N';
214 float alpha = 1.0;
215 float beta = 0.0;
216
217 float *x_values = x.data() ;
218 float *y_values = y.data() ;
219
220 mkl_scsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
221 row_beg, row_end, x_values, &beta, y_values);
222}
223
224void multiply(const CrsMatrix< double , Kokkos::OpenMP >& A,
225 const std::vector< Kokkos::View< double* , Kokkos::OpenMP > >& x,
226 std::vector< Kokkos::View< double* , Kokkos::OpenMP > >& y,
227 MKLMultiply tag)
228{
229 typedef Kokkos::OpenMP execution_space ;
230 typedef double value_type ;
231 typedef Kokkos::View< double** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
232
233 MKL_INT n = A.graph.row_map.extent(0) - 1 ;
234 double *A_values = A.values.data() ;
235 MKL_INT *col_indices = A.graph.entries.data() ;
236 MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
237 MKL_INT *row_end = row_beg+1;
238 char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
239 char trans = 'N';
240 double alpha = 1.0;
241 double beta = 0.0;
242
243 // Copy columns of x into a contiguous vector
244 MKL_INT ncol = x.size();
245 trans_multi_vector_type xx( "xx" , ncol , n );
246 trans_multi_vector_type yy( "yy" , ncol , n );
247 Impl::GatherVecTranspose<value_type,execution_space>::apply(x,xx);
248 double *x_values = xx.data() ;
249 double *y_values = yy.data() ;
250
251 // Call MKLs CSR x multi-vector (row-based) multiply
252 mkl_dcsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
253 row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
254
255 // Copy columns out of continguous multivector
256 Impl::ScatterVecTranspose<value_type,execution_space>::apply(y,yy);
257}
258
259void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
260 const std::vector< Kokkos::View< float* , Kokkos::OpenMP > >& x,
261 std::vector< Kokkos::View< float* , Kokkos::OpenMP > >& y,
262 MKLMultiply tag)
263{
264 typedef Kokkos::OpenMP execution_space ;
265 typedef float value_type ;
266 typedef Kokkos::View< float** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
267
268 MKL_INT n = A.graph.row_map.extent(0) - 1 ;
269 float *A_values = A.values.data() ;
270 MKL_INT *col_indices = A.graph.entries.data() ;
271 MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
272 MKL_INT *row_end = row_beg+1;
273 char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
274 char trans = 'N';
275 float alpha = 1.0;
276 float beta = 0.0;
277
278 // Copy columns of x into a contiguous vector
279 MKL_INT ncol = x.size();
280 trans_multi_vector_type xx( "xx" , ncol , n );
281 trans_multi_vector_type yy( "yy" , ncol , n );
282 Impl::GatherVecTranspose<value_type,execution_space>::apply(x,xx);
283 float *x_values = xx.data() ;
284 float *y_values = yy.data() ;
285
286 // Call MKLs CSR x multi-vector (row-based) multiply
287 mkl_scsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
288 row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
289
290 // Copy columns out of continguous multivector
291 Impl::ScatterVecTranspose<value_type,execution_space>::apply(y,yy);
292}
293
294template <typename ordinal_type>
295void multiply(
296 const CrsMatrix< double , Kokkos::OpenMP >& A,
297 const Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::OpenMP >& x,
298 Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::OpenMP >& y,
299 const std::vector<ordinal_type>& indices,
300 MKLMultiply tag)
301{
302 typedef Kokkos::OpenMP execution_space ;
303 typedef double value_type ;
304 typedef Kokkos::View< double** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
305
306 MKL_INT n = A.graph.row_map.extent(0) - 1 ;
307 double *A_values = A.values.data() ;
308 MKL_INT *col_indices = A.graph.entries.data() ;
309 MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
310 MKL_INT *row_end = row_beg+1;
311 char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
312 char trans = 'N';
313 double alpha = 1.0;
314 double beta = 0.0;
315
316 // Copy columns of x into a contiguous vector
317 MKL_INT ncol = indices.size();
318 trans_multi_vector_type xx( "xx" , ncol , n );
319 trans_multi_vector_type yy( "yy" , ncol , n );
320 Impl::GatherTranspose<value_type,ordinal_type,execution_space>::apply(x,xx,indices);
321 double *x_values = xx.data() ;
322 double *y_values = yy.data() ;
323
324 // Call MKLs CSR x multi-vector (row-based) multiply
325 mkl_dcsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
326 row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
327
328 // Copy columns out of continguous multivector
329 Impl::ScatterTranspose<value_type,ordinal_type,execution_space>::apply(y,yy,indices);
330}
331
332template <typename ordinal_type>
333void multiply(
334 const CrsMatrix< float , Kokkos::OpenMP >& A,
335 const Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::OpenMP >& x,
336 Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::OpenMP >& y,
337 const std::vector<ordinal_type>& indices,
338 MKLMultiply tag)
339{
340 typedef Kokkos::OpenMP execution_space ;
341 typedef float value_type ;
342 typedef Kokkos::View< float** , Kokkos::LayoutLeft, execution_space > trans_multi_vector_type ;
343
344 MKL_INT n = A.graph.row_map.extent(0) - 1 ;
345 float *A_values = A.values.data() ;
346 MKL_INT *col_indices = A.graph.entries.data() ;
347 MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.data()) ;
348 MKL_INT *row_end = row_beg+1;
349 char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
350 char trans = 'N';
351 float alpha = 1.0;
352 float beta = 0.0;
353
354 // Copy columns of x into a contiguous vector
355 MKL_INT ncol = indices.size();
356 trans_multi_vector_type xx( "xx" , ncol , n );
357 trans_multi_vector_type yy( "yy" , ncol , n );
358 Impl::GatherTranspose<value_type,ordinal_type,execution_space>::apply(x,xx,indices);
359 float *x_values = xx.data() ;
360 float *y_values = yy.data() ;
361
362 // Call MKLs CSR x multi-vector (row-based) multiply
363 mkl_scsrmm(&trans, &n, &ncol, &n, &alpha, matdescra, A_values, col_indices,
364 row_beg, row_end, x_values, &ncol, &beta, y_values, &ncol);
365
366 // Copy columns out of continguous multivector
367 Impl::ScatterTranspose<value_type,ordinal_type,execution_space>::apply(y,yy,indices);
368}
369
370//----------------------------------------------------------------------------
371
372} // namespace Stokhos
373
374#endif
375
376#endif /* #ifndef STOKHOS_OPENMP_MKL_CRSMATRIX_HPP */
Kokkos::DefaultExecutionSpace execution_space
Top-level namespace for Stokhos classes and functions.
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition csr_vector.h:265
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition csr_vector.h:267