Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_CrsMatrix.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef STOKHOS_CRSMATRIX_HPP
43#define STOKHOS_CRSMATRIX_HPP
44
45#include <fstream>
46#include <iomanip>
47
48#include "Kokkos_Core.hpp"
49#include "Kokkos_StaticCrsGraph.hpp"
50
51#include "Stokhos_Multiply.hpp"
53
54namespace Stokhos {
55
57 struct Dim3 {
58 size_t x, y, z;
59 Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
60 x(x_), y(y_), z(z_) {}
61 };
62
64 size_t num_blocks;
66
67 DeviceConfig(const size_t num_blocks_,
68 const size_t threads_per_block_x_,
69 const size_t threads_per_block_y_ = 1,
70 const size_t threads_per_block_z_ = 1) :
71 block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
72 num_blocks(num_blocks_),
74 {}
75};
76
78template <typename ValueType, typename Device,
79 typename Layout = Kokkos::LayoutRight>
80class CrsMatrix {
81public:
82 typedef Device execution_space;
83 typedef ValueType value_type;
84 typedef Kokkos::View< value_type[], Layout, execution_space > values_type;
85#ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
86 typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , int > graph_type;
87#else
88 typedef Kokkos::StaticCrsGraph< int , Layout, execution_space , void, int > graph_type;
89#endif
90
91 typedef CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout> HostMirror;
92
96
97 CrsMatrix() : dev_config(0, 0) {}
98 CrsMatrix(Stokhos::DeviceConfig dev_config_) : dev_config(dev_config_) {}
99};
100
101// Generic matrix vector multiply kernel for CrsMatrix
102template <typename MatrixValue,
103 typename Layout,
104 typename Device,
105 typename InputVectorType,
106 typename OutputVectorType>
107class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
108 InputVectorType,
109 OutputVectorType,
110 void,
111 IntegralRank<1> >
112{
113public:
114 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
115 typedef InputVectorType input_vector_type;
116 typedef OutputVectorType output_vector_type;
117
118 typedef Device execution_space;
119 typedef typename execution_space::size_type size_type;
120 typedef typename output_vector_type::value_type scalar_type;
121
125
127 const input_vector_type& x,
129 : m_A( A )
130 , m_x( x )
131 , m_y( y )
132 {}
133
134 //--------------------------------------------------------------------------
135
136 KOKKOS_INLINE_FUNCTION
137 void operator()( const size_type iRow ) const
138 {
139 const size_type iEntryBegin = m_A.graph.row_map[iRow];
140 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
141
142 scalar_type sum = 0;
143
144 for ( size_type iEntry = iEntryBegin; iEntry < iEntryEnd; ++iEntry ) {
145 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry) );
146 }
147
148 m_y(iRow) = sum;
149 }
150
151 static void apply( const matrix_type & A,
152 const input_vector_type & x,
154 {
155 const size_t row_count = A.graph.row_map.extent(0) - 1;
156 Kokkos::parallel_for( row_count, Multiply(A,x,y) );
157 }
158};
159
160// Generic matrix multi-vector multiply kernel for CrsMatrix
161template <typename MatrixValue,
162 typename Layout,
163 typename Device,
164 typename InputMultiVectorType,
165 typename OutputMultiVectorType,
166 typename OrdinalType >
167class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
168 InputMultiVectorType,
169 OutputMultiVectorType,
170 std::vector<OrdinalType>,
171 IntegralRank<2> >
172{
173public:
174 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
175 typedef InputMultiVectorType input_multi_vector_type;
176 typedef OutputMultiVectorType output_multi_vector_type;
177 typedef std::vector<OrdinalType> column_indices_type;
178
179 typedef Device execution_space;
180 typedef typename execution_space::size_type size_type;
181 typedef typename output_multi_vector_type::value_type scalar_type;
182
188
192 const column_indices_type& col_indices )
193 : m_A( A )
194 , m_x( x )
195 , m_y( y )
196 , m_col_indices( col_indices )
197 , m_num_vecs( col_indices.size() )
198 {}
199
200 //--------------------------------------------------------------------------
201
202 KOKKOS_INLINE_FUNCTION
203 void operator()( const size_type iRow ) const
204 {
205 const size_type iEntryBegin = m_A.graph.row_map[iRow];
206 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
207
208 for (size_type j=0; j<m_num_vecs; j++) {
209 size_type iCol = m_col_indices[j];
210
211 scalar_type sum = 0.0;
212
213 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
214 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
215 }
216
217 m_y( iRow, iCol ) = sum;
218
219 }
220
221 }
222
223 static void apply( const matrix_type& A,
226 const column_indices_type& col )
227 {
228 const size_t n = A.graph.row_map.extent(0) - 1 ;
229 //Kokkos::parallel_for( n , Multiply(A,x,y,col) );
230
231 const size_t block_size = 20;
232 const size_t num_vecs = col.size();
233 std::vector<OrdinalType> block_col;
234 block_col.reserve(block_size);
235 for (size_t block=0; block<num_vecs; block+=block_size) {
236 const size_t bs =
237 block+block_size <= num_vecs ? block_size : num_vecs-block;
238 block_col.resize(bs);
239 for (size_t i=0; i<bs; ++i)
240 block_col[i] = col[block+i];
241 Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
242 }
243 }
244};
245
246#define USE_NEW 1
247#if USE_NEW
248// Generic matrix multi-vector multiply kernel for CrsMatrix
249// Experimenting with blocking of column and row loops to improve cache
250// performance. Seems to help signficantly on SandyBridge, little difference
251// on MIC (although not extensive investigation of block sizes).
252template <typename MatrixValue,
253 typename Layout,
254 typename Device,
255 typename InputMultiVectorType,
256 typename OutputMultiVectorType >
257class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
258 InputMultiVectorType,
259 OutputMultiVectorType,
260 void,
261 IntegralRank<2> >
262{
263public:
264 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
265 typedef InputMultiVectorType input_multi_vector_type;
266 typedef OutputMultiVectorType output_multi_vector_type;
267
268 typedef Device execution_space;
269 typedef typename execution_space::size_type size_type;
270 typedef typename output_multi_vector_type::value_type scalar_type;
271
272 const matrix_type m_A;
273 const input_multi_vector_type m_x;
274 output_multi_vector_type m_y;
275 const size_type m_num_row;
276 const size_type m_num_col;
277
278 static const size_type m_block_row_size = 32;
279 static const size_type m_block_col_size = 20;
280
281 Multiply( const matrix_type& A,
282 const input_multi_vector_type& x,
283 output_multi_vector_type& y )
284 : m_A( A )
285 , m_x( x )
286 , m_y( y )
287 , m_num_row( A.graph.row_map.extent(0)-1 )
288 , m_num_col( m_y.extent(1) )
289 {
290 }
291
292 //--------------------------------------------------------------------------
293
294 KOKKOS_INLINE_FUNCTION
295 void operator()( const size_type iBlockRow ) const
296 {
297 // Number of rows in this block
298 const size_type num_row =
299 iBlockRow+m_block_row_size <= m_num_row ?
300 m_block_row_size : m_num_row-iBlockRow;
301
302 // Loop over block columns of x
303 for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
304 // Number of columns in this block
305 const size_type num_col =
306 iBlockCol+m_block_col_size <= m_num_col ?
307 m_block_col_size : m_num_col-iBlockCol;
308
309 // Loop over rows in this block of A
310 const size_type iRowEnd = iBlockRow + num_row;
311 for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
312
313 // Range of column entries for this row
314 const size_type iEntryBegin = m_A.graph.row_map[iRow];
315 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
316
317 // Loop over columns in this block of x
318 const size_type iColEnd = iBlockCol + num_col;
319 for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
320
321 // Loop columns of A for this row
322 scalar_type sum = 0.0;
323 for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
324 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
325 }
326 m_y( iRow, iCol ) = sum;
327
328 }
329
330 }
331
332 }
333
334 }
335
336 static void apply( const matrix_type & A,
337 const input_multi_vector_type& x,
338 output_multi_vector_type& y )
339 {
340 // Parallelize over row blocks of size m_block_row_size
341 const size_type num_row = A.graph.row_map.extent(0) - 1;
342 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
343 Kokkos::parallel_for( n , Multiply(A,x,y) );
344 }
345};
346#else
347// Generic matrix multi-vector multiply kernel for CrsMatrix
348template <typename MatrixValue,
349 typename Layout,
350 typename Device,
351 typename InputMultiVectorType,
352 typename OutputMultiVectorType >
353class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
354 InputMultiVectorType,
355 OutputMultiVectorType,
356 void,
357 IntegralRank<2> >
358{
359public:
360 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
361 typedef InputMultiVectorType input_multi_vector_type;
362 typedef OutputMultiVectorType output_multi_vector_type;
363
364 typedef Device execution_space;
365 typedef typename execution_space::size_type size_type;
366 typedef typename output_multi_vector_type::value_type scalar_type;
367
372
376 : m_A( A )
377 , m_x( x )
378 , m_y( y )
379 , m_num_vecs( m_y.extent(1) )
380 {}
381
382 //--------------------------------------------------------------------------
383
384 KOKKOS_INLINE_FUNCTION
385 void operator()( const size_type iRow ) const
386 {
387 const size_type iEntryBegin = m_A.graph.row_map[iRow];
388 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
389
390 for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
391
392 scalar_type sum = 0.0;
393
394 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
395 sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
396 }
397
398 m_y( iRow, iCol ) = sum;
399
400 }
401
402 }
403
404 static void apply( const matrix_type& A,
407 {
408 const size_t n = A.graph.row_map.extent(0) - 1 ;
409 Kokkos::parallel_for( n , Multiply(A,x,y) );
410
411 // const size_t block_size = 20;
412 // const size_t num_vecs = col.size();
413 // std::vector<OrdinalType> block_col;
414 // block_col.reserve(block_size);
415 // for (size_t block=0; block<num_vecs; block+=block_size) {
416 // const size_t bs =
417 // block+block_size <= num_vecs ? block_size : num_vecs-block;
418 // block_col.resize(bs);
419 // for (size_t i=0; i<bs; ++i)
420 // block_col[i] = col[block+i];
421 // Kokkos::parallel_for( n , Multiply(A,x,y,block_col) );
422 // }
423 }
424};
425#endif
426
427#if USE_NEW
428// Generic matrix multi-vector multiply kernel for CrsMatrix
429// Experimenting with blocking of column and row loops to improve cache
430// performance. Seems to help signficantly on SandyBridge, little difference
431// on MIC (although not extensive investigation of block sizes).
432template <typename MatrixValue,
433 typename Layout,
434 typename Device,
435 typename InputViewType,
436 typename OutputViewType>
437class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
438 std::vector<InputViewType>,
439 std::vector<OutputViewType>,
440 void,
441 IntegralRank<1> >
442{
443public:
444 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
445 typedef std::vector<InputViewType> input_multi_vector_type;
446 typedef std::vector<OutputViewType> output_multi_vector_type;
447
448 typedef Device execution_space;
449 typedef typename execution_space::size_type size_type;
450 typedef typename OutputViewType::value_type scalar_type;
451
452 const matrix_type m_A;
453 const input_multi_vector_type m_x;
454 output_multi_vector_type m_y;
455 const size_type m_num_row;
456 const size_type m_num_col;
457
458 static const size_type m_block_row_size = 32;
459 static const size_type m_block_col_size = 20;
460
461 Multiply( const matrix_type& A,
462 const input_multi_vector_type& x,
463 output_multi_vector_type& y )
464 : m_A( A )
465 , m_x( x )
466 , m_y( y )
467 , m_num_row( A.graph.row_map.extent(0)-1 )
468 , m_num_col( x.size() )
469 {
470 }
471
472 //--------------------------------------------------------------------------
473
474 KOKKOS_INLINE_FUNCTION
475 void operator()( const size_type iBlockRow ) const
476 {
477 // Number of rows in this block
478 const size_type num_row =
479 iBlockRow+m_block_row_size <= m_num_row ?
480 m_block_row_size : m_num_row-iBlockRow;
481
482 // Loop over block columns of x
483 for (size_type iBlockCol=0; iBlockCol<m_num_col; iBlockCol+=m_block_col_size) {
484 // Number of columns in this block
485 const size_type num_col =
486 iBlockCol+m_block_col_size <= m_num_col ?
487 m_block_col_size : m_num_col-iBlockCol;
488
489 // Loop over rows in this block of A
490 const size_type iRowEnd = iBlockRow + num_row;
491 for (size_type iRow=iBlockRow; iRow<iRowEnd; ++iRow) {
492
493 // Range of column entries for this row
494 const size_type iEntryBegin = m_A.graph.row_map[iRow];
495 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
496
497 // Loop over columns in this block of x
498 const size_type iColEnd = iBlockCol + num_col;
499 for (size_type iCol=iBlockCol; iCol<iColEnd; iCol++) {
500
501 // Loop columns of A for this row
502 scalar_type sum = 0.0;
503 for (size_type iEntry = iEntryBegin; iEntry<iEntryEnd; ++iEntry) {
504 sum += m_A.values(iEntry) * m_x[iCol](m_A.graph.entries(iEntry));
505 }
506 m_y[iCol](iRow) = sum;
507
508 }
509
510 }
511
512 }
513
514 }
515
516 static void apply( const matrix_type & A,
517 const input_multi_vector_type& x,
518 output_multi_vector_type& y )
519 {
520 // Parallelize over row blocks of size m_block_row_size
521 const size_type num_row = A.graph.row_map.extent(0) - 1;
522 const size_type n = (num_row+m_block_row_size-1) / m_block_row_size;
523 Kokkos::parallel_for( n , Multiply(A,x,y) );
524 }
525};
526#else
527// Generic matrix multi-vector multiply kernel for CrsMatrix
528template <typename MatrixValue,
529 typename Layout,
530 typename Device,
531 typename InputViewType,
532 typename OutputViewType>
533class Multiply< CrsMatrix<MatrixValue,Device,Layout>,
534 std::vector<InputViewType>,
535 std::vector<OutputViewType>,
536 void,
537 IntegralRank<1> >
538{
539public:
540 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type;
541 typedef std::vector<InputViewType> input_multi_vector_type;
542 typedef std::vector<OutputViewType> output_multi_vector_type;
543
544 typedef Device execution_space;
545 typedef typename execution_space::size_type size_type;
546 typedef typename OutputViewType::value_type scalar_type;
547
552
556 : m_A( A )
557 , m_x( x )
558 , m_y( y )
559 , m_num_vecs( x.size() )
560 {
561 }
562
563 //--------------------------------------------------------------------------
564
565 KOKKOS_INLINE_FUNCTION
566 void operator()( const size_type iRow ) const
567 {
568 const size_type iEntryBegin = m_A.graph.row_map[iRow];
569 const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
570
571 for (size_type iCol=0; iCol<m_num_vecs; iCol++) {
572
573 scalar_type sum = 0.0;
574
575 for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
576 sum += m_A.values(iEntry) * m_x[iCol]( m_A.graph.entries(iEntry) );
577 }
578
579 m_y[iCol]( iRow) = sum;
580
581 }
582
583 }
584
585 static void apply( const matrix_type & A,
588 {
589 const size_t n = A.graph.row_map.extent(0) - 1 ;
590 Kokkos::parallel_for( n , Multiply(A,x,y) );
591
592 // const size_t block_size = 20;
593 // const size_t num_vecs = x.size();
594 // input_multi_vector_type xx;
595 // output_multi_vector_type yy;
596 // xx.reserve(block_size);
597 // yy.reserve(block_size);
598 // for (size_t block=0; block<num_vecs; block+=block_size) {
599 // const size_t bs =
600 // block+block_size <= num_vecs ? block_size : num_vecs-block;
601 // xx.resize(bs);
602 // yy.resize(bs);
603 // for (size_t i=0; i<bs; ++i) {
604 // xx[i] = x[block+i];
605 // yy[i] = y[block+i];
606 // }
607 // Kokkos::parallel_for( n , Multiply(A,xx,yy) );
608 // }
609 }
610};
611#endif
612
613// Matrix multivector multiply specializations for one column at a time
615template <typename MatrixValue,
616 typename Layout,
617 typename Device,
618 typename InputMultiVectorType,
619 typename OutputMultiVectorType,
620 typename OrdinalType>
621void multiply(const CrsMatrix<MatrixValue,Device,Layout>& A,
622 const InputMultiVectorType& x,
623 OutputMultiVectorType& y,
624 const std::vector<OrdinalType>& col_indices,
626{
627 typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
628
629 typedef Kokkos::View<typename InputMultiVectorType::value_type*, typename InputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> InputVectorType;
630 typedef Kokkos::View<typename OutputMultiVectorType::value_type*, typename OutputMultiVectorType::array_layout, Device, Kokkos::MemoryUnmanaged> OutputVectorType;
631 typedef Multiply<MatrixType,InputVectorType,OutputVectorType> multiply_type;
632 for (size_t i=0; i<col_indices.size(); ++i) {
633 InputVectorType x_view =
634 Kokkos::subview( x , Kokkos::ALL() , col_indices[i] );
635 OutputVectorType y_view =
636 Kokkos::subview( y , Kokkos::ALL() , col_indices[i] );
637 multiply_type::apply( A , x_view , y_view );
638 }
639}
640
641template <typename MatrixValue,
642 typename Layout,
643 typename Device,
644 typename InputVectorType,
645 typename OutputVectorType>
646void multiply(const CrsMatrix<MatrixValue,Device,Layout>& A,
647 const std::vector<InputVectorType>& x,
648 std::vector<OutputVectorType>& y,
650{
651 typedef CrsMatrix<MatrixValue,Device,Layout> MatrixType;
652 typedef Multiply<MatrixType,InputVectorType,OutputVectorType> multiply_type;
653 for (size_t i=0; i<x.size(); ++i) {
654 multiply_type::apply( A , x[i] , y[i] );
655 }
656}
657
658} // namespace Stokhos
659
660//----------------------------------------------------------------------------
661//----------------------------------------------------------------------------
662
663namespace Kokkos {
664
665template <typename ValueType, typename Layout, typename Device>
669 mirror_A.values = Kokkos::create_mirror(A.values);
670 mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
671 mirror_A.dev_config = A.dev_config;
672 return mirror_A;
673}
674
675template <typename ValueType, typename Layout, typename Device>
679 mirror_A.values = Kokkos::create_mirror_view(A.values);
680 mirror_A.graph = Kokkos::create_mirror(A.graph); // this deep copies
681 mirror_A.dev_config = A.dev_config;
682 return mirror_A;
683}
684
685template <typename ValueType, typename Layout, typename DstDevice,
686 typename SrcDevice>
687void
692
693} // namespace Kokkos
694
695//----------------------------------------------------------------------------
696//----------------------------------------------------------------------------
697
698namespace Stokhos {
699
700// MatrixMarket writer for CrsMatrix
701template < typename MatrixValue, typename Layout, typename Device >
702class MatrixMarketWriter< CrsMatrix<MatrixValue,Device,Layout> >
703{
704public:
705 typedef CrsMatrix<MatrixValue,Device,Layout> matrix_type ;
706 typedef Device execution_space ;
707 typedef typename execution_space::size_type size_type ;
708
709 static void write(const matrix_type& A, const std::string& filename) {
710 std::ofstream file(filename.c_str());
711 file.precision(16);
712 file.setf(std::ios::scientific);
713
714 typename matrix_type::HostMirror hA = Kokkos::create_mirror_view(A);
715 Kokkos::deep_copy(hA, A);
716
717 const size_type nRow = hA.graph.row_map.extent(0) - 1 ;
718
719 // Write banner
720 file << "%%MatrixMarket matrix coordinate real general" << std::endl;
721 file << nRow << " " << nRow << " " << hA.values.extent(0) << std::endl;
722
723 for (size_type row=0; row<nRow; ++row) {
724 size_type entryBegin = hA.graph.row_map(row);
725 size_type entryEnd = hA.graph.row_map(row+1);
726 for (size_type entry=entryBegin; entry<entryEnd; ++entry) {
727 file << row+1 << " " << hA.graph.entries(entry)+1 << " "
728 << std::setw(22) << hA.values(entry) << std::endl;
729 }
730 }
731
732 file.close();
733 }
734};
735
736} // namespace Stokhos
737
738#endif /* #ifndef STOKHOS_CRSMATRIX_HPP */
Kokkos::DefaultExecutionSpace execution_space
Stokhos::DeviceConfig dev_config
CrsMatrix< ValueType, typename values_type::host_mirror_space, Layout > HostMirror
Kokkos::View< value_type[], Layout, execution_space > values_type
Kokkos::StaticCrsGraph< int, Layout, execution_space, void, int > graph_type
CrsMatrix(Stokhos::DeviceConfig dev_config_)
static void write(const matrix_type &A, const std::string &filename)
Multiply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col_indices)
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y, const column_indices_type &col)
static void apply(const matrix_type &A, const input_multi_vector_type &x, output_multi_vector_type &y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)
Top-level namespace for Stokhos classes and functions.
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
DeviceConfig(const size_t num_blocks_, const size_t threads_per_block_x_, const size_t threads_per_block_y_=1, const size_t threads_per_block_z_=1)