Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_Cuda_BlockCrsMatrix.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
43#define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
44
45#include <utility>
46#include <sstream>
47#include <stdexcept>
48
49#include "Kokkos_Core.hpp"
50
51#include "Stokhos_Multiply.hpp"
53
54namespace Stokhos {
55
56template< class BlockSpec , typename MatrixValue , typename VectorValue >
58 BlockCrsMatrix< BlockSpec , MatrixValue , Kokkos::Cuda > ,
59 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
60 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
61{
62public:
63
64 typedef Kokkos::Cuda execution_space ;
65 typedef execution_space::size_type size_type ;
66 typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda > block_vector_type ;
67 typedef BlockCrsMatrix< BlockSpec , MatrixValue , execution_space > matrix_type ;
68
72
73 Multiply( const matrix_type & A ,
74 const block_vector_type & x ,
75 const block_vector_type & y )
76 : m_A( A )
77 , m_x( x )
78 , m_y( y )
79 {}
80
81 //--------------------------------------------------------------------------
82 // A( storage_size( m_A.block.size() ) , m_A.graph.row_map.size() );
83 // x( m_A.block.dimension() , m_A.graph.row_map.first_count() );
84 // y( m_A.block.dimension() , m_A.graph.row_map.first_count() );
85 //
86
87 __device__
88 void operator()(void) const
89 {
90 const size_type blockCount = m_A.graph.row_map.extent(0) - 1 ;
91
92 for ( size_type iBlock = blockIdx.x ;
93 iBlock < blockCount ; iBlock += gridDim.x ) {
94 VectorValue y = 0 ;
95
96 const size_type iEntryEnd = m_A.graph.row_map[iBlock+1];
97 size_type iEntry = m_A.graph.row_map[iBlock];
98
99 for ( ; iEntry < iEntryEnd ; ++iEntry ) {
100 const VectorValue * const x = & m_x( 0 , m_A.graph.entries(iEntry) );
101 const MatrixValue * const a = & m_A.values( 0 , iEntry );
102
103 y += BlockMultiply< BlockSpec >::apply( m_A.block , a , x );
104 }
105
106 if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.block.dimension() ) {
107 m_y(threadIdx.x,iBlock) = y ;
108 }
109 }
110 }
111
112 static void apply( const matrix_type & A ,
113 const block_vector_type & x ,
114 const block_vector_type & y )
115 {
116 const size_type thread_max =
117 Kokkos::Impl::cuda_internal_maximum_warp_count() * Kokkos::Impl::CudaTraits::WarpSize ;
118
119 const size_type row_count = A.graph.row_map.extent(0) - 1 ;
120
121 const dim3 grid(
122 std::min( row_count , Kokkos::Impl::cuda_internal_maximum_grid_count()[0] ) , 1 , 1 );
123 const dim3 block = BlockMultiply<BlockSpec>::thread_block( A.block );
124
125 const size_type shmem =
126 BlockMultiply<BlockSpec>::template shmem_size<block_vector_type>( A.block );
127
128 if ( thread_max < block.x * block.y ) {
129 std::ostringstream msg ;
130 msg << "Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >"
131 << " ERROR: block dimension = " << block.x * block.y
132 << " > " << thread_max << "== maximum Cuda threads per block" ;
133 throw std::runtime_error(msg.str());
134 }
135
136 Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>( Multiply(A,x,y) );
137 }
138};
139
140//----------------------------------------------------------------------------
141
142} // namespace Stokhos
143
144#endif /* #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP */
CRS matrix of dense blocks.
Top-level namespace for Stokhos classes and functions.