Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Stokhos_Cuda_CrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_CUDA_CRSMATRIX_HPP
43 #define STOKHOS_CUDA_CRSMATRIX_HPP
44 
45 #include "Stokhos_ConfigDefs.h"
46 #ifdef HAVE_STOKHOS_CUSPARSE
47 
48 #include <utility>
49 #include <sstream>
50 #include <stdexcept>
51 
52 #include <cuda_runtime.h>
53 //#include <cusparse.h>
54 #include <cusparse_v2.h>
55 
56 #include "Kokkos_Core.hpp"
57 
58 #include "Stokhos_Multiply.hpp"
59 #include "Stokhos_CrsMatrix.hpp"
60 
61 // Use new cuSPARSE SpMv/SpMM functions only in CUDA 11 or greater.
62 // (while they exist in some versions of CUDA 10, they appear to not always
63 // product correct results).
64 #define USE_NEW_SPMV (CUSPARSE_VERSION >= 11000)
65 
66 namespace Stokhos {
67 
68 class CudaSparseSingleton {
69 public:
70 
71  cusparseStatus_t status;
72  cusparseHandle_t handle;
73  cusparseMatDescr_t descra;
74 
75  static CudaSparseSingleton & singleton();
76 
77 private:
78 
79  CudaSparseSingleton()
80  {
81  status = cusparseCreate(&handle);
82  if(status != CUSPARSE_STATUS_SUCCESS)
83  {
84  throw std::runtime_error( std::string("ERROR - CUSPARSE Library Initialization failed" ) );
85  }
86 
87  status = cusparseCreateMatDescr(&descra);
88  if(status != CUSPARSE_STATUS_SUCCESS)
89  {
90  throw std::runtime_error( std::string("ERROR - CUSPARSE Library Matrix descriptor failed" ) );
91  }
92 
93  cusparseSetMatType(descra , CUSPARSE_MATRIX_TYPE_GENERAL);
94  cusparseSetMatIndexBase(descra , CUSPARSE_INDEX_BASE_ZERO);
95  }
96 
97  CudaSparseSingleton( const CudaSparseSingleton & );
98  CudaSparseSingleton & operator = ( const CudaSparseSingleton & );
99 };
100 
101 CudaSparseSingleton & CudaSparseSingleton::singleton()
102 {
103  static CudaSparseSingleton s ; return s ;
104 }
105 
106 template<>
107 class Multiply<
108  CrsMatrix< float , Kokkos::Cuda > ,
109  Kokkos::View< float* , Kokkos::Cuda > ,
110  Kokkos::View< float* , Kokkos::Cuda > ,
111  void,
112  IntegralRank<1> >
113 {
114 public:
115  typedef Kokkos::Cuda execution_space ;
116  typedef execution_space::size_type size_type ;
117  typedef Kokkos::View< float* , execution_space > vector_type ;
118  typedef CrsMatrix< float , execution_space > matrix_type ;
119 
120  //--------------------------------------------------------------------------
121 
122  static void apply( const matrix_type & A ,
123  const vector_type & x ,
124  const vector_type & y )
125  {
126  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
127  const float alpha = 1 , beta = 0 ;
128  const int n = A.graph.row_map.extent(0) - 1 ;
129  const int nz = A.graph.entries.extent(0);
130 
131 #if USE_NEW_SPMV
132  using offset_type = typename matrix_type::graph_type::size_type;
133  using entry_type = typename matrix_type::graph_type::data_type;
134  const cusparseIndexType_t myCusparseOffsetType =
135  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
136  const cusparseIndexType_t myCusparseEntryType =
137  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
138  cusparseSpMatDescr_t A_cusparse;
139  cusparseCreateCsr(
140  &A_cusparse, n, n, nz,
141  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
142  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
143  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
144  cusparseDnVecDescr_t x_cusparse, y_cusparse;
145  cusparseCreateDnVec(&x_cusparse, n, (void*)x.data(), CUDA_R_32F);
146  cusparseCreateDnVec(&y_cusparse, n, (void*)y.data(), CUDA_R_32F);
147  size_t bufferSize = 0;
148  void* dBuffer = NULL;
149  cusparseSpMV_bufferSize(
150  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
151  x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MV_ALG_DEFAULT,
152  &bufferSize);
153  cudaMalloc(&dBuffer, bufferSize);
154  cusparseStatus_t status =
155  cusparseSpMV(s.handle ,
156  CUSPARSE_OPERATION_NON_TRANSPOSE ,
157  &alpha ,
158  A_cusparse ,
159  x_cusparse ,
160  &beta ,
161  y_cusparse ,
162  CUDA_R_32F ,
163  CUSPARSE_MV_ALG_DEFAULT ,
164  dBuffer );
165  cudaFree(dBuffer);
166  cusparseDestroyDnVec(x_cusparse);
167  cusparseDestroyDnVec(y_cusparse);
168  cusparseDestroySpMat(A_cusparse);
169 #else
170  cusparseStatus_t status =
171  cusparseScsrmv( s.handle ,
172  CUSPARSE_OPERATION_NON_TRANSPOSE ,
173  n , n , nz ,
174  &alpha ,
175  s.descra ,
176  A.values.data() ,
177  A.graph.row_map.data() ,
178  A.graph.entries.data() ,
179  x.data() ,
180  &beta ,
181  y.data() );
182 #endif
183 
184  if ( CUSPARSE_STATUS_SUCCESS != status ) {
185  throw std::runtime_error( std::string("ERROR - cusparseScsrmv " ) );
186  }
187  }
188 };
189 
190 template<>
191 class Multiply<
192  CrsMatrix< double , Kokkos::Cuda > ,
193  Kokkos::View< double* , Kokkos::Cuda > ,
194  Kokkos::View< double* , Kokkos::Cuda > ,
195  void,
196  IntegralRank<1> >
197 {
198 public:
199  typedef Kokkos::Cuda execution_space ;
200  typedef execution_space::size_type size_type ;
201  typedef Kokkos::View< double* , execution_space > vector_type ;
202  typedef CrsMatrix< double , execution_space > matrix_type ;
203 
204  //--------------------------------------------------------------------------
205 
206  static void apply( const matrix_type & A ,
207  const vector_type & x ,
208  const vector_type & y )
209  {
210  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
211  const double alpha = 1 , beta = 0 ;
212  const int n = A.graph.row_map.extent(0) - 1 ;
213  const int nz = A.graph.entries.extent(0);
214 
215 #if USE_NEW_SPMV
216  using offset_type = typename matrix_type::graph_type::size_type;
217  using entry_type = typename matrix_type::graph_type::data_type;
218  const cusparseIndexType_t myCusparseOffsetType =
219  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
220  const cusparseIndexType_t myCusparseEntryType =
221  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
222  cusparseSpMatDescr_t A_cusparse;
223  cusparseCreateCsr(
224  &A_cusparse, n, n, nz,
225  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
226  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
227  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
228  cusparseDnVecDescr_t x_cusparse, y_cusparse;
229  cusparseCreateDnVec(&x_cusparse, n, (void*)x.data(), CUDA_R_64F);
230  cusparseCreateDnVec(&y_cusparse, n, (void*)y.data(), CUDA_R_64F);
231  size_t bufferSize = 0;
232  void* dBuffer = NULL;
233  cusparseSpMV_bufferSize(
234  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
235  x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MV_ALG_DEFAULT,
236  &bufferSize);
237  cudaMalloc(&dBuffer, bufferSize);
238  cusparseStatus_t status =
239  cusparseSpMV(s.handle ,
240  CUSPARSE_OPERATION_NON_TRANSPOSE ,
241  &alpha ,
242  A_cusparse ,
243  x_cusparse ,
244  &beta ,
245  y_cusparse ,
246  CUDA_R_64F ,
247  CUSPARSE_MV_ALG_DEFAULT ,
248  dBuffer );
249  cudaFree(dBuffer);
250  cusparseDestroyDnVec(x_cusparse);
251  cusparseDestroyDnVec(y_cusparse);
252  cusparseDestroySpMat(A_cusparse);
253 #else
254  cusparseStatus_t status =
255  cusparseDcsrmv( s.handle ,
256  CUSPARSE_OPERATION_NON_TRANSPOSE ,
257  n , n , nz ,
258  &alpha ,
259  s.descra ,
260  A.values.data() ,
261  A.graph.row_map.data() ,
262  A.graph.entries.data() ,
263  x.data() ,
264  &beta ,
265  y.data() );
266 #endif
267 
268  if ( CUSPARSE_STATUS_SUCCESS != status ) {
269  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
270  }
271  }
272 };
273 
274 template <typename Ordinal>
275 class Multiply<
276  CrsMatrix< float , Kokkos::Cuda > ,
277  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
278  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
279  std::vector<Ordinal> ,
280  IntegralRank<2> >
281 {
282 public:
283  typedef Kokkos::Cuda execution_space;
284  typedef execution_space::size_type size_type;
285  typedef Kokkos::View< float*, Kokkos::LayoutLeft, execution_space > vector_type;
286  typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
287  typedef CrsMatrix< float , execution_space > matrix_type;
288 
289  //--------------------------------------------------------------------------
290 
291  static void apply( const matrix_type & A ,
292  const multi_vector_type & x ,
293  const multi_vector_type & y ,
294  const std::vector<Ordinal> & col_indices )
295  {
296  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
297  const float alpha = 1 , beta = 0 ;
298  const int n = A.graph.row_map.extent(0) - 1 ;
299  const int nz = A.graph.entries.extent(0);
300  const size_t ncol = col_indices.size();
301 
302  // Copy columns of x into a contiguous vector
303  vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
304  vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );
305 
306  for (size_t col=0; col<ncol; col++) {
307  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
308  vector_type xx_view = Kokkos::subview( xx , span );
309  vector_type x_col =
310  Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
311  Kokkos::deep_copy(xx_view, x_col);
312  }
313 
314  // Sparse matrix-times-multivector
315 #if USE_NEW_SPMV
316  using offset_type = typename matrix_type::graph_type::size_type;
317  using entry_type = typename matrix_type::graph_type::data_type;
318  const cusparseIndexType_t myCusparseOffsetType =
319  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
320  const cusparseIndexType_t myCusparseEntryType =
321  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
322  cusparseSpMatDescr_t A_cusparse;
323  cusparseCreateCsr(
324  &A_cusparse, n, n, nz,
325  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
326  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
327  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
328  cusparseDnMatDescr_t x_cusparse, y_cusparse;
329  cusparseCreateDnMat(
330  &x_cusparse, n, ncol, n,
331  (void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
332  cusparseCreateDnMat(
333  &y_cusparse, n, ncol, n,
334  (void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
335  size_t bufferSize = 0;
336  void* dBuffer = NULL;
337  cusparseSpMM_bufferSize(
338  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
339  CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
340  x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
341  &bufferSize);
342  cudaMalloc(&dBuffer, bufferSize);
343  cusparseStatus_t status =
344  cusparseSpMM(s.handle ,
345  CUSPARSE_OPERATION_NON_TRANSPOSE ,
346  CUSPARSE_OPERATION_NON_TRANSPOSE ,
347  &alpha ,
348  A_cusparse ,
349  x_cusparse ,
350  &beta ,
351  y_cusparse ,
352  CUDA_R_32F ,
353  CUSPARSE_MM_ALG_DEFAULT ,
354  dBuffer );
355  cudaFree(dBuffer);
356  cusparseDestroyDnMat(x_cusparse);
357  cusparseDestroyDnMat(y_cusparse);
358  cusparseDestroySpMat(A_cusparse);
359 #else
360  cusparseStatus_t status =
361  cusparseScsrmm( s.handle ,
362  CUSPARSE_OPERATION_NON_TRANSPOSE ,
363  n , ncol , n , nz ,
364  &alpha ,
365  s.descra ,
366  A.values.data() ,
367  A.graph.row_map.data() ,
368  A.graph.entries.data() ,
369  xx.data() ,
370  n ,
371  &beta ,
372  yy.data() ,
373  n );
374 #endif
375 
376  if ( CUSPARSE_STATUS_SUCCESS != status ) {
377  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
378  }
379 
380  // Copy columns out of continguous multivector
381  for (size_t col=0; col<ncol; col++) {
382  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
383  vector_type yy_view = Kokkos::subview( yy , span );
384  vector_type y_col =
385  Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
386  Kokkos::deep_copy(y_col, yy_view );
387  }
388  }
389 };
390 
391 #define USE_CUSPARSE 1
392 #if USE_CUSPARSE
393 
394 template <typename Ordinal>
395 class Multiply<
396  CrsMatrix< double , Kokkos::Cuda > ,
397  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
398  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
399  std::vector<Ordinal> ,
400  IntegralRank<2> >
401 {
402 public:
403  typedef Kokkos::Cuda execution_space;
404  typedef execution_space::size_type size_type;
405  typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
406  typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
407  typedef CrsMatrix< double , execution_space > matrix_type;
408 
409  //--------------------------------------------------------------------------
410 
411 #define USE_TRANSPOSE 0
412 #if USE_TRANSPOSE
413 
414  // A version that copies the vectors to a transposed 2D view and calls
415  // new CUSPARSE function for transpose layout. Seems to be somewhat
416  // slower????
417 
418  struct GatherTranspose {
419  typedef Kokkos::Cuda execution_space;
420  typedef execution_space::size_type size_type;
421 
422  multi_vector_type m_xt;
423  const multi_vector_type m_x;
424  const Kokkos::View<Ordinal*,execution_space> m_col;
425  const size_type m_ncol;
426 
427  GatherTranspose( multi_vector_type& xt,
428  const multi_vector_type& x,
429  const Kokkos::View<Ordinal*,execution_space>& col ) :
430  m_xt(xt), m_x(x), m_col(col), m_ncol(col.extent(0)) {}
431 
432  __device__
433  inline void operator() (size_type i) const {
434  for (size_type j=0; j<m_ncol; ++j)
435  m_xt(j,i) = m_x(i,m_col(j));
436  }
437 
438  static void apply( multi_vector_type& xt,
439  const multi_vector_type& x,
440  const Kokkos::View<Ordinal*,execution_space>& col ) {
441  const size_type n = x.extent(0);
442  Kokkos::parallel_for( n , GatherTranspose(xt,x,col) );
443  }
444  };
445 
446  static void apply( const matrix_type & A ,
447  const multi_vector_type & x ,
448  const multi_vector_type & y ,
449  const std::vector<Ordinal> & col_indices )
450  {
451  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
452  const double alpha = 1 , beta = 0 ;
453  const int n = A.graph.row_map.extent(0) - 1 ;
454  const int nz = A.graph.entries.extent(0);
455  const size_t ncol = col_indices.size();
456 
457  // Copy col_indices to the device
458  Kokkos::View<Ordinal*,execution_space> col_indices_dev(
459  Kokkos::ViewAllocateWithoutInitializing("col_indices"), ncol);
460  typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
461  Kokkos::create_mirror_view(col_indices_dev);
462  for (size_t i=0; i<ncol; ++i)
463  col_indices_host(i) = col_indices[i];
464  Kokkos::deep_copy(col_indices_dev, col_indices_host);
465 
466  // Copy columns of x into a contiguous multi-vector and transpose
467  multi_vector_type xx(
468  Kokkos::ViewAllocateWithoutInitializing("xx"), ncol , n );
469  GatherTranspose::apply(xx, x, col_indices_dev);
470 
471  // Temporary to store result (this is not transposed)
472  multi_vector_type yy(
473  Kokkos::ViewAllocateWithoutInitializing("yy"), n , ncol );
474 
475  // Sparse matrix-times-multivector
476  cusparseStatus_t status =
477  cusparseDcsrmm2( s.handle ,
478  CUSPARSE_OPERATION_NON_TRANSPOSE ,
479  CUSPARSE_OPERATION_TRANSPOSE ,
480  n , ncol , n , nz ,
481  &alpha ,
482  s.descra ,
483  A.values.data() ,
484  A.graph.row_map.data() ,
485  A.graph.entries.data() ,
486  xx.data() ,
487  ncol ,
488  &beta ,
489  yy.data() ,
490  n );
491 
492  if ( CUSPARSE_STATUS_SUCCESS != status ) {
493  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
494  }
495 
496  // Copy columns out of continguous multivector
497  for (size_t col=0; col<ncol; col++) {
498  vector_type yy_view =
499  Kokkos::subview( yy , Kokkos::ALL(), col );
500  vector_type y_col =
501  Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
502  Kokkos::deep_copy(y_col, yy_view );
503  }
504  }
505 #else
506  static void apply( const matrix_type & A ,
507  const multi_vector_type & x ,
508  const multi_vector_type & y ,
509  const std::vector<Ordinal> & col_indices )
510  {
511  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
512  const double alpha = 1 , beta = 0 ;
513  const int n = A.graph.row_map.extent(0) - 1 ;
514  const int nz = A.graph.entries.extent(0);
515  const size_t ncol = col_indices.size();
516 
517  // Copy columns of x into a contiguous vector
518  vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
519  vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );
520 
521  for (size_t col=0; col<ncol; col++) {
522  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
523  vector_type xx_view = Kokkos::subview( xx , span );
524  vector_type x_col =
525  Kokkos::subview( x, Kokkos::ALL(), col_indices[col] );
526  Kokkos::deep_copy(xx_view, x_col);
527  }
528 
529  // Sparse matrix-times-multivector
530 #if USE_NEW_SPMV
531  using offset_type = typename matrix_type::graph_type::size_type;
532  using entry_type = typename matrix_type::graph_type::data_type;
533  const cusparseIndexType_t myCusparseOffsetType =
534  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
535  const cusparseIndexType_t myCusparseEntryType =
536  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
537  cusparseSpMatDescr_t A_cusparse;
538  cusparseCreateCsr(
539  &A_cusparse, n, n, nz,
540  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
541  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
542  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
543  cusparseDnMatDescr_t x_cusparse, y_cusparse;
544  cusparseCreateDnMat(
545  &x_cusparse, n, ncol, n,
546  (void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
547  cusparseCreateDnMat(
548  &y_cusparse, n, ncol, n,
549  (void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
550  size_t bufferSize = 0;
551  void* dBuffer = NULL;
552  cusparseSpMM_bufferSize(
553  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
554  CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
555  x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
556  &bufferSize);
557  cudaMalloc(&dBuffer, bufferSize);
558  cusparseStatus_t status =
559  cusparseSpMM(s.handle ,
560  CUSPARSE_OPERATION_NON_TRANSPOSE ,
561  CUSPARSE_OPERATION_NON_TRANSPOSE ,
562  &alpha ,
563  A_cusparse ,
564  x_cusparse ,
565  &beta ,
566  y_cusparse ,
567  CUDA_R_64F ,
568  CUSPARSE_MM_ALG_DEFAULT ,
569  dBuffer );
570  cudaFree(dBuffer);
571  cusparseDestroyDnMat(x_cusparse);
572  cusparseDestroyDnMat(y_cusparse);
573  cusparseDestroySpMat(A_cusparse);
574 #else
575  cusparseStatus_t status =
576  cusparseDcsrmm( s.handle ,
577  CUSPARSE_OPERATION_NON_TRANSPOSE ,
578  n , ncol , n , nz ,
579  &alpha ,
580  s.descra ,
581  A.values.data() ,
582  A.graph.row_map.data() ,
583  A.graph.entries.data() ,
584  xx.data() ,
585  n ,
586  &beta ,
587  yy.data() ,
588  n );
589 #endif
590 
591  if ( CUSPARSE_STATUS_SUCCESS != status ) {
592  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
593  }
594 
595  // Copy columns out of continguous multivector
596  for (size_t col=0; col<ncol; col++) {
597  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
598  vector_type yy_view = Kokkos::subview( yy , span );
599  vector_type y_col =
600  Kokkos::subview( y, Kokkos::ALL(), col_indices[col] );
601  Kokkos::deep_copy(y_col, yy_view );
602  }
603  }
604 #endif
605 };
606 
607 template <>
608 class Multiply<
609  CrsMatrix< float , Kokkos::Cuda > ,
610  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
611  Kokkos::View< float** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
612  void ,
613  IntegralRank<2> >
614 {
615 public:
616  typedef Kokkos::Cuda execution_space;
617  typedef execution_space::size_type size_type;
618  typedef Kokkos::View< float**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
619  typedef CrsMatrix< float , execution_space > matrix_type;
620 
621  //--------------------------------------------------------------------------
622 
623  static void apply( const matrix_type & A ,
624  const multi_vector_type & x ,
625  const multi_vector_type & y )
626  {
627  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
628  const float alpha = 1 , beta = 0 ;
629  const int n = A.graph.row_map.extent(0) - 1 ;
630  const int nz = A.graph.entries.extent(0);
631  const size_t ncol = x.extent(1);
632 
633  // Sparse matrix-times-multivector
634 #if USE_NEW_SPMV
635  using offset_type = typename matrix_type::graph_type::size_type;
636  using entry_type = typename matrix_type::graph_type::data_type;
637  const cusparseIndexType_t myCusparseOffsetType =
638  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
639  const cusparseIndexType_t myCusparseEntryType =
640  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
641  cusparseSpMatDescr_t A_cusparse;
642  cusparseCreateCsr(
643  &A_cusparse, n, n, nz,
644  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
645  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
646  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
647  cusparseDnMatDescr_t x_cusparse, y_cusparse;
648  cusparseCreateDnMat(
649  &x_cusparse, n, ncol, x.stride(1),
650  (void*)x.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
651  cusparseCreateDnMat(
652  &y_cusparse, n, ncol, y.stride(1),
653  (void*)y.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
654  size_t bufferSize = 0;
655  void* dBuffer = NULL;
656  cusparseSpMM_bufferSize(
657  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
658  CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
659  x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
660  &bufferSize);
661  cudaMalloc(&dBuffer, bufferSize);
662  cusparseStatus_t status =
663  cusparseSpMM(s.handle ,
664  CUSPARSE_OPERATION_NON_TRANSPOSE ,
665  CUSPARSE_OPERATION_NON_TRANSPOSE ,
666  &alpha ,
667  A_cusparse ,
668  x_cusparse ,
669  &beta ,
670  y_cusparse ,
671  CUDA_R_32F ,
672  CUSPARSE_MM_ALG_DEFAULT ,
673  dBuffer );
674  cudaFree(dBuffer);
675  cusparseDestroyDnMat(x_cusparse);
676  cusparseDestroyDnMat(y_cusparse);
677  cusparseDestroySpMat(A_cusparse);
678 #else
679  cusparseStatus_t status =
680  cusparseScsrmm( s.handle ,
681  CUSPARSE_OPERATION_NON_TRANSPOSE ,
682  n , ncol , n , nz ,
683  &alpha ,
684  s.descra ,
685  A.values.data() ,
686  A.graph.row_map.data() ,
687  A.graph.entries.data() ,
688  x.data() ,
689  n ,
690  &beta ,
691  y.data() ,
692  n );
693 #endif
694 
695  if ( CUSPARSE_STATUS_SUCCESS != status ) {
696  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
697  }
698  }
699 };
700 
701 template <>
702 class Multiply<
703  CrsMatrix< double , Kokkos::Cuda > ,
704  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
705  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
706  void ,
707  IntegralRank<2> >
708 {
709 public:
710  typedef Kokkos::Cuda execution_space;
711  typedef execution_space::size_type size_type;
712  typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
713  typedef CrsMatrix< double , execution_space > matrix_type;
714 
715  //--------------------------------------------------------------------------
716 
717  static void apply( const matrix_type & A ,
718  const multi_vector_type & x ,
719  const multi_vector_type & y )
720  {
721  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
722  const double alpha = 1 , beta = 0 ;
723  const int n = A.graph.row_map.extent(0) - 1 ;
724  const int nz = A.graph.entries.extent(0);
725  const size_t ncol = x.extent(1);
726 
727  // Sparse matrix-times-multivector
728 #if USE_NEW_SPMV
729  using offset_type = typename matrix_type::graph_type::size_type;
730  using entry_type = typename matrix_type::graph_type::data_type;
731  const cusparseIndexType_t myCusparseOffsetType =
732  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
733  const cusparseIndexType_t myCusparseEntryType =
734  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
735  cusparseSpMatDescr_t A_cusparse;
736  cusparseCreateCsr(
737  &A_cusparse, n, n, nz,
738  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
739  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
740  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
741  cusparseDnMatDescr_t x_cusparse, y_cusparse;
742  cusparseCreateDnMat(
743  &x_cusparse, n, ncol, x.stride(1),
744  (void*)x.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
745  cusparseCreateDnMat(
746  &y_cusparse, n, ncol, y.stride(1),
747  (void*)y.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
748  size_t bufferSize = 0;
749  void* dBuffer = NULL;
750  cusparseSpMM_bufferSize(
751  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
752  CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
753  x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
754  &bufferSize);
755  cudaMalloc(&dBuffer, bufferSize);
756  cusparseStatus_t status =
757  cusparseSpMM(s.handle ,
758  CUSPARSE_OPERATION_NON_TRANSPOSE ,
759  CUSPARSE_OPERATION_NON_TRANSPOSE ,
760  &alpha ,
761  A_cusparse ,
762  x_cusparse ,
763  &beta ,
764  y_cusparse ,
765  CUDA_R_64F ,
766  CUSPARSE_MM_ALG_DEFAULT ,
767  dBuffer );
768  cudaFree(dBuffer);
769  cusparseDestroyDnMat(x_cusparse);
770  cusparseDestroyDnMat(y_cusparse);
771  cusparseDestroySpMat(A_cusparse);
772 #else
773  cusparseStatus_t status =
774  cusparseDcsrmm( s.handle ,
775  CUSPARSE_OPERATION_NON_TRANSPOSE ,
776  n , ncol , n , nz ,
777  &alpha ,
778  s.descra ,
779  A.values.data() ,
780  A.graph.row_map.data() ,
781  A.graph.entries.data() ,
782  x.data() ,
783  n ,
784  &beta ,
785  y.data() ,
786  n );
787 #endif
788 
789  if ( CUSPARSE_STATUS_SUCCESS != status ) {
790  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
791  }
792  }
793 };
794 
795 #else
796 // Working on creating a version that doesn't copy vectors to a contiguous
797 // 2-D view and doesn't call CUSPARSE. Not done yet.
798 template <typename Ordinal>
799 class Multiply<
800  CrsMatrix< double , Kokkos::Cuda > ,
801  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
802  Kokkos::View< double** , Kokkos::LayoutLeft, Kokkos::Cuda > ,
803  std::vector<Ordinal> ,
804  IntegralRank<2> >
805 {
806 public:
807  typedef Kokkos::Cuda execution_space;
808  typedef execution_space::size_type size_type;
809  typedef Kokkos::View< double*, Kokkos::LayoutLeft, execution_space > vector_type;
810  typedef Kokkos::View< double**, Kokkos::LayoutLeft, execution_space > multi_vector_type;
811  typedef CrsMatrix< double , execution_space > matrix_type;
812  typedef Kokkos::View< size_type*, execution_space > column_indices_type;
813 
814  const matrix_type m_A;
815  const multi_vector_type m_x;
816  multi_vector_type m_y;
817  const column_indices_type m_col;
818  const size_type m_num_col;
819 
820  Multiply( const matrix_type& A,
821  const multi_vector_type& x,
822  multi_vector_type& y,
823  const column_indices_type& col) :
824  m_A(A),
825  m_x(x),
826  m_y(y),
827  m_col(col),
828  m_num_col(col.extent(0)) {}
829 
830  __device__
831  inline void operator() ( const size_type iRow ) const {
832  const size_type iEntryBegin = m_A.graph.row_map[iRow];
833  const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
834 
835  for (size_type j=0; j<m_num_col; j++) {
836  size_type iCol = m_col_indices[j];
837 
838  scalar_type sum = 0.0;
839 
840  for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
841  sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
842  }
843 
844  m_y( iRow, iCol ) = sum;
845 
846  }
847  }
848 
849  //--------------------------------------------------------------------------
850 
851  static void apply( const matrix_type & A ,
852  const multi_vector_type & x ,
853  const multi_vector_type & y ,
854  const std::vector<Ordinal> & col_indices )
855  {
856  // Copy col_indices to the device
857  Kokkos::View<Ordinal*,execution_space> col_indices_dev(
858  Kokkos::ViewAllocateWithoutInitializing("col_indices"), ncol);
859  typename Kokkos::View<Ordinal*,execution_space>::HostMirror col_indices_host =
860  Kokkos::create_mirror_view(col_indices_dev);
861  for (size_t i=0; i<ncol; ++i)
862  col_indices_host(i) = col_indices[i];
863  Kokkos::deep_copy(col_indices_dev, col_indices_host);
864 
865  const size_t n = A.graph.row_map.extent(0) - 1 ;
866  Kokkos::parallel_for( n , Multiply(A,x,y,col_indices_dev) );
867  }
868 };
869 
870 #endif
871 
872 template<>
873 class Multiply<
874  CrsMatrix< float , Kokkos::Cuda > ,
875  std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
876  std::vector< Kokkos::View< float* , Kokkos::Cuda > >,
877  void,
878  IntegralRank<1> >
879 {
880 public:
881  typedef Kokkos::Cuda execution_space ;
882  typedef execution_space::size_type size_type ;
883  typedef Kokkos::View< float* , execution_space > vector_type ;
884  typedef CrsMatrix< float , execution_space > matrix_type ;
885 
886  //--------------------------------------------------------------------------
887 
888  static void apply( const matrix_type & A ,
889  const std::vector<vector_type> & x ,
890  const std::vector<vector_type> & y )
891  {
892  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
893  const float alpha = 1 , beta = 0 ;
894  const int n = A.graph.row_map.extent(0) - 1 ;
895  const int nz = A.graph.entries.extent(0);
896  const size_t ncol = x.size();
897 
898  // Copy columns of x into a contiguous vector
899  vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
900  vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );
901 
902  for (size_t col=0; col<ncol; col++) {
903  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
904  vector_type xx_view = Kokkos::subview( xx , span );
905  Kokkos::deep_copy(xx_view, x[col]);
906  }
907 
908  // Sparse matrix-times-multivector
909 #if USE_NEW_SPMV
910  using offset_type = typename matrix_type::graph_type::size_type;
911  using entry_type = typename matrix_type::graph_type::data_type;
912  const cusparseIndexType_t myCusparseOffsetType =
913  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
914  const cusparseIndexType_t myCusparseEntryType =
915  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
916  cusparseSpMatDescr_t A_cusparse;
917  cusparseCreateCsr(
918  &A_cusparse, n, n, nz,
919  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
920  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
921  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
922  cusparseDnMatDescr_t x_cusparse, y_cusparse;
923  cusparseCreateDnMat(
924  &x_cusparse, n, ncol, n,
925  (void*)xx.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
926  cusparseCreateDnMat(
927  &y_cusparse, n, ncol, n,
928  (void*)yy.data(), CUDA_R_32F, CUSPARSE_ORDER_COL);
929  size_t bufferSize = 0;
930  void* dBuffer = NULL;
931  cusparseSpMM_bufferSize(
932  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
933  CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
934  x_cusparse, &beta, y_cusparse, CUDA_R_32F, CUSPARSE_MM_ALG_DEFAULT,
935  &bufferSize);
936  cudaMalloc(&dBuffer, bufferSize);
937  cusparseStatus_t status =
938  cusparseSpMM(s.handle ,
939  CUSPARSE_OPERATION_NON_TRANSPOSE ,
940  CUSPARSE_OPERATION_NON_TRANSPOSE ,
941  &alpha ,
942  A_cusparse ,
943  x_cusparse ,
944  &beta ,
945  y_cusparse ,
946  CUDA_R_32F ,
947  CUSPARSE_MM_ALG_DEFAULT ,
948  dBuffer );
949  cudaFree(dBuffer);
950  cusparseDestroyDnMat(x_cusparse);
951  cusparseDestroyDnMat(y_cusparse);
952  cusparseDestroySpMat(A_cusparse);
953 #else
954  cusparseStatus_t status =
955  cusparseScsrmm( s.handle ,
956  CUSPARSE_OPERATION_NON_TRANSPOSE ,
957  n , ncol , n , nz ,
958  &alpha ,
959  s.descra ,
960  A.values.data() ,
961  A.graph.row_map.data() ,
962  A.graph.entries.data() ,
963  xx.data() ,
964  n ,
965  &beta ,
966  yy.data() ,
967  n );
968 #endif
969 
970  if ( CUSPARSE_STATUS_SUCCESS != status ) {
971  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
972  }
973 
974  // Copy columns out of continguous multivector
975  for (size_t col=0; col<ncol; col++) {
976  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
977  vector_type yy_view = Kokkos::subview( yy , span );
978  Kokkos::deep_copy(y[col], yy_view );
979  }
980  }
981 };
982 
983 template<>
984 class Multiply<
985  CrsMatrix< double , Kokkos::Cuda > ,
986  std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
987  std::vector< Kokkos::View< double* , Kokkos::Cuda > >,
988  void,
989  IntegralRank<1> >
990 {
991 public:
992  typedef Kokkos::Cuda execution_space ;
993  typedef execution_space::size_type size_type ;
994  typedef Kokkos::View< double* , execution_space > vector_type ;
995  typedef CrsMatrix< double , execution_space > matrix_type ;
996 
997  //--------------------------------------------------------------------------
998 
999  static void apply( const matrix_type & A ,
1000  const std::vector<vector_type> & x ,
1001  const std::vector<vector_type> & y )
1002  {
1003  CudaSparseSingleton & s = CudaSparseSingleton::singleton();
1004  const double alpha = 1 , beta = 0 ;
1005  const int n = A.graph.row_map.extent(0) - 1 ;
1006  const int nz = A.graph.entries.extent(0);
1007  const size_t ncol = x.size();
1008 
1009  // Copy columns of x into a contiguous vector
1010  vector_type xx( Kokkos::ViewAllocateWithoutInitializing("xx"), n * ncol );
1011  vector_type yy( Kokkos::ViewAllocateWithoutInitializing("yy"), n * ncol );
1012 
1013  for (size_t col=0; col<ncol; col++) {
1014  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
1015  vector_type xx_view = Kokkos::subview( xx , span );
1016  Kokkos::deep_copy(xx_view, x[col]);
1017  }
1018 
1019  // Sparse matrix-times-multivector
1020 #if USE_NEW_SPMV
1021  using offset_type = typename matrix_type::graph_type::size_type;
1022  using entry_type = typename matrix_type::graph_type::data_type;
1023  const cusparseIndexType_t myCusparseOffsetType =
1024  sizeof(offset_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
1025  const cusparseIndexType_t myCusparseEntryType =
1026  sizeof(entry_type) == 4 ? CUSPARSE_INDEX_32I : CUSPARSE_INDEX_64I;
1027  cusparseSpMatDescr_t A_cusparse;
1028  cusparseCreateCsr(
1029  &A_cusparse, n, n, nz,
1030  (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
1031  (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
1032  CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
1033  cusparseDnMatDescr_t x_cusparse, y_cusparse;
1034  cusparseCreateDnMat(
1035  &x_cusparse, n, ncol, n,
1036  (void*)xx.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
1037  cusparseCreateDnMat(
1038  &y_cusparse, n, ncol, n,
1039  (void*)yy.data(), CUDA_R_64F, CUSPARSE_ORDER_COL);
1040  size_t bufferSize = 0;
1041  void* dBuffer = NULL;
1042  cusparseSpMM_bufferSize(
1043  s.handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
1044  CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
1045  x_cusparse, &beta, y_cusparse, CUDA_R_64F, CUSPARSE_MM_ALG_DEFAULT,
1046  &bufferSize);
1047  cudaMalloc(&dBuffer, bufferSize);
1048  cusparseStatus_t status =
1049  cusparseSpMM(s.handle ,
1050  CUSPARSE_OPERATION_NON_TRANSPOSE ,
1051  CUSPARSE_OPERATION_NON_TRANSPOSE ,
1052  &alpha ,
1053  A_cusparse ,
1054  x_cusparse ,
1055  &beta ,
1056  y_cusparse ,
1057  CUDA_R_64F ,
1058  CUSPARSE_MM_ALG_DEFAULT ,
1059  dBuffer );
1060  cudaFree(dBuffer);
1061  cusparseDestroyDnMat(x_cusparse);
1062  cusparseDestroyDnMat(y_cusparse);
1063  cusparseDestroySpMat(A_cusparse);
1064 #else
1065  cusparseStatus_t status =
1066  cusparseDcsrmm( s.handle ,
1067  CUSPARSE_OPERATION_NON_TRANSPOSE ,
1068  n , ncol , n , nz ,
1069  &alpha ,
1070  s.descra ,
1071  A.values.data() ,
1072  A.graph.row_map.data() ,
1073  A.graph.entries.data() ,
1074  xx.data() ,
1075  n ,
1076  &beta ,
1077  yy.data() ,
1078  n );
1079 #endif
1080 
1081  if ( CUSPARSE_STATUS_SUCCESS != status ) {
1082  throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
1083  }
1084 
1085  // Copy columns out of continguous multivector
1086  for (size_t col=0; col<ncol; col++) {
1087  const std::pair< size_t , size_t > span( n * col , n * ( col + 1 ) );
1088  vector_type yy_view = Kokkos::subview( yy , span );
1089  Kokkos::deep_copy(y[col], yy_view );
1090  }
1091  }
1092 };
1093 
1094 //----------------------------------------------------------------------------
1095 
1096 } // namespace Stokhos
1097 
1098 #endif /* #ifdef HAVE_STOKHOS_CUSPARSE */
1099 
1100 #endif /* #ifndef STOKHOS_CUDA_CRSMATRIX_HPP */
Kokkos::DefaultExecutionSpace execution_space
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
Top-level namespace for Stokhos classes and functions.
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
Stokhos::CrsMatrix< ValueType, Device, Layout >::HostMirror create_mirror_view(const Stokhos::CrsMatrix< ValueType, Device, Layout > &A)