Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Stokhos_Tpetra_Utilities.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_TPETRA_UTILITIES_HPP
43 #define STOKHOS_TPETRA_UTILITIES_HPP
44 
47 #include "Tpetra_CrsMatrix.hpp"
48 
49 namespace Stokhos {
50 
52 
55  template <class ViewType>
57  public:
58  typedef ViewType MeanViewType;
60  typedef typename ViewType::size_type size_type;
61 
62  GetMeanValsFunc(const ViewType& vals) {
63  mean_vals = ViewType("mean-values", vals.extent(0));
65  }
66 
67  MeanViewType getMeanValues() const { return mean_vals; }
68 
69  private:
71  };
72 
74 
76  template <class Storage, class ... P>
77  class GetMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
78  P... > > {
79  public:
81  typedef Kokkos::View< Scalar*, P... > ViewType;
84  typedef typename ViewType::size_type size_type;
85 
86  GetMeanValsFunc(const ViewType& vals_) : vals(vals_) {
87  const size_type nnz = vals.extent(0);
88  typename Scalar::cijk_type mean_cijk =
89  Stokhos::create_mean_based_product_tensor<execution_space, typename Storage::ordinal_type, typename Storage::value_type>();
90  mean_vals = Kokkos::make_view<ViewType>("mean-values", mean_cijk, nnz, 1);
91  Kokkos::parallel_for( nnz, *this );
92  }
93 
94  KOKKOS_INLINE_FUNCTION
95  void operator() (const size_type i) const {
96  mean_vals(i) = vals(i).fastAccessCoeff(0);
97  }
98 
99  MeanViewType getMeanValues() const { return mean_vals; }
100 
101  private:
104  };
105 
107 
109  template <class Storage, class ... P>
110  class GetMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
111  P... > > {
112  public:
114  typedef Kokkos::View< Scalar*, P... > ViewType;
117  typedef typename ViewType::size_type size_type;
118 
119  GetMeanValsFunc(const ViewType& vals_) :
120  vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
121  {
122  const size_type nnz = vals.extent(0);
123  mean_vals = ViewType("mean-values", nnz, 1);
124  Kokkos::parallel_for( nnz, *this );
125  }
126 
127  KOKKOS_INLINE_FUNCTION
128  void operator() (const size_type i) const
129  {
130  typename Scalar::value_type s = 0.0;
131  for (size_type j=0; j<vec_size; ++j)
132  s += vals(i).fastAccessCoeff(j);
133  mean_vals(i) = s;
134  }
135 
137 
138  private:
142  };
143 
145 
148  template <class ViewType>
150  public:
151  typedef ViewType MeanViewType;
153  typedef typename ViewType::size_type size_type;
154 
155  GetScalarMeanValsFunc(const ViewType& vals) {
156  mean_vals = ViewType("mean-values", vals.extent(0));
157  Kokkos::deep_copy( mean_vals, vals );
158  }
159 
161 
162  private:
164  };
165 
167 
169  template <class Storage, class ... P>
170  class GetScalarMeanValsFunc< Kokkos::View< Sacado::UQ::PCE<Storage>*,
171  P... > > {
172  public:
174  typedef typename Scalar::value_type MeanScalar;
175  typedef Kokkos::View< Scalar*, P... > ViewType;
176  typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
178  typedef typename ViewType::size_type size_type;
179 
180  GetScalarMeanValsFunc(const ViewType& vals_) : vals(vals_) {
181  const size_type nnz = vals.extent(0);
182  mean_vals = MeanViewType("mean-values", nnz);
183  Kokkos::parallel_for( nnz, *this );
184  }
185 
186  KOKKOS_INLINE_FUNCTION
187  void operator() (const size_type i) const {
188  mean_vals(i) = vals(i).fastAccessCoeff(0);
189  }
190 
192 
193  private:
196  };
197 
199 
201  template <class Storage, class ... P>
202  class GetScalarMeanValsFunc< Kokkos::View< Sacado::MP::Vector<Storage>*,
203  P... > > {
204  public:
206  typedef typename Scalar::value_type MeanScalar;
207  typedef Kokkos::View< Scalar*, P... > ViewType;
208  typedef Kokkos::View< MeanScalar*, P... > MeanViewType;
210  typedef typename ViewType::size_type size_type;
211 
213  vals(vals_), vec_size(Kokkos::dimension_scalar(vals))
214  {
215  const size_type nnz = vals.extent(0);
216  mean_vals = ViewType("mean-values", nnz);
217  Kokkos::parallel_for( nnz, *this );
218  }
219 
220  KOKKOS_INLINE_FUNCTION
221  void operator() (const size_type i) const
222  {
223  typename Scalar::value_type s = 0.0;
224  for (size_type j=0; j<vec_size; ++j)
225  s += vals(i).fastAccessCoeff(j);
226  mean_vals(i) = s;
227  }
228 
230 
231  private:
235  };
236 
237  template <typename Scalar, typename LO, typename GO, typename N>
238  Teuchos::RCP< Tpetra::CrsMatrix<Scalar,LO,GO,N> >
239  build_mean_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
240  {
241  using Teuchos::RCP;
242  using Teuchos::rcp;
243  typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
244  typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
245  typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
246 
247  KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
248  KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
249  const size_t ncols = kokkos_matrix.numCols();
251  typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
252  MeanFunc meanfunc(matrix_values);
253  KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
254 
255  // From here on we are assuming that
256  // KokkosMeanMatrixValuesType == KokkosMatrixValuestype
257 
258  RCP < MatrixType > mean_matrix =
259  rcp( new MatrixType(A.getCrsGraph(), mean_matrix_values) );
260  mean_matrix->fillComplete();
261  return mean_matrix;
262  }
263 
264  template <typename Scalar, typename LO, typename GO, typename N>
265  Teuchos::RCP< Tpetra::CrsMatrix<typename Scalar::value_type,LO,GO,N> >
266  build_mean_scalar_matrix(const Tpetra::CrsMatrix<Scalar,LO,GO,N>& A)
267  {
268  using Teuchos::RCP;
269  using Teuchos::rcp;
270  typedef typename Scalar::value_type BaseScalar;
271  typedef Tpetra::CrsMatrix<Scalar,LO,GO,N> MatrixType;
272  typedef Tpetra::CrsMatrix<BaseScalar,LO,GO,N> ScalarMatrixType;
273  typedef typename MatrixType::local_matrix_device_type KokkosMatrixType;
274  typedef typename ScalarMatrixType::local_matrix_device_type ScalarKokkosMatrixType;
275  typedef typename KokkosMatrixType::values_type KokkosMatrixValuesType;
276 
277  KokkosMatrixType kokkos_matrix = A.getLocalMatrixDevice();
278  KokkosMatrixValuesType matrix_values = kokkos_matrix.values;
280  typedef typename MeanFunc::MeanViewType KokkosMeanMatrixValuesType;
281  MeanFunc meanfunc(matrix_values);
282  KokkosMeanMatrixValuesType mean_matrix_values = meanfunc.getMeanValues();
283 
284  // From here on we are assuming that
285  // KokkosMeanMatrixValuesType == ScalarKokkosMatrixValuesType
286 
287  RCP < ScalarMatrixType > mean_matrix =
288  rcp( new ScalarMatrixType(A.getCrsGraph(), mean_matrix_values) );
289  mean_matrix->fillComplete();
290  return mean_matrix;
291  }
292 
293  namespace Impl {
294 
295  // Functor for copying a PCE view to a scalar view
296  // (Assumes view is rank 2, LayoutLeft)
297  template <typename ExecSpace>
298  struct CopyPCE2Scalar {
299  typedef ExecSpace exec_space;
300  template <typename DstView, typename SrcView>
301  CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
302  impl(dst,src);
303  }
304 
305  template <typename DstView, typename SrcView>
306  void impl(const DstView& dst, const SrcView& src) {
307  typedef typename SrcView::non_const_value_type Scalar;
308  const size_t m = src.extent(0);
309  const size_t n = src.extent(1);
310  const size_t p = Kokkos::dimension_scalar(src);
311  Kokkos::RangePolicy<exec_space> policy(0,m);
312  Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
313  {
314  for (size_t j=0; j<n; ++j) {
315  const Scalar& s = src(i,j);
316  for (size_t k=0; k<p; ++k)
317  dst(i,j*p+k) = s.fastAccessCoeff(k);
318  }
319  });
320  }
321  };
322 
323  // Functor for copying a scalar view to a PCE view
324  // (Assumes view is rank 2, LayoutLeft)
325  template <typename ExecSpace>
326  struct CopyScalar2PCE {
327  typedef ExecSpace exec_space;
328 
329  template <typename DstView, typename SrcView>
330  CopyScalar2PCE(const DstView& dst, const SrcView& src) {
331  impl(dst,src);
332  }
333 
334  template <typename DstView, typename SrcView>
335  void impl(const DstView& dst, const SrcView& src) {
336  typedef typename DstView::non_const_value_type Scalar;
337  const size_t m = dst.extent(0);
338  const size_t n = dst.extent(1);
339  const size_t p = Kokkos::dimension_scalar(dst);
340 
341  Kokkos::RangePolicy<exec_space> policy(0,m);
342  Kokkos::parallel_for( policy, KOKKOS_LAMBDA(const size_t i)
343  {
344  for (size_t j=0; j<n; ++j) {
345  Scalar& d = dst(i,j);
346  for (size_t k=0; k<p; ++k)
347  d.fastAccessCoeff(k) = src(i,j*p+k);
348  }
349  });
350  }
351  };
352 
353 #ifdef KOKKOS_ENABLE_CUDA
354  // Specialization for CopyPCE2Scalar specifically for Cuda that ensures
355  // coalesced reads and writes
356  template <>
357  struct CopyPCE2Scalar<Kokkos::Cuda> {
358  typedef Kokkos::Cuda exec_space;
359 
360  template <typename DstView, typename SrcView>
361  CopyPCE2Scalar(const DstView& dst, const SrcView& src) {
362  impl(dst,src);
363  }
364 
365  template <typename DstView, typename SrcView>
366  void impl(const DstView& dst, const SrcView& src) {
367  typedef typename DstView::non_const_value_type Scalar;
368  typedef Kokkos::TeamPolicy<exec_space> Policy;
369  typedef typename Policy::member_type Member;
370 
371  const size_t m = src.extent(0);
372  const size_t n = src.extent(1);
373  const size_t p = Kokkos::dimension_scalar(src);
374 
375  const size_t ChunkSize = 16;
376  const size_t M = (m+ChunkSize-1)/ChunkSize;
377 
378  Policy policy(M,ChunkSize,ChunkSize);
379  Kokkos::parallel_for( policy, [=] __device__(const Member& team)
380  {
381  __shared__ Scalar tmp[ChunkSize][ChunkSize];
382  const size_t i_block = blockIdx.x*ChunkSize;
383 
384  for (size_t j=0; j<n; ++j) {
385  for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {
386 
387  // Make sure previous iteration has completed before overwriting tmp
388  __syncthreads();
389 
390  // Read ChunkSize x ChunkSize block (coalesced on k)
391  size_t i = i_block + threadIdx.y;
392  size_t k = k_block + threadIdx.x;
393  if (i < m && k < p)
394  tmp[threadIdx.y][threadIdx.x] = src(i,j).fastAccessCoeff(k);
395 
396  // Wait for all threads to finish
397  __syncthreads();
398 
399  // Write ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
400  i = i_block + threadIdx.x;
401  k = k_block + threadIdx.y;
402  if (i < m && k < p)
403  dst(i,j*p+k) = tmp[threadIdx.x][threadIdx.y];
404 
405  }
406  }
407  });
408  }
409  };
410 
411  // Specialization for Scalar2PCE specifically for Cuda that ensures
412  // coalesced reads and writes
413  template <>
414  struct CopyScalar2PCE<Kokkos::Cuda> {
415  typedef Kokkos::Cuda exec_space;
416 
417  template <typename DstView, typename SrcView>
418  CopyScalar2PCE(const DstView& dst, const SrcView& src) {
419  impl(dst,src);
420  }
421 
422  template <typename DstView, typename SrcView>
423  void impl(const DstView& dst, const SrcView& src) {
424  typedef typename SrcView::non_const_value_type Scalar;
425  typedef Kokkos::TeamPolicy<exec_space> Policy;
426  typedef typename Policy::member_type Member;
427 
428  const size_t m = dst.extent(0);
429  const size_t n = dst.extent(1);
430  const size_t p = Kokkos::dimension_scalar(dst);
431 
432  const size_t ChunkSize = 16;
433  const size_t M = (m+ChunkSize-1)/ChunkSize;
434 
435  Policy policy(M,ChunkSize,ChunkSize);
436  Kokkos::parallel_for( policy, [=] __device__(const Member& team)
437  {
438  __shared__ Scalar tmp[ChunkSize][ChunkSize];
439  const size_t i_block = blockIdx.x*ChunkSize;
440 
441  for (size_t j=0; j<n; ++j) {
442  for (size_t k_block=0; k_block<p; k_block+=ChunkSize) {
443 
444  // Make sure previous iteration has completed before overwriting tmp
445  __syncthreads();
446 
447  // Read ChunkSize x ChunkSize block (coalesced on i for LayoutLeft)
448  size_t i = i_block + threadIdx.x;
449  size_t k = k_block + threadIdx.y;
450  if (i < m && k < p)
451  tmp[threadIdx.x][threadIdx.y] = src(i,j*p+k);
452 
453  // Wait for all threads to finish
454  __syncthreads();
455 
456  // Write ChunkSize x ChunkSize block (coalesced on k)
457  i = i_block + threadIdx.y;
458  k = k_block + threadIdx.x;
459  if (i < m && k < p)
460  dst(i,j).fastAccessCoeff(k) = tmp[threadIdx.y][threadIdx.x];
461 
462  }
463  }
464  });
465  }
466  };
467 #endif
468 
469  } // Impl
470 
471  template <typename DstView, typename SrcView>
472  void copy_pce_to_scalar(const DstView& dst, const SrcView& src)
473  {
475  }
476 
477  template <typename DstView, typename SrcView>
478  void copy_scalar_to_pce(const DstView& dst, const SrcView& src)
479  {
481  }
482 
483  // Tpetra operator wrapper allowing a mean0-based operator (with double
484  // scalar type) to be applied to a UQ::PCE multi-vector
485  template <typename Scalar,
486  typename LocalOrdinal,
487  typename GlobalOrdinal,
488  typename Node>
490  virtual public Tpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> {
491  public:
495  typedef Node node_type;
497  typedef Tpetra::Operator<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_op_type;
498 
499  MeanBasedTpetraOperator(const Teuchos::RCP<const scalar_op_type>& mb_op_) :
500  mb_op(mb_op_) {}
501 
503 
504  virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
505  getDomainMap() const {
506  return mb_op->getDomainMap();
507  }
508 
509  virtual Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
510  getRangeMap() const {
511  return mb_op->getRangeMap();
512  }
513 
514  virtual void
515  apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
516  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
517  Teuchos::ETransp mode = Teuchos::NO_TRANS,
518  Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(),
519  Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const
520  {
521  typedef typename scalar_mv_type::device_type device_type;
522 
523  auto xv = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
524  auto yv = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
525  const size_t pce_size = Kokkos::dimension_scalar(xv);
526  if (X_s == Teuchos::null ||
527  X_s->getNumVectors() != X.getNumVectors()*pce_size)
528  X_s = Teuchos::rcp(new scalar_mv_type(X.getMap(),
529  X.getNumVectors()*pce_size));
530  if (Y_s == Teuchos::null ||
531  Y_s->getNumVectors() != Y.getNumVectors()*pce_size)
532  Y_s = Teuchos::rcp(new scalar_mv_type(Y.getMap(),
533  Y.getNumVectors()*pce_size));
534  base_scalar_type alpha_s = alpha.fastAccessCoeff(0);
535  base_scalar_type beta_s = beta.fastAccessCoeff(0);
536 
537  {
538  auto xv_s = X_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
539  auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadWrite);
540 
541  copy_pce_to_scalar(xv_s, xv);
542  if (beta_s != 0.0)
543  copy_pce_to_scalar(yv_s, yv);
544  }
545 
546  mb_op->apply(*X_s, *Y_s, mode, alpha_s, beta_s);
547 
548  {
549  auto yv_s = Y_s->getLocalViewDevice(Tpetra::Access::ReadOnly);
550  copy_scalar_to_pce(yv, yv_s);
551  }
552  }
553 
554  virtual bool hasTransposeApply() const {
555  return mb_op->hasTransposeApply();
556  }
557 
558  private:
559 
560  typedef Tpetra::MultiVector<base_scalar_type,LocalOrdinal,GlobalOrdinal,Node> scalar_mv_type;
561  mutable Teuchos::RCP<scalar_mv_type> X_s, Y_s;
562  Teuchos::RCP<const scalar_op_type> mb_op;
563 
564  };
565 
566 }
567 
568 #endif // STOKHOS_TPETRA_UTILITIES_HPP
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, N > > build_mean_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
void impl(const DstView &dst, const SrcView &src)
Stokhos::StandardStorage< int, double > Storage
CopyPCE2Scalar(const DstView &dst, const SrcView &src)
Kokkos::DefaultExecutionSpace execution_space
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const
void copy_pce_to_scalar(const DstView &dst, const SrcView &src)
void impl(const DstView &dst, const SrcView &src)
Teuchos::RCP< scalar_mv_type > X_s
Teuchos::RCP< const scalar_op_type > mb_op
Teuchos::RCP< Tpetra::CrsMatrix< typename Scalar::value_type, LO, GO, N > > build_mean_scalar_matrix(const Tpetra::CrsMatrix< Scalar, LO, GO, N > &A)
Tpetra::MultiVector< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_mv_type
GetMeanValsFunc(const ViewType &vals)
Top-level namespace for Stokhos classes and functions.
KokkosClassic::DefaultNode::DefaultNodeType Node
ViewType::execution_space execution_space
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Get mean values matrix for mean-based preconditioning.
MeanBasedTpetraOperator(const Teuchos::RCP< const scalar_op_type > &mb_op_)
Get mean values matrix for mean-based preconditioning.
void copy_scalar_to_pce(const DstView &dst, const SrcView &src)
CopyScalar2PCE(const DstView &dst, const SrcView &src)
ViewType::execution_space execution_space
virtual void apply(const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
virtual Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const
Tpetra::Operator< base_scalar_type, LocalOrdinal, GlobalOrdinal, Node > scalar_op_type
Teuchos::RCP< scalar_mv_type > Y_s