42 #ifndef STOKHOS_MULTIPLY_HPP 43 #define STOKHOS_MULTIPLY_HPP 49 #include "Kokkos_Core.hpp" 60 template <size_t N, bool OK = is_power_of_two<N>::value>
90 template <
typename MatrixType,
91 typename InputVectorType,
92 typename OutputVectorType,
93 typename ColumnIndicesType = void,
98 template <
typename MatrixType,
99 typename InputVectorType,
100 typename OutputVectorType>
102 const InputVectorType&
x,
103 OutputVectorType&
y) {
105 multiply_type::apply( A,
x,
y );
/// Variadic helper whose nested `type` is `void` for any argument pack.
///
/// Going through a class template instead of aliasing directly to `void`
/// is the pre-C++17 spelling of the void_t idiom (presumably chosen to
/// sidestep CWG 1558, where unused alias-template arguments might not
/// take part in SFINAE).
template <class... Ts>
struct make_void {
  using type = void;
};

/// Stand-in for C++17 `std::void_t`: names `void` for any pack of
/// well-formed types `Ts`.
template <class... Ts>
using replace_me_with_void_t_in_cxx17 = typename make_void<Ts...>::type;
// Primary template of the const_type detection trait. The second, unnamed
// parameter defaults to replace_me_with_void_t_in_cxx17<> so that partial
// specializations written in terms of that alias can be selected instead.
// NOTE(review): the members of this primary template are not shown here --
// confirm what `type` resolves to when no specialization matches.
117 template<
class T,
class = replace_me_with_
void_t_in_cxx17<> >
118 struct const_type_impl {
// Partial specialization used when `T::const_type` names a type: the trait
// then exposes that nested type as `type`.
123 struct const_type_impl<T,
124 replace_me_with_void_t_in_cxx17<typename T::const_type> > {
125 using type =
typename T::const_type;
// Shorthand for `typename const_type_impl<T>::type`.
129 using const_type_t =
typename const_type_impl<T>::type;
133 template <
typename MatrixType,
134 typename InputVectorType,
135 typename OutputVectorType>
137 const InputVectorType&
x,
143 using input_vector_type = const_type_t<InputVectorType>;
144 using multiply_type =
146 multiply_type::apply( A,
x,
y );
149 template <
typename MatrixType,
150 typename InputVectorType,
151 typename OutputVectorType,
152 typename ColumnIndicesType>
154 const InputVectorType&
x,
156 const ColumnIndicesType& col) {
158 multiply_type::apply( A,
x,
y, col );
161 template <
typename MatrixType,
162 typename InputVectorType,
163 typename OutputVectorType,
164 typename ColumnIndicesType>
166 const InputVectorType&
x,
168 const ColumnIndicesType& col,
171 multiply_type::apply( A,
x,
y, col );
// Splits `work_count` items among `thread_count` threads and returns, as a
// Kokkos::pair, the [work_begin, work_end) slice belonging to `thread_rank`.
// The per-thread chunk size is rounded using work_mask / work_shift, which
// are derived from the cache line size and sizeof(scalar_type).
182 template <
typename scalar_type,
typename execution_space,
typename size_type>
183 KOKKOS_INLINE_FUNCTION
184 Kokkos::pair<size_type, size_type>
186 const size_type work_count,
187 const size_type thread_count,
188 const size_type thread_rank)
// cache_line (bytes): in CUDA-enabled builds 128 when execution_space is
// Kokkos::Cuda and 64 otherwise; the alternative definition below uses 64.
190 #if defined( KOKKOS_ENABLE_CUDA ) 192 std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
194 enum { cache_line = 64 };
// Number of scalar_type elements that fit in one cache line.
197 enum { work_align = cache_line /
sizeof(
scalar_type) };
// Low-bit mask for rounding up to a multiple of work_align.
// NOTE(review): work_shift is defined on a line not shown here; the
// arithmetic below presumably relies on it being log2(work_align), with
// work_align a power of two -- confirm.
199 enum { work_mask = work_align - 1 };
// Count the work_align-sized groups (rounded up), ceil-divide those groups
// among the threads, then shift back to an element count per thread.
201 const size_type work_per_thread =
202 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
203 thread_count ) << work_shift ;
// Rank r owns the r-th consecutive chunk of work_per_thread elements.
205 size_type work_begin = thread_rank * work_per_thread;
206 size_type work_end = work_begin + work_per_thread;
// Clamp both ends to work_count, so a rank whose chunk starts past the end
// of the data receives an empty range.
207 if (work_begin > work_count)
208 work_begin = work_count;
209 if (work_end > work_count)
210 work_end = work_count;
212 return Kokkos::make_pair(work_begin, work_end);
217 template <
typename Scalar>
218 KOKKOS_INLINE_FUNCTION
224 template <
typename Scalar>
225 KOKKOS_INLINE_FUNCTION
230 template <
typename Value>
234 template <
typename Scalar>
235 KOKKOS_INLINE_FUNCTION
240 template <
typename Value>
244 template <
typename Scalar>
245 KOKKOS_INLINE_FUNCTION
250 template <
typename Value>
255 template <
typename Scalar>
256 KOKKOS_INLINE_FUNCTION
IntegralRank< T::Rank > type
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledAssign(const Value &a_)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
Top-level namespace for Stokhos classes and functions.
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
MultiplyScaledUpdate(const Value &a_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
IntegralRank< T::Rank > type
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y