Newer
Older
/***************************************************************************
-------------------
begin : Nov 8, 2012
copyright : (C) 2012 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include <TNL/Exceptions/CudaSupportMissing.h>
Jakub Klinkovský
committed
#include <TNL/Containers/Algorithms/VectorOperations.h>
#include <TNL/Containers/Algorithms/CudaPrefixSumKernel.h>
Jakub Klinkovský
committed
namespace Containers {
Jakub Klinkovský
committed
namespace Algorithms {
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/**
 * \brief Sums all elements of the vector \e v by a reduction on the CUDA device.
 *
 * Each element is converted to \e ResultType when it is fetched, so the whole
 * accumulation is carried out in \e ResultType.
 *
 * \tparam Vector      vector type providing \e IndexType, getSize() and getData().
 * \tparam ResultType  type of the accumulator and of the returned value.
 *
 * \param v  vector to be summed; its size is asserted to be positive.
 *
 * \return value returned by Reduction< Devices::Cuda >::reduce with zero as
 *         the initial/identity value.
 */
template< typename Vector, typename ResultType >
ResultType
VectorOperations< Devices::Cuda >::
getVectorSum( const Vector& v )
{
   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

   // Summation into bool is rejected at run time.
   // NOTE(review): presumably this is not a static_assert so that the template
   // can still be instantiated with ResultType = bool -- confirm before changing.
   if( std::is_same< ResultType, bool >::value )
      abort();

   using IndexType = typename Vector::IndexType;

   // The raw pointer is captured by value so the lambda does not refer to `v` itself.
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };

   return Reduction< Devices::Cuda >::reduce( v.getSize(),
                                              reduction,
                                              volatileReduction,
                                              fetch,
                                              static_cast< ResultType >( 0 ) );
}
/**
 * \brief Computes the additive prefix sum of \e v on the CUDA device.
 *
 * The work is delegated to PrefixSum< Devices::Cuda, Type >::perform with
 * addition as the reduction operation and zero as its initial/identity value.
 *
 * \tparam Type    variant of the prefix sum, forwarded to PrefixSum.
 * \tparam Vector  vector type providing \e RealType and \e IndexType.
 *
 * \param v      vector passed by non-const reference to PrefixSum::perform.
 * \param begin  first index of the processed range, forwarded unchanged.
 * \param end    end index of the processed range, forwarded unchanged.
 *               NOTE(review): presumably exclusive -- confirm against PrefixSum.
 */
template< Algorithms::PrefixSumType Type,
          typename Vector >
void
VectorOperations< Devices::Cuda >::
prefixSum( Vector& v,
           typename Vector::IndexType begin,
           typename Vector::IndexType end )
{
   using RealType = typename Vector::RealType;

   auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; };

   PrefixSum< Devices::Cuda, Type >::perform( v,
                                              begin,
                                              end,
                                              reduction,
                                              volatileReduction,
                                              static_cast< RealType >( 0.0 ) );
}
/**
 * \brief Computes the additive segmented prefix sum of \e v on the CUDA device.
 *
 * The work is delegated to SegmentedPrefixSum< Devices::Cuda, Type >::perform
 * with addition as the reduction operation and zero as its initial/identity
 * value.
 *
 * \tparam Type    variant of the prefix sum, forwarded to SegmentedPrefixSum.
 * \tparam Vector  vector type providing \e RealType and \e IndexType.
 * \tparam Flags   type of the flags container.
 *
 * \param v      vector passed by non-const reference to SegmentedPrefixSum::perform.
 * \param f      flags forwarded unchanged.
 *               NOTE(review): presumably they mark segment starts -- confirm
 *               against SegmentedPrefixSum.
 * \param begin  first index of the processed range, forwarded unchanged.
 * \param end    end index of the processed range, forwarded unchanged.
 *               NOTE(review): presumably exclusive -- confirm against SegmentedPrefixSum.
 */
template< Algorithms::PrefixSumType Type, typename Vector, typename Flags >
void
VectorOperations< Devices::Cuda >::
segmentedPrefixSum( Vector& v,
                    Flags& f,
                    typename Vector::IndexType begin,
                    typename Vector::IndexType end )
{
   using RealType = typename Vector::RealType;

   auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; };

   SegmentedPrefixSum< Devices::Cuda, Type >::perform( v,
                                                       f,
                                                       begin,
                                                       end,
                                                       reduction,
                                                       volatileReduction,
                                                       static_cast< RealType >( 0.0 ) );
}
Jakub Klinkovský
committed
} // namespace Algorithms