Commit a24fd066 authored by Jakub Klinkovský
Browse files

Replaced global cuda_callable lambdas with functors to avoid lots of nvcc warnings

Functors can also be better optimized by the host compiler, because nvcc sucks...
parent cd3db363
Loading
Loading
Loading
Loading
+22 −10
Original line number Diff line number Diff line
@@ -21,13 +21,25 @@ namespace Containers {
namespace detail {

////
// Lambdas used together with StaticFor for static loop unrolling in the
// Functors used together with StaticFor for static loop unrolling in the
// implementation of the StaticArray
template< typename LeftValue, typename RightValue = LeftValue >
auto assignArrayLambda = [] __cuda_callable__ ( int i, LeftValue* data, const RightValue* v ) { data[ i ] = v[ i ]; };
// Per-index copy step: stores source[ index ] into target[ index ], using
// whatever conversion the assignment from RightValue to LeftValue provides.
// The functor carries no state, so callers pass a default-constructed
// temporary to StaticFor::exec together with the two pointers.
struct assignArrayFunctor
{
   __cuda_callable__
   void operator()( int index, LeftValue* target, const RightValue* source ) const
   {
      target[ index ] = source[ index ];
   }
};

template< typename LeftValue, typename RightValue = LeftValue >
auto assignValueLambda = [] __cuda_callable__ ( int i, LeftValue* data, const RightValue v ) { data[ i ] = v; };
// Per-index fill step: writes one and the same value into target[ index ].
// The value is received by copy; the functor itself holds no state.
struct assignValueFunctor
{
   __cuda_callable__
   void operator()( int index, LeftValue* target, const RightValue value ) const
   {
      target[ index ] = value;
   }
};

////
// StaticArrayComparator does static loop unrolling of array comparison
@@ -111,21 +123,21 @@ template< int Size, typename Value >
__cuda_callable__
StaticArray< Size, Value >::StaticArray( const Value v[ Size ] )
{
   // Constructs the array from a raw C array: the helper assigns
   // data[ i ] = v[ i ] for each index supplied by StaticFor< 0, Size >.

   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignArrayLambda< Value >, data, v );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignArrayFunctor< Value >{}, data, v );
}

// Fill constructor: the helper assigns the single value v to data[ i ] for
// each index supplied by StaticFor< 0, Size >.
template< int Size, typename Value >
__cuda_callable__
StaticArray< Size, Value >::StaticArray( const Value& v )
{
   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignValueLambda< Value >, data, v );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignValueFunctor< Value >{}, data, v );
}

// Copy constructor: copies the elements of v (read through v.getData())
// into data, one index at a time via StaticFor< 0, Size >.
template< int Size, typename Value >
__cuda_callable__
StaticArray< Size, Value >::StaticArray( const StaticArray< Size, Value >& v )
{
   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignArrayLambda< Value >, data, v.getData() );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignArrayFunctor< Value >{}, data, v.getData() );
}

template< int Size, typename Value >
@@ -246,7 +258,7 @@ template< int Size, typename Value >
__cuda_callable__
StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const StaticArray< Size, Value >& array )
{
   // Copy assignment: copies the elements of array into data and returns
   // *this so that assignments can be chained.

   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignArrayLambda< Value >, data, array.getData() );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignArrayFunctor< Value >{}, data, array.getData() );
   return *this;
}

@@ -255,7 +267,7 @@ template< int Size, typename Value >
__cuda_callable__
StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const Array& array )
{
   // Assignment from another array type: the source element type is taken
   // from Array::ValueType and may differ from Value; the helper's
   // data[ i ] = v[ i ] assignment performs the element conversion.
   // NOTE(review): no size check on array is visible here - presumably the
   // caller guarantees at least Size elements; confirm.

   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignArrayLambda< Value, typename Array::ValueType >, data, array.getData() );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignArrayFunctor< Value, typename Array::ValueType >{}, data, array.getData() );
   return *this;
}

@@ -282,7 +294,7 @@ StaticArray< Size, Value >::
operator StaticArray< Size, OtherValue >() const
{
   // Conversion to an array with a different element type: builds a
   // temporary, copies the elements of data into it (converting Value to
   // OtherValue through the helper's assignment), and returns it by value.
   StaticArray< Size, OtherValue > aux;
   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignArrayLambda< OtherValue, Value >, aux.getData(), data );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignArrayFunctor< OtherValue, Value >{}, aux.getData(), data );
   return aux;
}

@@ -290,7 +302,7 @@ template< int Size, typename Value >
__cuda_callable__
void StaticArray< Size, Value >::setValue( const ValueType& val )
{
   // Overwrites the elements of data with val; the indices are supplied by
   // StaticFor< 0, Size >.

   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignValueLambda< Value >, data, val );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignValueFunctor< Value >{}, data, val );
}

template< int Size, typename Value >
+27 −9
Original line number Diff line number Diff line
@@ -20,16 +20,34 @@ namespace Containers {
namespace detail {

////
// Lambdas used together with StaticFor for static loop unrolling in the
// Functors used together with StaticFor for static loop unrolling in the
// implementation of the StaticVector
template< typename LeftReal, typename RightReal = LeftReal >
auto addVectorLambda = [] __cuda_callable__ ( int i, LeftReal* data, const RightReal* v ) { data[ i ] += v[ i ]; };
struct addVectorFunctor
{
   void __cuda_callable__ operator()( int i, LeftReal* data, const RightReal* v ) const
   {
      data[ i ] += v[ i ];
   }
};

template< typename LeftReal, typename RightReal = LeftReal >
auto subtractVectorLambda = [] __cuda_callable__ ( int i, LeftReal* data, const RightReal* v ) { data[ i ] -= v[ i ]; };
struct subtractVectorFunctor
{
   void __cuda_callable__ operator()( int i, LeftReal* data, const RightReal* v ) const
   {
      data[ i ] -= v[ i ];
   }
};

template< typename LeftReal, typename RightReal = LeftReal >
auto scalarMultiplicationLambda = [] __cuda_callable__ ( int i, LeftReal* data, const RightReal v ) { data[ i ] *= v; };
struct scalarMultiplicationFunctor
{
   void __cuda_callable__ operator()( int i, LeftReal* data, const RightReal v ) const
   {
      data[ i ] *= v;
   }
};

} // namespace detail

@@ -90,7 +108,7 @@ template< int Size, typename Real >
__cuda_callable__
StaticVector< Size, Real >& StaticVector< Size, Real >::operator+=( const StaticVector& v )
{
   // In-place component-wise addition of v; returns *this for chaining.

   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::addVectorLambda< Real >, this->getData(), v.getData() );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::addVectorFunctor< Real >{}, this->getData(), v.getData() );
   return *this;
}

@@ -98,7 +116,7 @@ template< int Size, typename Real >
__cuda_callable__
StaticVector< Size, Real >& StaticVector< Size, Real >::operator-=( const StaticVector& v )
{
   // In-place component-wise subtraction of v; returns *this for chaining.

   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::subtractVectorLambda< Real >, this->getData(), v.getData() );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::subtractVectorFunctor< Real >{}, this->getData(), v.getData() );
   return *this;
}

@@ -106,7 +124,7 @@ template< int Size, typename Real >
__cuda_callable__
StaticVector< Size, Real >& StaticVector< Size, Real >::operator*=( const Real& c )
{
   // In-place multiplication of the components by the scalar c; returns
   // *this for chaining.
   // NOTE(review): c is taken by reference; if it refers to a component of
   // this vector, the outcome depends on how StaticFor::exec forwards its
   // arguments - confirm.

   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::scalarMultiplicationLambda< Real >, this->getData(), c );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::scalarMultiplicationFunctor< Real >{}, this->getData(), c );
   return *this;
}

@@ -114,7 +132,7 @@ template< int Size, typename Real >
__cuda_callable__
StaticVector< Size, Real >& StaticVector< Size, Real >::operator/=( const Real& c )
{
   // In-place division by the scalar c, implemented as multiplication of
   // the components by the reciprocal of c; returns *this for chaining.
   // The reciprocal is evaluated once, before any component is modified.
   // NOTE: multiplying by a reciprocal is only meaningful for floating-point
   // Real; for an integral Real the reciprocal is truncated when it is
   // converted to Real.

   // before this commit: variable-template lambda (kept as diff history)
   StaticFor< 0, Size >::exec( detail::scalarMultiplicationLambda< Real >, this->getData(), 1.0 / c );
   // after this commit: default-constructed functor. The reciprocal is formed
   // in Real rather than with the double literal 1.0, so a float (or other
   // non-double) Real is not promoted to double-precision arithmetic in
   // cuda-callable code.
   StaticFor< 0, Size >::exec( detail::scalarMultiplicationFunctor< Real >{}, this->getData(), Real( 1 ) / c );
   return *this;
}

@@ -125,7 +143,7 @@ StaticVector< Size, Real >::
operator StaticVector< Size, OtherReal >() const
{
   // Conversion to a vector with a different component type: builds a
   // temporary, copies the components into it (converting Real to OtherReal
   // through the array-assignment helper), and returns it by value.
   StaticVector< Size, OtherReal > aux;
   // before this commit: variable-template lambda
   StaticFor< 0, Size >::exec( detail::assignArrayLambda< OtherReal, Real >, aux.getData(), this->getData() );
   // after this commit: default-constructed functor
   StaticFor< 0, Size >::exec( detail::assignArrayFunctor< OtherReal, Real >{}, aux.getData(), this->getData() );
   return aux;
}