Commit 13b89a71 authored by Jakub Klinkovský
Browse files

Removed volatile reduction completely

parent cbc2fff9
Loading
Loading
Loading
Loading
+17 −34
Original line number Diff line number Diff line
@@ -31,8 +31,7 @@ getVectorMax( const Vector& v )
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
}

template< typename Device >
@@ -49,8 +48,7 @@ getVectorMin( const Vector& v )
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return data[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a =  TNL::min( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a =  TNL::min( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
}

template< typename Device >
@@ -67,8 +65,7 @@ getVectorAbsMax( const Vector& v )
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
}

template< typename Device >
@@ -85,8 +82,7 @@ getVectorAbsMin( const Vector& v )
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
}

template< typename Device >
@@ -103,8 +99,7 @@ getVectorL1Norm( const Vector& v )
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 );
}

template< typename Device >
@@ -121,8 +116,7 @@ getVectorL2Norm( const Vector& v )
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  data[ i ] * data[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ) );
   return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ) );
}

template< typename Device >
@@ -146,8 +140,7 @@ getVectorLpNorm( const Vector& v,
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  TNL::pow( TNL::abs( data[ i ] ), p ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ), 1.0 / p );
   return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 ), 1.0 / p );
}

template< typename Device >
@@ -167,8 +160,7 @@ getVectorSum( const Vector& v )
   const auto* data = v.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i )  -> ResultType { return  data[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, ( ResultType ) 0 );
}

template< typename Device >
@@ -188,8 +180,7 @@ getVectorDifferenceMax( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  data1[ i ] - data2[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
}

template< typename Device >
@@ -209,8 +200,7 @@ getVectorDifferenceMin( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  data1[ i ] - data2[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
}

template< typename Device >
@@ -230,8 +220,7 @@ getVectorDifferenceAbsMax( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  TNL::abs( data1[ i ] - data2[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
}

template< typename Device >
@@ -251,8 +240,7 @@ getVectorDifferenceAbsMin( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  TNL::abs( data1[ i ] - data2[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
}

template< typename Device >
@@ -272,8 +260,7 @@ getVectorDifferenceL1Norm( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  TNL::abs( data1[ i ] - data2[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 );
}

template< typename Device >
@@ -296,8 +283,7 @@ getVectorDifferenceL2Norm( const Vector1& v1,
      return diff * diff;
   };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ) );
   return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ) );
}

template< typename Device >
@@ -324,8 +310,7 @@ getVectorDifferenceLpNorm( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 ), 1.0 / p );
   return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 ), 1.0 / p );
}

template< typename Device >
@@ -345,8 +330,7 @@ getVectorDifferenceSum( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  data1[ i ] - data2[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 );
}

template< typename Device >
@@ -366,8 +350,7 @@ getScalarProduct( const Vector1& v1,
   const auto* data2 = v2.getData();
   auto fetch = [=] __cuda_callable__ ( IndexType i ) { return  data1[ i ] * data2[ i ]; };
   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 );
   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, ( ResultType ) 0 );
}

} // namespace Benchmarks
+3 −6
Original line number Diff line number Diff line
@@ -135,8 +135,7 @@ compare( const Element1* destination,

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return  ( destination[ i ] == source[ i ] ); };
   auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
   return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
   return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true );
}

template< typename Element,
@@ -153,8 +152,7 @@ containsValue( const Element* data,

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return  ( data[ i ] == value ); };
   auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a |= b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a |= b; };
   return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, false );
   return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, false );
}

template< typename Element,
@@ -171,8 +169,7 @@ containsOnlyValue( const Element* data,

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return  ( data[ i ] == value ); };
   auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
   auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
   return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
   return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true );
}


+0 −6
Original line number Diff line number Diff line
@@ -32,7 +32,6 @@ struct Multireduction< Devices::Host >
    *                 the i-th value to be reduced from the j-th dataset
    *                 (i = 0,...,size-1; j = 0,...,n-1)
    *    reduction: callable object representing the reduction operation
    *    volatileReduction: callable object representing the reduction operation
    *    size: the size of each dataset
    *    n: number of datasets to be reduced
    *    result: output array of size = n
@@ -40,13 +39,11 @@ struct Multireduction< Devices::Host >
   template< typename Result,
             typename DataFetcher,
             typename Reduction,
             typename VolatileReduction,
             typename Index >
   static void
   reduce( const Result zero,
           DataFetcher dataFetcher,
           const Reduction reduction,
           const VolatileReduction volatileReduction,
           const Index size,
           const int n,
           Result* result );
@@ -62,7 +59,6 @@ struct Multireduction< Devices::Cuda >
    *                 the i-th value to be reduced from the j-th dataset
    *                 (i = 0,...,size-1; j = 0,...,n-1)
    *    reduction: callable object representing the reduction operation
    *    volatileReduction: callable object representing the reduction operation
    *    size: the size of each dataset
    *    n: number of datasets to be reduced
    *    hostResult: output array of size = n
@@ -70,13 +66,11 @@ struct Multireduction< Devices::Cuda >
   template< typename Result,
             typename DataFetcher,
             typename Reduction,
             typename VolatileReduction,
             typename Index >
   static void
   reduce( const Result zero,
           DataFetcher dataFetcher,
           const Reduction reduction,
           const VolatileReduction volatileReduction,
           const Index size,
           const int n,
           Result* hostResult );
+1 −5
Original line number Diff line number Diff line
@@ -33,14 +33,12 @@ namespace Algorithms {
template< typename Result,
          typename DataFetcher,
          typename Reduction,
          typename VolatileReduction,
          typename Index >
void
Multireduction< Devices::Host >::
reduce( const Result zero,
        DataFetcher dataFetcher,
        const Reduction reduction,
        const VolatileReduction volatileReduction,
        const Index size,
        const int n,
        Result* result )
@@ -173,14 +171,12 @@ reduce( const Result zero,
template< typename Result,
          typename DataFetcher,
          typename Reduction,
          typename VolatileReduction,
          typename Index >
void
Multireduction< Devices::Cuda >::
reduce( const Result zero,
        DataFetcher dataFetcher,
        const Reduction reduction,
        const VolatileReduction volatileReduction,
        const Index size,
        const int n,
        Result* hostResult )
@@ -218,7 +214,7 @@ reduce( const Result zero,

   // finish the reduction on the host
   auto dataFetcherFinish = [&] ( int i, int k ) { return resultArray[ i + k * reducedSize ]; };
   Multireduction< Devices::Host >::reduce( zero, dataFetcherFinish, reduction, volatileReduction, reducedSize, n, hostResult );
   Multireduction< Devices::Host >::reduce( zero, dataFetcherFinish, reduction, reducedSize, n, hostResult );

   #ifdef CUDA_REDUCTION_PROFILING
      timer.stop();
+0 −8
Original line number Diff line number Diff line
@@ -30,24 +30,20 @@ struct Reduction< Devices::Host >
   template< typename Index,
             typename Result,
             typename ReductionOperation,
             typename VolatileReductionOperation,
             typename DataFetcher >
   static Result
   reduce( const Index size,
           ReductionOperation& reduction,
           VolatileReductionOperation& volatileReduction,
           DataFetcher& dataFetcher,
           const Result& zero );

   template< typename Index,
             typename Result,
             typename ReductionOperation,
             typename VolatileReductionOperation,
             typename DataFetcher >
   static std::pair< Index, Result >
   reduceWithArgument( const Index size,
                       ReductionOperation& reduction,
                       VolatileReductionOperation& volatileReduction,
                       DataFetcher& dataFetcher,
                       const Result& zero );
};
@@ -58,24 +54,20 @@ struct Reduction< Devices::Cuda >
   template< typename Index,
             typename Result,
             typename ReductionOperation,
             typename VolatileReductionOperation,
             typename DataFetcher >
   static Result
   reduce( const Index size,
           ReductionOperation& reduction,
           VolatileReductionOperation& volatileReduction,
           DataFetcher& dataFetcher,
           const Result& zero );

   template< typename Index,
             typename Result,
             typename ReductionOperation,
             typename VolatileReductionOperation,
             typename DataFetcher >
   static std::pair< Index, Result >
   reduceWithArgument( const Index size,
                       ReductionOperation& reduction,
                       VolatileReductionOperation& volatileReduction,
                       DataFetcher& dataFetcher,
                       const Result& zero );
};
Loading