Loading src/TNL/Containers/Algorithms/reduction-operations.h +38 −57 Original line number Diff line number Diff line Loading @@ -218,21 +218,17 @@ class tnlParallelReductionSum result += data1[ index ]; } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result += data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result += data; }; #endif }; template< typename Real, typename Index > Loading Loading @@ -263,21 +259,17 @@ class tnlParallelReductionMin result = tnlCudaMin( result, data1[ index ] ); } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = tnlCudaMin( result, data ); }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = tnlCudaMin( result, data ); }; #endif }; template< typename Real, typename Index > Loading Loading @@ -308,19 +300,17 @@ class tnlParallelReductionMax result = tnlCudaMax( result, data1[ index ] ); } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = tnlCudaMax( result, data ); }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = tnlCudaMax( result, data ); }; #endif }; template< typename Real, typename Index > Loading Loading @@ -351,22 +341,17 @@ class tnlParallelReductionLogicalAnd result = result && data1[ index ]; } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = result && data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = result && data; }; #endif }; Loading Loading @@ -398,20 +383,17 @@ class tnlParallelReductionLogicalOr result = result || data1[ index ]; } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = result || data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = result || data; }; #endif }; template< typename Real, typename Index > Loading Loading @@ -909,4 +891,3 @@ class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Ind } // namespace Containers } // namespace TNL Loading
src/TNL/Containers/Algorithms/reduction-operations.h +38 −57 Original line number Diff line number Diff line Loading @@ -218,21 +218,17 @@ class tnlParallelReductionSum result += data1[ index ]; } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result += data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result += data; }; #endif }; template< typename Real, typename Index > Loading Loading @@ -263,21 +259,17 @@ class tnlParallelReductionMin result = tnlCudaMin( result, data1[ index ] ); } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = tnlCudaMin( result, data ); }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = tnlCudaMin( result, data ); }; #endif }; template< typename Real, typename Index > Loading Loading @@ -308,19 +300,17 @@ class tnlParallelReductionMax result = tnlCudaMax( result, data1[ index ] ); } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = tnlCudaMax( result, data ); }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = tnlCudaMax( result, data ); }; #endif }; template< typename Real, typename Index > Loading Loading @@ -351,22 +341,17 @@ class tnlParallelReductionLogicalAnd result = result && data1[ index ]; } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = result && data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = result && data; }; #endif }; Loading Loading @@ -398,20 +383,17 @@ class tnlParallelReductionLogicalOr result = result || data1[ index ]; } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, __cuda_callable__ void commonReductionOnDevice( ResultType& result, const ResultType& data ) { result = result || data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, __cuda_callable__ void commonReductionOnDevice( volatile ResultType& result, volatile const ResultType& data ) { result = result || data; }; #endif }; template< typename Real, typename Index > Loading Loading @@ -909,4 +891,3 @@ class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Ind } // namespace Containers } // namespace TNL