From 15dcbd438ec93bd6dd9679f5cab9b36395f5bfed Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Thu, 10 Dec 2015 23:00:19 +0100 Subject: [PATCH] Optimizing CUDA L2 norm. --- src/core/cuda/CMakeLists.txt | 2 +- src/core/cuda/cuda-prefix-sum_impl.cu | 16 +- src/core/cuda/cuda-prefix-sum_impl.h | 26 +- src/core/cuda/cuda-reduction-abs-max_impl.cu | 20 +- src/core/cuda/cuda-reduction-abs-min_impl.cu | 20 +- src/core/cuda/cuda-reduction-abs-sum_impl.cu | 20 +- src/core/cuda/cuda-reduction-and_impl.cu | 20 +- .../cuda/cuda-reduction-diff-abs-max_impl.cu | 20 +- .../cuda/cuda-reduction-diff-abs-min_impl.cu | 20 +- .../cuda/cuda-reduction-diff-abs-sum_impl.cu | 20 +- .../cuda/cuda-reduction-diff-l2-norm_impl.cu | 16 +- .../cuda/cuda-reduction-diff-lp-norm_impl.cu | 16 +- src/core/cuda/cuda-reduction-diff-max_impl.cu | 20 +- src/core/cuda/cuda-reduction-diff-min_impl.cu | 20 +- src/core/cuda/cuda-reduction-diff-sum_impl.cu | 20 +- .../cuda/cuda-reduction-equalities_impl.cu | 20 +- .../cuda/cuda-reduction-inequalities_impl.cu | 20 +- src/core/cuda/cuda-reduction-l2-norm_impl.cu | 16 +- src/core/cuda/cuda-reduction-lp-norm_impl.cu | 14 +- src/core/cuda/cuda-reduction-max_impl.cu | 20 +- src/core/cuda/cuda-reduction-min_impl.cu | 20 +- src/core/cuda/cuda-reduction-or_impl.cu | 20 +- .../cuda-reduction-scalar-product_impl.cu | 20 +- src/core/cuda/cuda-reduction-sum_impl.cu | 22 +- src/core/cuda/cuda-reduction_impl.h | 10 +- src/core/cuda/reduction-operations.h | 162 ++++----- src/core/cuda/tnlCudaReduction.h | 4 +- src/core/cuda/tnlCudaReduction_impl.h | 8 +- src/core/vectors/CMakeLists.txt | 15 +- .../vectors/tnlVectorOperationsCuda_impl.cpp | 328 ++++++++++++++++++ ...mpl.cu => tnlVectorOperationsCuda_impl.cu} | 53 ++- ...l.cpp => tnlVectorOperationsHost_impl.cpp} | 307 +++------------- tests/benchmarks/tnl-cuda-benchmarks.h | 61 +++- 33 files changed, 816 insertions(+), 580 deletions(-) create mode 100644 src/core/vectors/tnlVectorOperationsCuda_impl.cpp rename src/core/vectors/{tnlVectorOperations_impl.cu => tnlVectorOperationsCuda_impl.cu} (85%) rename src/core/vectors/{tnlVectorOperations_impl.cpp => tnlVectorOperationsHost_impl.cpp} (52%) diff --git a/src/core/cuda/CMakeLists.txt b/src/core/cuda/CMakeLists.txt index b1e0aababd..815e594637 100755 --- a/src/core/cuda/CMakeLists.txt +++ b/src/core/cuda/CMakeLists.txt @@ -34,7 +34,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu ${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu ${CURRENT_DIR}/cuda-prefix-sum_impl.cu - PARENT_SCOPE ) + PARENT_SCOPE ) endif() set( tnl_core_cuda_SOURCES diff --git a/src/core/cuda/cuda-prefix-sum_impl.cu b/src/core/cuda/cuda-prefix-sum_impl.cu index 74f3e85fb7..58393298d2 100644 --- a/src/core/cuda/cuda-prefix-sum_impl.cu +++ b/src/core/cuda/cuda-prefix-sum_impl.cu @@ -23,7 +23,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const int *deviceInput, int* deviceOutput, - const tnlParallelReductionSum< int, int >& operation, + tnlParallelReductionSum< int, int >& operation, const enumPrefixSumType prefixSumType ); @@ -32,7 +32,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const float *deviceInput, float* deviceOutput, - const tnlParallelReductionSum< float, int >& operation, + tnlParallelReductionSum< float, int >& operation, const enumPrefixSumType prefixSumType ); #endif @@ -40,7 +40,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const double *deviceInput, double* deviceOutput, - const tnlParallelReductionSum< double, int >& operation, + tnlParallelReductionSum< double, int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE @@ -48,7 +48,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const long double *deviceInput, long double* deviceOutput, - const tnlParallelReductionSum< long double, int >& operation, + tnlParallelReductionSum< long double, int >& operation, const enumPrefixSumType prefixSumType ); #endif @@ -57,7 +57,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const int *deviceInput, int* deviceOutput, - const tnlParallelReductionSum< int, long int >& operation, + tnlParallelReductionSum< int, long int >& operation, const enumPrefixSumType prefixSumType ); @@ -66,7 +66,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const float *deviceInput, float* deviceOutput, - const tnlParallelReductionSum< float, long int >& operation, + tnlParallelReductionSum< float, long int >& operation, const enumPrefixSumType prefixSumType ); #endif @@ -74,7 +74,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const double *deviceInput, double* deviceOutput, - const tnlParallelReductionSum< double, long int >& operation, + tnlParallelReductionSum< double, long int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE @@ -82,7 +82,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const long double *deviceInput, long double* deviceOutput, - const tnlParallelReductionSum< long double, long int >& operation, + tnlParallelReductionSum< long double, long int >& operation, const enumPrefixSumType prefixSumType ); #endif #endif diff --git a/src/core/cuda/cuda-prefix-sum_impl.h b/src/core/cuda/cuda-prefix-sum_impl.h index ad2d02aa33..2c5644cc20 100644 --- a/src/core/cuda/cuda-prefix-sum_impl.h +++ b/src/core/cuda/cuda-prefix-sum_impl.h @@ -30,7 +30,7 @@ template< typename DataType, typename Operation, typename Index > __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumType, - const Operation operation, + Operation operation, const Index size, const Index elementsInBlock, const DataType* input, @@ -159,7 +159,7 @@ __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumT template< typename DataType, typename Operation, typename Index > -__global__ void cudaSecondPhaseBlockPrefixSum( const Operation operation, +__global__ void cudaSecondPhaseBlockPrefixSum( Operation operation, const Index size, const Index elementsInBlock, const Index gridShift, @@ -186,7 +186,7 @@ template< typename DataType, typename Operation, typename Index > bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType, - const Operation& operation, + Operation& operation, const Index size, const Index blockSize, const Index elementsInBlock, @@ -276,7 +276,7 @@ template< typename DataType, typename Operation, typename Index > bool cudaGridPrefixSum( enumPrefixSumType prefixSumType, - const Operation& operation, + Operation& operation, const Index size, const Index blockSize, const Index elementsInBlock, @@ -313,7 +313,7 @@ bool cudaPrefixSum( const Index size, const Index blockSize, const DataType *deviceInput, DataType* deviceOutput, - const Operation& operation, + Operation& operation, const enumPrefixSumType prefixSumType ) { /**** @@ -361,7 +361,7 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const int *deviceInput, int* deviceOutput, - const tnlParallelReductionSum< int, int >& operation, + tnlParallelReductionSum< int, int >& operation, const enumPrefixSumType prefixSumType ); @@ -369,14 +369,14 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const float *deviceInput, float* deviceOutput, - const tnlParallelReductionSum< float, int >& operation, + tnlParallelReductionSum< float, int >& operation, const enumPrefixSumType prefixSumType ); extern template bool cudaPrefixSum( const int size, const int blockSize, const double *deviceInput, double* deviceOutput, - const tnlParallelReductionSum< double, int >& operation, + tnlParallelReductionSum< double, int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE @@ -384,7 +384,7 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const long double *deviceInput, long double* deviceOutput, - const tnlParallelReductionSum< long double, int >& operation, + tnlParallelReductionSum< long double, int >& operation, const enumPrefixSumType prefixSumType ); #endif @@ -393,7 +393,7 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const int *deviceInput, int* deviceOutput, - const tnlParallelReductionSum< int, long int >& operation, + tnlParallelReductionSum< int, long int >& operation, const enumPrefixSumType prefixSumType ); @@ -401,14 +401,14 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const float *deviceInput, float* deviceOutput, - const tnlParallelReductionSum< float, long int >& operation, + tnlParallelReductionSum< float, long int >& operation, const enumPrefixSumType prefixSumType ); extern template bool cudaPrefixSum( const long int size, const long int blockSize, const double *deviceInput, double* deviceOutput, - const tnlParallelReductionSum< double, long int >& operation, + tnlParallelReductionSum< double, long int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE @@ -416,7 +416,7 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const long double *deviceInput, long double* deviceOutput, - const tnlParallelReductionSum< long double, long int >& operation, + tnlParallelReductionSum< long double, long int >& operation, const enumPrefixSumType prefixSumType ); #endif #endif diff --git a/src/core/cuda/cuda-reduction-abs-max_impl.cu b/src/core/cuda/cuda-reduction-abs-max_impl.cu index 8540fc71a3..e69a225ada 100644 --- a/src/core/cuda/cuda-reduction-abs-max_impl.cu +++ b/src/core/cuda/cuda-reduction-abs-max_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > > - ( const tnlParallelReductionAbsMax< char, int >& operation, + ( tnlParallelReductionAbsMax< char, int >& operation, const typename tnlParallelReductionAbsMax< char, int > :: IndexType size, const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > > - ( const tnlParallelReductionAbsMax< int, int >& operation, + ( tnlParallelReductionAbsMax< int, int >& operation, const typename tnlParallelReductionAbsMax< int, int > :: IndexType size, const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > > - ( const tnlParallelReductionAbsMax< float, int >& operation, + ( tnlParallelReductionAbsMax< float, int >& operation, const typename tnlParallelReductionAbsMax< float, int > :: IndexType size, const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > > - ( const tnlParallelReductionAbsMax< double, int>& operation, + ( tnlParallelReductionAbsMax< double, int>& operation, const typename tnlParallelReductionAbsMax< double, int > :: IndexType size, const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > > - ( const tnlParallelReductionAbsMax< long double, int>& operation, + ( tnlParallelReductionAbsMax< long double, int>& operation, const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size, const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2, @@ -62,7 +62,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, in #endif template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > > - ( const tnlParallelReductionAbsMax< char, long int >& operation, + ( tnlParallelReductionAbsMax< char, long int >& operation, const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput2, @@ -70,21 +70,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > > - ( const tnlParallelReductionAbsMax< int, long int >& operation, + ( tnlParallelReductionAbsMax< int, long int >& operation, const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > > - ( const tnlParallelReductionAbsMax< float, long int >& operation, + ( tnlParallelReductionAbsMax< float, long int >& operation, const typename tnlParallelReductionAbsMax< float, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > > - ( const tnlParallelReductionAbsMax< double, long int>& operation, + ( tnlParallelReductionAbsMax< double, long int>& operation, const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > > - ( const tnlParallelReductionAbsMax< long double, long int>& operation, + ( tnlParallelReductionAbsMax< long double, long int>& operation, const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-abs-min_impl.cu b/src/core/cuda/cuda-reduction-abs-min_impl.cu index 629fa37ddc..21da19efa4 100644 --- a/src/core/cuda/cuda-reduction-abs-min_impl.cu +++ b/src/core/cuda/cuda-reduction-abs-min_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > > - ( const tnlParallelReductionAbsMin< char, int >& operation, + ( tnlParallelReductionAbsMin< char, int >& operation, const typename tnlParallelReductionAbsMin< char, int > :: IndexType size, const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > > - ( const tnlParallelReductionAbsMin< int, int >& operation, + ( tnlParallelReductionAbsMin< int, int >& operation, const typename tnlParallelReductionAbsMin< int, int > :: IndexType size, const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > > - ( const tnlParallelReductionAbsMin< float, int >& operation, + ( tnlParallelReductionAbsMin< float, int >& operation, const typename tnlParallelReductionAbsMin< float, int > :: IndexType size, const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > > - ( const tnlParallelReductionAbsMin< double, int>& operation, + ( tnlParallelReductionAbsMin< double, int>& operation, const typename tnlParallelReductionAbsMin< double, int > :: IndexType size, const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > > - ( const tnlParallelReductionAbsMin< long double, int>& operation, + ( tnlParallelReductionAbsMin< long double, int>& operation, const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size, const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput2, @@ -62,7 +62,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, in #endif template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > > - ( const tnlParallelReductionAbsMin< char, long int >& operation, + ( tnlParallelReductionAbsMin< char, long int >& operation, const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput2, @@ -70,21 +70,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > > - ( const tnlParallelReductionAbsMin< int, long int >& operation, + ( tnlParallelReductionAbsMin< int, long int >& operation, const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > > - ( const tnlParallelReductionAbsMin< float, long int >& operation, + ( tnlParallelReductionAbsMin< float, long int >& operation, const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > > - ( const tnlParallelReductionAbsMin< double, long int>& operation, + ( tnlParallelReductionAbsMin< double, long int>& operation, const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > > - ( const tnlParallelReductionAbsMin< long double, long int>& operation, + ( tnlParallelReductionAbsMin< long double, long int>& operation, const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-abs-sum_impl.cu b/src/core/cuda/cuda-reduction-abs-sum_impl.cu index a023631a03..96c8ab7a9c 100644 --- a/src/core/cuda/cuda-reduction-abs-sum_impl.cu +++ b/src/core/cuda/cuda-reduction-abs-sum_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, int > > - ( const tnlParallelReductionAbsSum< char, int >& operation, + ( tnlParallelReductionAbsSum< char, int >& operation, const typename tnlParallelReductionAbsSum< char, int > :: IndexType size, const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsSum< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, int > > - ( const tnlParallelReductionAbsSum< int, int >& operation, + ( tnlParallelReductionAbsSum< int, int >& operation, const typename tnlParallelReductionAbsSum< int, int > :: IndexType size, const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsSum< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, int > > - ( const tnlParallelReductionAbsSum< float, int >& operation, + ( tnlParallelReductionAbsSum< float, int >& operation, const typename tnlParallelReductionAbsSum< float, int > :: IndexType size, const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsSum< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, int > > - ( const tnlParallelReductionAbsSum< double, int>& operation, + ( tnlParallelReductionAbsSum< double, int>& operation, const typename tnlParallelReductionAbsSum< double, int > :: IndexType size, const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, int > > - ( const tnlParallelReductionAbsSum< long double, int>& operation, + ( tnlParallelReductionAbsSum< long double, int>& operation, const typename tnlParallelReductionAbsSum< long double, int > :: IndexType size, const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput2, @@ -62,7 +62,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, in #endif template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, long int > > - ( const tnlParallelReductionAbsSum< char, long int >& operation, + ( tnlParallelReductionAbsSum< char, long int >& operation, const typename tnlParallelReductionAbsSum< char, long int > :: IndexType size, const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput2, @@ -70,21 +70,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, long int #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, long int > > - ( const tnlParallelReductionAbsSum< int, long int >& operation, + ( tnlParallelReductionAbsSum< int, long int >& operation, const typename tnlParallelReductionAbsSum< int, long int > :: IndexType size, const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsSum< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, long int > > - ( const tnlParallelReductionAbsSum< float, long int >& operation, + ( tnlParallelReductionAbsSum< float, long int >& operation, const typename tnlParallelReductionAbsSum< float, long int > :: IndexType size, const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsSum< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, long int > > - ( const tnlParallelReductionAbsSum< double, long int>& operation, + ( tnlParallelReductionAbsSum< double, long int>& operation, const typename tnlParallelReductionAbsSum< double, long int > :: IndexType size, const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, long int > > - ( const tnlParallelReductionAbsSum< long double, long int>& operation, + ( tnlParallelReductionAbsSum< long double, long int>& operation, const typename tnlParallelReductionAbsSum< long double, long int > :: IndexType size, const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-and_impl.cu b/src/core/cuda/cuda-reduction-and_impl.cu index ac71e46e19..1baa1699f1 100644 --- a/src/core/cuda/cuda-reduction-and_impl.cu +++ b/src/core/cuda/cuda-reduction-and_impl.cu @@ -24,28 +24,28 @@ * Logical AND */ template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, int > > - ( const tnlParallelReductionLogicalAnd< char, int >& operation, + ( tnlParallelReductionLogicalAnd< char, int >& operation, const typename tnlParallelReductionLogicalAnd< char, int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalAnd< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, int > > - ( const tnlParallelReductionLogicalAnd< int, int >& operation, + ( tnlParallelReductionLogicalAnd< int, int >& operation, const typename tnlParallelReductionLogicalAnd< int, int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalAnd< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, int > > - ( const tnlParallelReductionLogicalAnd< float, int >& operation, + ( tnlParallelReductionLogicalAnd< float, int >& operation, const typename tnlParallelReductionLogicalAnd< float, int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalAnd< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, int > > - ( const tnlParallelReductionLogicalAnd< double, int>& operation, + ( tnlParallelReductionLogicalAnd< double, int>& operation, const typename tnlParallelReductionLogicalAnd< double, int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput2, @@ -53,7 +53,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, int > > - ( const tnlParallelReductionLogicalAnd< long double, int>& operation, + ( tnlParallelReductionLogicalAnd< long double, int>& operation, const typename tnlParallelReductionLogicalAnd< long double, int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput2, @@ -62,28 +62,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, long int > > - ( const tnlParallelReductionLogicalAnd< char, long int >& operation, + ( tnlParallelReductionLogicalAnd< char, long int >& operation, const typename tnlParallelReductionLogicalAnd< char, long int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalAnd< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, long int > > - ( const tnlParallelReductionLogicalAnd< int, long int >& operation, + ( tnlParallelReductionLogicalAnd< int, long int >& operation, const typename tnlParallelReductionLogicalAnd< int, long int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalAnd< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, long int > > - ( const tnlParallelReductionLogicalAnd< float, long int >& operation, + ( tnlParallelReductionLogicalAnd< float, long int >& operation, const typename tnlParallelReductionLogicalAnd< float, long int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalAnd< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, long int > > - ( const tnlParallelReductionLogicalAnd< double, long int>& operation, + ( tnlParallelReductionLogicalAnd< double, long int>& operation, const typename tnlParallelReductionLogicalAnd< double, long int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput2, @@ -91,7 +91,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, lon #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, long int > > - ( const tnlParallelReductionLogicalAnd< long double, long int>& operation, + ( tnlParallelReductionLogicalAnd< long double, long int>& operation, const typename tnlParallelReductionLogicalAnd< long double, long int > :: IndexType size, const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-abs-max_impl.cu b/src/core/cuda/cuda-reduction-diff-abs-max_impl.cu index 291810ef74..a65ec09dee 100644 --- a/src/core/cuda/cuda-reduction-diff-abs-max_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-abs-max_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, int > > - ( const tnlParallelReductionDiffAbsMax< char, int >& operation, + ( tnlParallelReductionDiffAbsMax< char, int >& operation, const typename tnlParallelReductionDiffAbsMax< char, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMax< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, int > > - ( const tnlParallelReductionDiffAbsMax< int, int >& operation, + ( tnlParallelReductionDiffAbsMax< int, int >& operation, const typename tnlParallelReductionDiffAbsMax< int, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMax< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, int > > - ( const tnlParallelReductionDiffAbsMax< float, int >& operation, + ( tnlParallelReductionDiffAbsMax< float, int >& operation, const typename tnlParallelReductionDiffAbsMax< float, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMax< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, int > > - ( const tnlParallelReductionDiffAbsMax< double, int>& operation, + ( tnlParallelReductionDiffAbsMax< double, int>& operation, const typename tnlParallelReductionDiffAbsMax< double, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, int > > - ( const tnlParallelReductionDiffAbsMax< long double, int>& operation, + ( tnlParallelReductionDiffAbsMax< long double, int>& operation, const typename tnlParallelReductionDiffAbsMax< long double, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput2, @@ -63,28 +63,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, long int > > - ( const tnlParallelReductionDiffAbsMax< char, long int >& operation, + ( tnlParallelReductionDiffAbsMax< char, long int >& operation, const typename tnlParallelReductionDiffAbsMax< char, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMax< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, long int > > - ( const tnlParallelReductionDiffAbsMax< int, long int >& operation, + ( tnlParallelReductionDiffAbsMax< int, long int >& operation, const typename tnlParallelReductionDiffAbsMax< int, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMax< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, long int > > - ( const tnlParallelReductionDiffAbsMax< float, long int >& operation, + ( tnlParallelReductionDiffAbsMax< float, long int >& operation, const typename tnlParallelReductionDiffAbsMax< float, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMax< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, long int > > - ( const tnlParallelReductionDiffAbsMax< double, long int>& operation, + ( tnlParallelReductionDiffAbsMax< double, long int>& operation, const typename tnlParallelReductionDiffAbsMax< double, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, lon #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, long int > > - ( const tnlParallelReductionDiffAbsMax< long double, long int>& operation, + ( tnlParallelReductionDiffAbsMax< long double, long int>& operation, const typename tnlParallelReductionDiffAbsMax< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-abs-min_impl.cu b/src/core/cuda/cuda-reduction-diff-abs-min_impl.cu index d9ce714abd..9610ee2675 100644 --- a/src/core/cuda/cuda-reduction-diff-abs-min_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-abs-min_impl.cu @@ -26,28 +26,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, int > > - ( const tnlParallelReductionDiffAbsMin< char, int >& operation, + ( tnlParallelReductionDiffAbsMin< char, int >& operation, const typename tnlParallelReductionDiffAbsMin< char, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMin< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, int > > - ( const tnlParallelReductionDiffAbsMin< int, int >& operation, + ( tnlParallelReductionDiffAbsMin< int, int >& operation, const typename tnlParallelReductionDiffAbsMin< int, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMin< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, int > > - ( const tnlParallelReductionDiffAbsMin< float, int >& operation, + ( tnlParallelReductionDiffAbsMin< float, int >& operation, const typename tnlParallelReductionDiffAbsMin< float, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMin< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, int > > - ( const tnlParallelReductionDiffAbsMin< double, int>& operation, + ( tnlParallelReductionDiffAbsMin< double, int>& operation, const typename tnlParallelReductionDiffAbsMin< double, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput2, @@ -55,7 +55,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, int > > - ( const tnlParallelReductionDiffAbsMin< long double, int>& operation, + ( tnlParallelReductionDiffAbsMin< long double, int>& operation, const typename tnlParallelReductionDiffAbsMin< long double, int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput2, @@ -64,28 +64,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, long int > > - ( const tnlParallelReductionDiffAbsMin< char, long int >& operation, + ( tnlParallelReductionDiffAbsMin< char, long int >& operation, const typename tnlParallelReductionDiffAbsMin< char, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMin< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, long int > > - ( const tnlParallelReductionDiffAbsMin< int, long int >& operation, + ( tnlParallelReductionDiffAbsMin< int, long int >& operation, const typename tnlParallelReductionDiffAbsMin< int, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMin< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, long int > > - ( const tnlParallelReductionDiffAbsMin< float, long int >& operation, + ( tnlParallelReductionDiffAbsMin< float, long int >& operation, const typename tnlParallelReductionDiffAbsMin< float, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsMin< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, long int > > - ( const tnlParallelReductionDiffAbsMin< double, long int>& operation, + ( tnlParallelReductionDiffAbsMin< double, long int>& operation, const typename tnlParallelReductionDiffAbsMin< double, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput2, @@ -93,7 +93,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, lon #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, long int > > - ( const tnlParallelReductionDiffAbsMin< long double, long int>& operation, + ( tnlParallelReductionDiffAbsMin< long double, long int>& operation, const typename tnlParallelReductionDiffAbsMin< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-abs-sum_impl.cu b/src/core/cuda/cuda-reduction-diff-abs-sum_impl.cu index 5298d03349..b70f953add 100644 --- a/src/core/cuda/cuda-reduction-diff-abs-sum_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-abs-sum_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, int > > - ( const tnlParallelReductionDiffAbsSum< char, int >& operation, + ( tnlParallelReductionDiffAbsSum< char, int >& operation, const typename tnlParallelReductionDiffAbsSum< char, int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsSum< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, int > > - ( const tnlParallelReductionDiffAbsSum< int, int >& operation, + ( tnlParallelReductionDiffAbsSum< int, int >& operation, const typename tnlParallelReductionDiffAbsSum< int, int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsSum< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, int > > - ( const tnlParallelReductionDiffAbsSum< float, int >& operation, + ( tnlParallelReductionDiffAbsSum< float, int >& operation, const typename tnlParallelReductionDiffAbsSum< float, int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsSum< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, int > > - ( const tnlParallelReductionDiffAbsSum< double, int>& operation, + ( tnlParallelReductionDiffAbsSum< double, int>& operation, const typename tnlParallelReductionDiffAbsSum< double, int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, int > > - ( const tnlParallelReductionDiffAbsSum< long double, int>& operation, + ( tnlParallelReductionDiffAbsSum< long double, int>& operation, const typename tnlParallelReductionDiffAbsSum< long double, int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput2, @@ -63,28 +63,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, long int > > - ( const tnlParallelReductionDiffAbsSum< char, long int >& operation, + ( tnlParallelReductionDiffAbsSum< char, long int >& operation, const typename tnlParallelReductionDiffAbsSum< char, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsSum< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, long int > > - ( const tnlParallelReductionDiffAbsSum< int, long int >& operation, + ( tnlParallelReductionDiffAbsSum< int, long int >& operation, const typename tnlParallelReductionDiffAbsSum< int, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsSum< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, long int > > - ( const tnlParallelReductionDiffAbsSum< float, long int >& operation, + ( tnlParallelReductionDiffAbsSum< float, long int >& operation, const typename tnlParallelReductionDiffAbsSum< float, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffAbsSum< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, long int > > - ( const tnlParallelReductionDiffAbsSum< double, long int>& operation, + ( tnlParallelReductionDiffAbsSum< double, long int>& operation, const typename tnlParallelReductionDiffAbsSum< double, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, lon #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, long int > > - ( const tnlParallelReductionDiffAbsSum< long double, long int>& operation, + ( tnlParallelReductionDiffAbsSum< long double, long int>& operation, const typename tnlParallelReductionDiffAbsSum< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-l2-norm_impl.cu b/src/core/cuda/cuda-reduction-diff-l2-norm_impl.cu index 4f3e95b749..a6bfae5b94 100644 --- a/src/core/cuda/cuda-reduction-diff-l2-norm_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-l2-norm_impl.cu @@ -24,14 +24,14 @@ * Diff L2 Norm */ template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, int > > - ( const tnlParallelReductionDiffL2Norm< float, int >& operation, + ( tnlParallelReductionDiffL2Norm< float, int >& operation, const typename tnlParallelReductionDiffL2Norm< float, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, int > > - ( const tnlParallelReductionDiffL2Norm< double, int>& operation, + ( tnlParallelReductionDiffL2Norm< double, int>& operation, const typename tnlParallelReductionDiffL2Norm< double, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput2, @@ -39,7 +39,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, int > > - ( const tnlParallelReductionDiffL2Norm< long double, int>& operation, + ( tnlParallelReductionDiffL2Norm< long double, int>& operation, const typename tnlParallelReductionDiffL2Norm< long double, int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput2, @@ -48,28 +48,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< char, long int > > - ( const tnlParallelReductionDiffL2Norm< char, long int >& operation, + ( tnlParallelReductionDiffL2Norm< char, long int >& operation, const typename tnlParallelReductionDiffL2Norm< char, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< int, long int > > - ( const tnlParallelReductionDiffL2Norm< int, long int >& operation, + ( tnlParallelReductionDiffL2Norm< int, long int >& operation, const typename tnlParallelReductionDiffL2Norm< int, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, long int > > - ( const tnlParallelReductionDiffL2Norm< float, long int >& operation, + ( tnlParallelReductionDiffL2Norm< float, long int >& operation, const typename tnlParallelReductionDiffL2Norm< float, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffL2Norm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, long int > > - ( const tnlParallelReductionDiffL2Norm< double, long int>& operation, + ( tnlParallelReductionDiffL2Norm< double, long int>& operation, const typename tnlParallelReductionDiffL2Norm< double, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput2, @@ -77,7 +77,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, lon #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, long int > > - ( const tnlParallelReductionDiffL2Norm< long double, long int>& operation, + ( tnlParallelReductionDiffL2Norm< long double, long int>& operation, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-lp-norm_impl.cu b/src/core/cuda/cuda-reduction-diff-lp-norm_impl.cu index 2359564477..a762c1d194 100644 --- a/src/core/cuda/cuda-reduction-diff-lp-norm_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-lp-norm_impl.cu @@ -24,14 +24,14 @@ * Diff Lp Norm */ template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, int > > - ( const tnlParallelReductionDiffLpNorm< float, int >& operation, + ( tnlParallelReductionDiffLpNorm< float, int >& operation, const typename tnlParallelReductionDiffLpNorm< float, int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffLpNorm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, int > > - ( const tnlParallelReductionDiffLpNorm< double, int>& operation, + ( tnlParallelReductionDiffLpNorm< double, int>& operation, const typename tnlParallelReductionDiffLpNorm< double, int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput2, @@ -39,7 +39,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, int > > - ( const tnlParallelReductionDiffLpNorm< long double, int>& operation, + ( tnlParallelReductionDiffLpNorm< long double, int>& operation, const typename tnlParallelReductionDiffLpNorm< long double, int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput2, @@ -48,28 +48,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< char, long int > > - ( const tnlParallelReductionDiffLpNorm< char, long int >& operation, + ( tnlParallelReductionDiffLpNorm< char, long int >& operation, const typename tnlParallelReductionDiffLpNorm< char, long int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffLpNorm< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< int, long int > > - ( const tnlParallelReductionDiffLpNorm< int, long int >& operation, + ( tnlParallelReductionDiffLpNorm< int, long int >& operation, const typename tnlParallelReductionDiffLpNorm< int, long int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffLpNorm< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, long int > > - ( const tnlParallelReductionDiffLpNorm< float, long int >& operation, + ( tnlParallelReductionDiffLpNorm< float, long int >& operation, const typename tnlParallelReductionDiffLpNorm< float, long int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffLpNorm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, long int > > - ( const tnlParallelReductionDiffLpNorm< double, long int>& operation, + ( tnlParallelReductionDiffLpNorm< double, long int>& operation, const typename tnlParallelReductionDiffLpNorm< double, long int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput2, @@ -77,7 +77,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, lon #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, long int > > - ( const tnlParallelReductionDiffLpNorm< long double, long int>& operation, + ( tnlParallelReductionDiffLpNorm< long double, long int>& operation, const typename tnlParallelReductionDiffLpNorm< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-max_impl.cu b/src/core/cuda/cuda-reduction-diff-max_impl.cu index fe91ae6ef6..822d9e779c 100644 --- a/src/core/cuda/cuda-reduction-diff-max_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-max_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, int > > - ( const tnlParallelReductionDiffMax< char, int >& operation, + ( tnlParallelReductionDiffMax< char, int >& operation, const typename tnlParallelReductionDiffMax< char, int > :: IndexType size, const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMax< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, int > > - ( const tnlParallelReductionDiffMax< int, int >& operation, + ( tnlParallelReductionDiffMax< int, int >& operation, const typename tnlParallelReductionDiffMax< int, int > :: IndexType size, const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMax< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, int > > - ( const tnlParallelReductionDiffMax< float, int >& operation, + ( tnlParallelReductionDiffMax< float, int >& operation, const typename tnlParallelReductionDiffMax< float, int > :: IndexType size, const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMax< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, int > > - ( const tnlParallelReductionDiffMax< double, int>& operation, + ( tnlParallelReductionDiffMax< double, int>& operation, const typename tnlParallelReductionDiffMax< double, int > :: IndexType size, const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, int > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, int > > - ( const tnlParallelReductionDiffMax< long double, int>& operation, + ( tnlParallelReductionDiffMax< long double, int>& operation, const typename tnlParallelReductionDiffMax< long double, int > :: IndexType size, const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput2, @@ -63,28 +63,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, i #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, long int > > - ( const tnlParallelReductionDiffMax< char, long int >& operation, + ( tnlParallelReductionDiffMax< char, long int >& operation, const typename tnlParallelReductionDiffMax< char, long int > :: IndexType size, const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMax< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, long int > > - ( const tnlParallelReductionDiffMax< int, long int >& operation, + ( tnlParallelReductionDiffMax< int, long int >& operation, const typename tnlParallelReductionDiffMax< int, long int > :: IndexType size, const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMax< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, long int > > - ( const tnlParallelReductionDiffMax< float, long int >& operation, + ( tnlParallelReductionDiffMax< float, long int >& operation, const typename tnlParallelReductionDiffMax< float, long int > :: IndexType size, const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMax< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, long int > > - ( const tnlParallelReductionDiffMax< double, long int>& operation, + ( tnlParallelReductionDiffMax< double, long int>& operation, const typename tnlParallelReductionDiffMax< double, long int > :: IndexType size, const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, long i #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, long int > > - ( const tnlParallelReductionDiffMax< long double, long int>& operation, + ( tnlParallelReductionDiffMax< long double, long int>& operation, const typename tnlParallelReductionDiffMax< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-min_impl.cu b/src/core/cuda/cuda-reduction-diff-min_impl.cu index ed13335b8b..5973ad0bcf 100644 --- a/src/core/cuda/cuda-reduction-diff-min_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-min_impl.cu @@ -26,28 +26,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, int > > - ( const tnlParallelReductionDiffMin< char, int >& operation, + ( tnlParallelReductionDiffMin< char, int >& operation, const typename tnlParallelReductionDiffMin< char, int > :: IndexType size, const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMin< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, int > > - ( const tnlParallelReductionDiffMin< int, int >& operation, + ( tnlParallelReductionDiffMin< int, int >& operation, const typename tnlParallelReductionDiffMin< int, int > :: IndexType size, const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMin< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, int > > - ( const tnlParallelReductionDiffMin< float, int >& operation, + ( tnlParallelReductionDiffMin< float, int >& operation, const typename tnlParallelReductionDiffMin< float, int > :: IndexType size, const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMin< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, int > > - ( const tnlParallelReductionDiffMin< double, int>& operation, + ( tnlParallelReductionDiffMin< double, int>& operation, const typename tnlParallelReductionDiffMin< double, int > :: IndexType size, const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput2, @@ -55,7 +55,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, int > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, int > > - ( const tnlParallelReductionDiffMin< long double, int>& operation, + ( tnlParallelReductionDiffMin< long double, int>& operation, const typename tnlParallelReductionDiffMin< long double, int > :: IndexType size, const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput2, @@ -64,28 +64,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, i #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, long int > > - ( const tnlParallelReductionDiffMin< char, long int >& operation, + ( tnlParallelReductionDiffMin< char, long int >& operation, const typename tnlParallelReductionDiffMin< char, long int > :: IndexType size, const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMin< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, long int > > - ( const tnlParallelReductionDiffMin< int, long int >& operation, + ( tnlParallelReductionDiffMin< int, long int >& operation, const typename tnlParallelReductionDiffMin< int, long int > :: IndexType size, const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMin< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, long int > > - ( const tnlParallelReductionDiffMin< float, long int >& operation, + ( tnlParallelReductionDiffMin< float, long int >& operation, const typename tnlParallelReductionDiffMin< float, long int > :: IndexType size, const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffMin< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, long int > > - ( const tnlParallelReductionDiffMin< double, long int>& operation, + ( tnlParallelReductionDiffMin< double, long int>& operation, const typename tnlParallelReductionDiffMin< double, long int > :: IndexType size, const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput2, @@ -93,7 +93,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, long i #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, long int > > - ( const tnlParallelReductionDiffMin< long double, long int>& operation, + ( tnlParallelReductionDiffMin< long double, long int>& operation, const typename tnlParallelReductionDiffMin< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-diff-sum_impl.cu b/src/core/cuda/cuda-reduction-diff-sum_impl.cu index aa08778ea7..8f2eb27587 100644 --- a/src/core/cuda/cuda-reduction-diff-sum_impl.cu +++ b/src/core/cuda/cuda-reduction-diff-sum_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, int > > - ( const tnlParallelReductionDiffSum< char, int >& operation, + ( tnlParallelReductionDiffSum< char, int >& operation, const typename tnlParallelReductionDiffSum< char, int > :: IndexType size, const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffSum< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, int > > - ( const tnlParallelReductionDiffSum< int, int >& operation, + ( tnlParallelReductionDiffSum< int, int >& operation, const typename tnlParallelReductionDiffSum< int, int > :: IndexType size, const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffSum< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, int > > - ( const tnlParallelReductionDiffSum< float, int >& operation, + ( tnlParallelReductionDiffSum< float, int >& operation, const typename tnlParallelReductionDiffSum< float, int > :: IndexType size, const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput2, typename tnlParallelReductionDiffSum< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, int > > - ( const tnlParallelReductionDiffSum< double, int>& operation, + ( tnlParallelReductionDiffSum< double, int>& operation, const typename tnlParallelReductionDiffSum< double, int > :: IndexType size, const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, int > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, int > > - ( const tnlParallelReductionDiffSum< long double, int>& operation, + ( tnlParallelReductionDiffSum< long double, int>& operation, const typename tnlParallelReductionDiffSum< long double, int > :: IndexType size, const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput2, @@ -63,28 +63,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, i #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, long int > > - ( const tnlParallelReductionDiffSum< char, long int >& operation, + ( tnlParallelReductionDiffSum< char, long int >& operation, const typename tnlParallelReductionDiffSum< char, long int > :: IndexType size, const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffSum< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, long int > > - ( const tnlParallelReductionDiffSum< int, long int >& operation, + ( tnlParallelReductionDiffSum< int, long int >& operation, const typename tnlParallelReductionDiffSum< int, long int > :: IndexType size, const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffSum< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, long int > > - ( const tnlParallelReductionDiffSum< float, long int >& operation, + ( tnlParallelReductionDiffSum< float, long int >& operation, const typename tnlParallelReductionDiffSum< float, long int > :: IndexType size, const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionDiffSum< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, long int > > - ( const tnlParallelReductionDiffSum< double, long int>& operation, + ( tnlParallelReductionDiffSum< double, long int>& operation, const typename tnlParallelReductionDiffSum< double, long int > :: IndexType size, const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, long i #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, long int > > - ( const tnlParallelReductionDiffSum< long double, long int>& operation, + ( tnlParallelReductionDiffSum< long double, long int>& operation, const typename tnlParallelReductionDiffSum< long double, long int > :: IndexType size, const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-equalities_impl.cu b/src/core/cuda/cuda-reduction-equalities_impl.cu index 6bf7f02630..398012099e 100644 --- a/src/core/cuda/cuda-reduction-equalities_impl.cu +++ b/src/core/cuda/cuda-reduction-equalities_impl.cu @@ -24,28 +24,28 @@ * Equalities */ template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, int > > - ( const tnlParallelReductionEqualities< char, int >& operation, + ( tnlParallelReductionEqualities< char, int >& operation, const typename tnlParallelReductionEqualities< char, int > :: IndexType size, const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput2, typename tnlParallelReductionEqualities< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, int > > - ( const tnlParallelReductionEqualities< int, int >& operation, + ( tnlParallelReductionEqualities< int, int >& operation, const typename tnlParallelReductionEqualities< int, int > :: IndexType size, const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput2, typename tnlParallelReductionEqualities< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, int > > - ( const tnlParallelReductionEqualities< float, int >& operation, + ( tnlParallelReductionEqualities< float, int >& operation, const typename tnlParallelReductionEqualities< float, int > :: IndexType size, const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput2, typename tnlParallelReductionEqualities< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, int > > - ( const tnlParallelReductionEqualities< double, int>& operation, + ( tnlParallelReductionEqualities< double, int>& operation, const typename tnlParallelReductionEqualities< double, int > :: IndexType size, const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput2, @@ -53,7 +53,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, int > > - ( const tnlParallelReductionEqualities< long double, int>& operation, + ( tnlParallelReductionEqualities< long double, int>& operation, const typename tnlParallelReductionEqualities< long double, int > :: IndexType size, const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput2, @@ -62,28 +62,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, long int > > - ( const tnlParallelReductionEqualities< char, long int >& operation, + ( tnlParallelReductionEqualities< char, long int >& operation, const typename tnlParallelReductionEqualities< char, long int > :: IndexType size, const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionEqualities< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, long int > > - ( const tnlParallelReductionEqualities< int, long int >& operation, + ( tnlParallelReductionEqualities< int, long int >& operation, const typename tnlParallelReductionEqualities< int, long int > :: IndexType size, const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionEqualities< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, long int > > - ( const tnlParallelReductionEqualities< float, long int >& operation, + ( tnlParallelReductionEqualities< float, long int >& operation, const typename tnlParallelReductionEqualities< float, long int > :: IndexType size, const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionEqualities< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, long int > > - ( const tnlParallelReductionEqualities< double, long int>& operation, + ( tnlParallelReductionEqualities< double, long int>& operation, const typename tnlParallelReductionEqualities< double, long int > :: IndexType size, const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput2, @@ -91,7 +91,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, lon #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, long int > > - ( const tnlParallelReductionEqualities< long double, long int>& operation, + ( tnlParallelReductionEqualities< long double, long int>& operation, const typename tnlParallelReductionEqualities< long double, long int > :: IndexType size, const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-inequalities_impl.cu b/src/core/cuda/cuda-reduction-inequalities_impl.cu index 828c88af10..479d9b2a31 100644 --- a/src/core/cuda/cuda-reduction-inequalities_impl.cu +++ b/src/core/cuda/cuda-reduction-inequalities_impl.cu @@ -24,28 +24,28 @@ * Inequalities */ template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, int > > - ( const tnlParallelReductionInequalities< char, int >& operation, + ( tnlParallelReductionInequalities< char, int >& operation, const typename tnlParallelReductionInequalities< char, int > :: IndexType size, const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput2, typename tnlParallelReductionInequalities< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, int > > - ( const tnlParallelReductionInequalities< int, int >& operation, + ( tnlParallelReductionInequalities< int, int >& operation, const typename tnlParallelReductionInequalities< int, int > :: IndexType size, const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput2, typename tnlParallelReductionInequalities< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, int > > - ( const tnlParallelReductionInequalities< float, int >& operation, + ( tnlParallelReductionInequalities< float, int >& operation, const typename tnlParallelReductionInequalities< float, int > :: IndexType size, const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput2, typename tnlParallelReductionInequalities< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, int > > - ( const tnlParallelReductionInequalities< double, int>& operation, + ( tnlParallelReductionInequalities< double, int>& operation, const typename tnlParallelReductionInequalities< double, int > :: IndexType size, const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput2, @@ -53,7 +53,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, i #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, int > > - ( const tnlParallelReductionInequalities< long double, int>& operation, + ( tnlParallelReductionInequalities< long double, int>& operation, const typename tnlParallelReductionInequalities< long double, int > :: IndexType size, const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput2, @@ -62,28 +62,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long doub #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, long int > > - ( const tnlParallelReductionInequalities< char, long int >& operation, + ( tnlParallelReductionInequalities< char, long int >& operation, const typename tnlParallelReductionInequalities< char, long int > :: IndexType size, const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionInequalities< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, long int > > - ( const tnlParallelReductionInequalities< int, long int >& operation, + ( tnlParallelReductionInequalities< int, long int >& operation, const typename tnlParallelReductionInequalities< int, long int > :: IndexType size, const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionInequalities< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, long int > > - ( const tnlParallelReductionInequalities< float, long int >& operation, + ( tnlParallelReductionInequalities< float, long int >& operation, const typename tnlParallelReductionInequalities< float, long int > :: IndexType size, const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionInequalities< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, long int > > - ( const tnlParallelReductionInequalities< double, long int>& operation, + ( tnlParallelReductionInequalities< double, long int>& operation, const typename tnlParallelReductionInequalities< double, long int > :: IndexType size, const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput2, @@ -91,7 +91,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, l #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, long int > > - ( const tnlParallelReductionInequalities< long double, long int>& operation, + ( tnlParallelReductionInequalities< long double, long int>& operation, const typename tnlParallelReductionInequalities< long double, long int > :: IndexType size, const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-l2-norm_impl.cu b/src/core/cuda/cuda-reduction-l2-norm_impl.cu index 29e6b265c1..214a15b53b 100644 --- a/src/core/cuda/cuda-reduction-l2-norm_impl.cu +++ b/src/core/cuda/cuda-reduction-l2-norm_impl.cu @@ -1,5 +1,5 @@ /*************************************************************************** - cuda-reduction-lp-norm_impl.cu - description + cuda-reduction-l2-norm_impl.cu - description ------------------- begin : Jan 19, 2014 copyright : (C) 2014 by Tomas Oberhuber @@ -24,14 +24,14 @@ * L2 Norm */ template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, int > > - ( const tnlParallelReductionL2Norm< float, int >& operation, + ( tnlParallelReductionL2Norm< float, int >& operation, const typename tnlParallelReductionL2Norm< float, int > :: IndexType size, const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, int > > - ( const tnlParallelReductionL2Norm< double, int>& operation, + ( tnlParallelReductionL2Norm< double, int>& operation, const typename tnlParallelReductionL2Norm< double, int > :: IndexType size, const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput2, @@ -39,7 +39,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, int > > - ( const tnlParallelReductionL2Norm< long double, int>& operation, + ( tnlParallelReductionL2Norm< long double, int>& operation, const typename tnlParallelReductionL2Norm< long double, int > :: IndexType size, const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput2, @@ -48,21 +48,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, in #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< int, long int > > - ( const tnlParallelReductionL2Norm< int, long int >& operation, + ( tnlParallelReductionL2Norm< int, long int >& operation, const typename tnlParallelReductionL2Norm< int, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< int, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, long int > > - ( const tnlParallelReductionL2Norm< float, long int >& operation, + ( tnlParallelReductionL2Norm< float, long int >& operation, const typename tnlParallelReductionL2Norm< float, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionL2Norm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, long int > > - ( const tnlParallelReductionL2Norm< double, long int>& operation, + ( tnlParallelReductionL2Norm< double, long int>& operation, const typename tnlParallelReductionL2Norm< double, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput2, @@ -70,7 +70,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, long int > > - ( const tnlParallelReductionL2Norm< long double, long int>& operation, + ( tnlParallelReductionL2Norm< long double, long int>& operation, const typename tnlParallelReductionL2Norm< long double, long int > :: IndexType size, const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-lp-norm_impl.cu b/src/core/cuda/cuda-reduction-lp-norm_impl.cu index a5f5d6644c..4de9501590 100644 --- a/src/core/cuda/cuda-reduction-lp-norm_impl.cu +++ b/src/core/cuda/cuda-reduction-lp-norm_impl.cu @@ -24,14 +24,14 @@ * Lp Norm */ template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, int > > - ( const tnlParallelReductionLpNorm< float, int >& operation, + ( tnlParallelReductionLpNorm< float, int >& operation, const typename tnlParallelReductionLpNorm< float, int > :: IndexType size, const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput2, typename tnlParallelReductionLpNorm< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, int > > - ( const tnlParallelReductionLpNorm< double, int>& operation, + ( tnlParallelReductionLpNorm< double, int>& operation, const typename tnlParallelReductionLpNorm< double, int > :: IndexType size, const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput2, @@ -39,7 +39,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, int > > - ( const tnlParallelReductionLpNorm< long double, int>& operation, + ( tnlParallelReductionLpNorm< long double, int>& operation, const typename tnlParallelReductionLpNorm< long double, int > :: IndexType size, const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput2, @@ -48,21 +48,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, in #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< int, long int > > - ( const tnlParallelReductionLpNorm< int, long int >& operation, + ( tnlParallelReductionLpNorm< int, long int >& operation, const typename tnlParallelReductionLpNorm< int, long int > :: IndexType size, const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionLpNorm< int, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, long int > > - ( const tnlParallelReductionLpNorm< float, long int >& operation, + ( tnlParallelReductionLpNorm< float, long int >& operation, const typename tnlParallelReductionLpNorm< float, long int > :: IndexType size, const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionLpNorm< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, long int > > - ( const tnlParallelReductionLpNorm< double, long int>& operation, + ( tnlParallelReductionLpNorm< double, long int>& operation, const typename tnlParallelReductionLpNorm< double, long int > :: IndexType size, const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput2, @@ -70,7 +70,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, long int > > - ( const tnlParallelReductionLpNorm< long double, long int>& operation, + ( tnlParallelReductionLpNorm< long double, long int>& operation, const typename tnlParallelReductionLpNorm< long double, long int > :: IndexType size, const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-max_impl.cu b/src/core/cuda/cuda-reduction-max_impl.cu index cba153c81b..cfca4156f9 100644 --- a/src/core/cuda/cuda-reduction-max_impl.cu +++ b/src/core/cuda/cuda-reduction-max_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionMax< char, int > > - ( const tnlParallelReductionMax< char, int >& operation, + ( tnlParallelReductionMax< char, int >& operation, const typename tnlParallelReductionMax< char, int > :: IndexType size, const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput2, typename tnlParallelReductionMax< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMax< int, int > > - ( const tnlParallelReductionMax< int, int >& operation, + ( tnlParallelReductionMax< int, int >& operation, const typename tnlParallelReductionMax< int, int > :: IndexType size, const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput2, typename tnlParallelReductionMax< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMax< float, int > > - ( const tnlParallelReductionMax< float, int >& operation, + ( tnlParallelReductionMax< float, int >& operation, const typename tnlParallelReductionMax< float, int > :: IndexType size, const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput2, typename tnlParallelReductionMax< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMax< double, int > > - ( const tnlParallelReductionMax< double, int>& operation, + ( tnlParallelReductionMax< double, int>& operation, const typename tnlParallelReductionMax< double, int > :: IndexType size, const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionMax< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, int > > - ( const tnlParallelReductionMax< long double, int>& operation, + ( tnlParallelReductionMax< long double, int>& operation, const typename tnlParallelReductionMax< long double, int > :: IndexType size, const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput2, @@ -63,28 +63,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, int > #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionMax< char, long int > > - ( const tnlParallelReductionMax< char, long int >& operation, + ( tnlParallelReductionMax< char, long int >& operation, const typename tnlParallelReductionMax< char, long int > :: IndexType size, const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionMax< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMax< int, long int > > - ( const tnlParallelReductionMax< int, long int >& operation, + ( tnlParallelReductionMax< int, long int >& operation, const typename tnlParallelReductionMax< int, long int > :: IndexType size, const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionMax< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMax< float, long int > > - ( const tnlParallelReductionMax< float, long int >& operation, + ( tnlParallelReductionMax< float, long int >& operation, const typename tnlParallelReductionMax< float, long int > :: IndexType size, const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionMax< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMax< double, long int > > - ( const tnlParallelReductionMax< double, long int>& operation, + ( tnlParallelReductionMax< double, long int>& operation, const typename tnlParallelReductionMax< double, long int > :: IndexType size, const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionMax< double, long int > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, long int > > - ( const tnlParallelReductionMax< long double, long int>& operation, + ( tnlParallelReductionMax< long double, long int>& operation, const typename tnlParallelReductionMax< long double, long int > :: IndexType size, const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-min_impl.cu b/src/core/cuda/cuda-reduction-min_impl.cu index dc5a1f4140..535b38a32f 100644 --- a/src/core/cuda/cuda-reduction-min_impl.cu +++ b/src/core/cuda/cuda-reduction-min_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionMin< char, int > > - ( const tnlParallelReductionMin< char, int >& operation, + ( tnlParallelReductionMin< char, int >& operation, const typename tnlParallelReductionMin< char, int > :: IndexType size, const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput2, typename tnlParallelReductionMin< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMin< int, int > > - ( const tnlParallelReductionMin< int, int >& operation, + ( tnlParallelReductionMin< int, int >& operation, const typename tnlParallelReductionMin< int, int > :: IndexType size, const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput2, typename tnlParallelReductionMin< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMin< float, int > > - ( const tnlParallelReductionMin< float, int >& operation, + ( tnlParallelReductionMin< float, int >& operation, const typename tnlParallelReductionMin< float, int > :: IndexType size, const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput2, typename tnlParallelReductionMin< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMin< double, int > > - ( const tnlParallelReductionMin< double, int>& operation, + ( tnlParallelReductionMin< double, int>& operation, const typename tnlParallelReductionMin< double, int > :: IndexType size, const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionMin< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, int > > - ( const tnlParallelReductionMin< long double, int>& operation, + ( tnlParallelReductionMin< long double, int>& operation, const typename tnlParallelReductionMin< long double, int > :: IndexType size, const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput2, @@ -63,28 +63,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, int > #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionMin< char, long int > > - ( const tnlParallelReductionMin< char, long int >& operation, + ( tnlParallelReductionMin< char, long int >& operation, const typename tnlParallelReductionMin< char, long int > :: IndexType size, const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionMin< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMin< int, long int > > - ( const tnlParallelReductionMin< int, long int >& operation, + ( tnlParallelReductionMin< int, long int >& operation, const typename tnlParallelReductionMin< int, long int > :: IndexType size, const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionMin< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMin< float, long int > > - ( const tnlParallelReductionMin< float, long int >& operation, + ( tnlParallelReductionMin< float, long int >& operation, const typename tnlParallelReductionMin< float, long int > :: IndexType size, const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionMin< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionMin< double, long int > > - ( const tnlParallelReductionMin< double, long int>& operation, + ( tnlParallelReductionMin< double, long int>& operation, const typename tnlParallelReductionMin< double, long int > :: IndexType size, const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput2, @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionMin< double, long int > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, long int > > - ( const tnlParallelReductionMin< long double, long int>& operation, + ( tnlParallelReductionMin< long double, long int>& operation, const typename tnlParallelReductionMin< long double, long int > :: IndexType size, const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-or_impl.cu b/src/core/cuda/cuda-reduction-or_impl.cu index 811ec445fd..f312e5ff45 100644 --- a/src/core/cuda/cuda-reduction-or_impl.cu +++ b/src/core/cuda/cuda-reduction-or_impl.cu @@ -24,28 +24,28 @@ * Logical OR */ template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, int > > - ( const tnlParallelReductionLogicalOr< char, int >& operation, + ( tnlParallelReductionLogicalOr< char, int >& operation, const typename tnlParallelReductionLogicalOr< char, int > :: IndexType size, const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalOr< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, int > > - ( const tnlParallelReductionLogicalOr< int, int >& operation, + ( tnlParallelReductionLogicalOr< int, int >& operation, const typename tnlParallelReductionLogicalOr< int, int > :: IndexType size, const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalOr< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, int > > - ( const tnlParallelReductionLogicalOr< float, int >& operation, + ( tnlParallelReductionLogicalOr< float, int >& operation, const typename tnlParallelReductionLogicalOr< float, int > :: IndexType size, const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalOr< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, int > > - ( const tnlParallelReductionLogicalOr< double, int>& operation, + ( tnlParallelReductionLogicalOr< double, int>& operation, const typename tnlParallelReductionLogicalOr< double, int > :: IndexType size, const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput2, @@ -53,7 +53,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, int #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, int > > - ( const tnlParallelReductionLogicalOr< long double, int>& operation, + ( tnlParallelReductionLogicalOr< long double, int>& operation, const typename tnlParallelReductionLogicalOr< long double, int > :: IndexType size, const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput2, @@ -62,28 +62,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, long int > > - ( const tnlParallelReductionLogicalOr< char, long int >& operation, + ( tnlParallelReductionLogicalOr< char, long int >& operation, const typename tnlParallelReductionLogicalOr< char, long int > :: IndexType size, const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalOr< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, long int > > - ( const tnlParallelReductionLogicalOr< int, long int >& operation, + ( tnlParallelReductionLogicalOr< int, long int >& operation, const typename tnlParallelReductionLogicalOr< int, long int > :: IndexType size, const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalOr< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, long int > > - ( const tnlParallelReductionLogicalOr< float, long int >& operation, + ( tnlParallelReductionLogicalOr< float, long int >& operation, const typename tnlParallelReductionLogicalOr< float, long int > :: IndexType size, const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionLogicalOr< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, long int > > - ( const tnlParallelReductionLogicalOr< double, long int>& operation, + ( tnlParallelReductionLogicalOr< double, long int>& operation, const typename tnlParallelReductionLogicalOr< double, long int > :: IndexType size, const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput2, @@ -91,7 +91,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, long #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, long int > > - ( const tnlParallelReductionLogicalOr< long double, long int>& operation, + ( tnlParallelReductionLogicalOr< long double, long int>& operation, const typename tnlParallelReductionLogicalOr< long double, long int > :: IndexType size, const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-scalar-product_impl.cu b/src/core/cuda/cuda-reduction-scalar-product_impl.cu index 082d65540a..6de27958bd 100644 --- a/src/core/cuda/cuda-reduction-scalar-product_impl.cu +++ b/src/core/cuda/cuda-reduction-scalar-product_impl.cu @@ -24,28 +24,28 @@ * ScalarProduct */ template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, int > > - ( const tnlParallelReductionScalarProduct< char, int >& operation, + ( tnlParallelReductionScalarProduct< char, int >& operation, const typename tnlParallelReductionScalarProduct< char, int > :: IndexType size, const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput2, typename tnlParallelReductionScalarProduct< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, int > > - ( const tnlParallelReductionScalarProduct< int, int >& operation, + ( tnlParallelReductionScalarProduct< int, int >& operation, const typename tnlParallelReductionScalarProduct< int, int > :: IndexType size, const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput2, typename tnlParallelReductionScalarProduct< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, int > > - ( const tnlParallelReductionScalarProduct< float, int >& operation, + ( tnlParallelReductionScalarProduct< float, int >& operation, const typename tnlParallelReductionScalarProduct< float, int > :: IndexType size, const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput2, typename tnlParallelReductionScalarProduct< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, int > > - ( const tnlParallelReductionScalarProduct< double, int>& operation, + ( tnlParallelReductionScalarProduct< double, int>& operation, const typename tnlParallelReductionScalarProduct< double, int > :: IndexType size, const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput2, @@ -53,7 +53,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, int > > - ( const tnlParallelReductionScalarProduct< long double, int>& operation, + ( tnlParallelReductionScalarProduct< long double, int>& operation, const typename tnlParallelReductionScalarProduct< long double, int > :: IndexType size, const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput2, @@ -62,28 +62,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long dou #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, long int > > - ( const tnlParallelReductionScalarProduct< char, long int >& operation, + ( tnlParallelReductionScalarProduct< char, long int >& operation, const typename tnlParallelReductionScalarProduct< char, long int > :: IndexType size, const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionScalarProduct< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, long int > > - ( const tnlParallelReductionScalarProduct< int, long int >& operation, + ( tnlParallelReductionScalarProduct< int, long int >& operation, const typename tnlParallelReductionScalarProduct< int, long int > :: IndexType size, const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionScalarProduct< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, long int > > - ( const tnlParallelReductionScalarProduct< float, long int >& operation, + ( tnlParallelReductionScalarProduct< float, long int >& operation, const typename tnlParallelReductionScalarProduct< float, long int > :: IndexType size, const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionScalarProduct< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, long int > > - ( const tnlParallelReductionScalarProduct< double, long int>& operation, + ( tnlParallelReductionScalarProduct< double, long int>& operation, const typename tnlParallelReductionScalarProduct< double, long int > :: IndexType size, const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput2, @@ -91,7 +91,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, long int > > - ( const tnlParallelReductionScalarProduct< long double, long int>& operation, + ( tnlParallelReductionScalarProduct< long double, long int>& operation, const typename tnlParallelReductionScalarProduct< long double, long int > :: IndexType size, const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput2, diff --git a/src/core/cuda/cuda-reduction-sum_impl.cu b/src/core/cuda/cuda-reduction-sum_impl.cu index 8447ea5f00..62ed0a2beb 100644 --- a/src/core/cuda/cuda-reduction-sum_impl.cu +++ b/src/core/cuda/cuda-reduction-sum_impl.cu @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionSum< char, int > > - ( const tnlParallelReductionSum< char, int >& operation, + ( tnlParallelReductionSum< char, int >& operation, const typename tnlParallelReductionSum< char, int > :: IndexType size, const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput2, typename tnlParallelReductionSum< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionSum< int, int > > - ( const tnlParallelReductionSum< int, int >& operation, + ( tnlParallelReductionSum< int, int >& operation, const typename tnlParallelReductionSum< int, int > :: IndexType size, const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput2, typename tnlParallelReductionSum< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionSum< float, int > > - ( const tnlParallelReductionSum< float, int >& operation, + ( tnlParallelReductionSum< float, int >& operation, const typename tnlParallelReductionSum< float, int > :: IndexType size, const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput2, typename tnlParallelReductionSum< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionSum< double, int > > - ( const tnlParallelReductionSum< double, int>& operation, + ( tnlParallelReductionSum< double, int>& operation, const typename tnlParallelReductionSum< double, int > :: IndexType size, const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput2, @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionSum< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, int > > - ( const tnlParallelReductionSum< long double, int>& operation, + ( tnlParallelReductionSum< long double, int>& operation, const typename tnlParallelReductionSum< long double, int > :: IndexType size, const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput2, @@ -63,28 +63,28 @@ template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, int > #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionSum< char, long int > > - ( const tnlParallelReductionSum< char, long int >& operation, + ( tnlParallelReductionSum< char, long int >& operation, const typename tnlParallelReductionSum< char, long int > :: IndexType size, const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput2, typename tnlParallelReductionSum< char, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionSum< int, long int > > - ( const tnlParallelReductionSum< int, long int >& operation, + ( tnlParallelReductionSum< int, long int >& operation, const typename tnlParallelReductionSum< int, long int > :: IndexType size, const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionSum< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionSum< float, long int > > - ( const tnlParallelReductionSum< float, long int >& operation, + ( tnlParallelReductionSum< float, long int >& operation, const typename tnlParallelReductionSum< float, long int > :: IndexType size, const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionSum< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionSum< double, long int > > - ( const tnlParallelReductionSum< double, long int>& operation, + ( tnlParallelReductionSum< double, long int>& operation, const typename tnlParallelReductionSum< double, long int > :: IndexType size, const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput2, @@ -92,11 +92,11 @@ template bool reductionOnCudaDevice< tnlParallelReductionSum< double, long int > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, long int > > - ( const tnlParallelReductionSum< long double, long int>& operation, + ( tnlParallelReductionSum< long double, long int>& operation, const typename tnlParallelReductionSum< long double, long int > :: IndexType size, const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput2, typename tnlParallelReductionSum< long double, long int> :: ResultType& result ); #endif #endif -#endif \ No newline at end of file +#endif diff --git a/src/core/cuda/cuda-reduction_impl.h b/src/core/cuda/cuda-reduction_impl.h index 389d166ca9..e8795ec7bc 100644 --- a/src/core/cuda/cuda-reduction_impl.h +++ b/src/core/cuda/cuda-reduction_impl.h @@ -42,14 +42,14 @@ using namespace std; * are reduced on CPU. The constant must not be larger * than maximal CUDA grid size. */ -const int minGPUReductionDataSize = 128;//65536; //16384;//1024;//256; +const int minGPUReductionDataSize = 256;//65536; //16384;//1024;//256; //static tnlCudaReductionBuffer cudaReductionBuffer( 8 * minGPUReductionDataSize ); #ifdef HAVE_CUDA template< typename Operation, int blockSize > -__global__ void tnlCUDAReductionKernel( const Operation operation, +__global__ void tnlCUDAReductionKernel( Operation operation, const typename Operation :: IndexType size, const typename Operation :: RealType* input1, const typename Operation :: RealType* input2, @@ -60,7 +60,7 @@ __global__ void tnlCUDAReductionKernel( const Operation operation, }; template< typename Operation > -typename Operation::IndexType reduceOnCudaDevice( const Operation& operation, +typename Operation::IndexType reduceOnCudaDevice( Operation& operation, const typename Operation::IndexType size, const typename Operation::RealType* input1, const typename Operation::RealType* input2, @@ -135,7 +135,7 @@ typename Operation::IndexType reduceOnCudaDevice( const Operation& operation, #endif template< typename Operation > -bool reductionOnCudaDevice( const Operation& operation, +bool reductionOnCudaDevice( Operation& operation, const typename Operation :: IndexType size, const typename Operation :: RealType* deviceInput1, const typename Operation :: RealType* deviceInput2, @@ -231,7 +231,6 @@ bool reductionOnCudaDevice( const Operation& operation, /**** * Sum */ - extern template bool reductionOnCudaDevice< tnlParallelReductionSum< char, int > > ( const tnlParallelReductionSum< char, int >& operation, const typename tnlParallelReductionSum< char, int > :: IndexType size, @@ -311,7 +310,6 @@ extern template bool reductionOnCudaDevice< tnlParallelReductionSum< long double /**** * Min */ - extern template bool reductionOnCudaDevice< tnlParallelReductionMin< char, int > > ( const tnlParallelReductionMin< char, int >& operation, const typename tnlParallelReductionMin< char, int > :: IndexType size, diff --git a/src/core/cuda/reduction-operations.h b/src/core/cuda/reduction-operations.h index 8733bcd881..b328d5b3f6 100644 --- a/src/core/cuda/reduction-operations.h +++ b/src/core/cuda/reduction-operations.h @@ -203,17 +203,17 @@ class tnlParallelReductionSum ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current + data1[ idx ]; }; - __cuda_callable__ ResultType initialValue() const { return 0; }; + __cuda_callable__ ResultType initialValue() { return 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result += data1[ index ]; } @@ -221,13 +221,13 @@ class tnlParallelReductionSum #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, - const ResultType& data ) const + const ResultType& data ) { result += data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, - volatile const ResultType& data ) const + volatile const ResultType& data ) { result += data; }; @@ -248,30 +248,30 @@ class tnlParallelReductionMin ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Min( current, data1[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return tnlMaxValue< ResultType>(); }; + __cuda_callable__ ResultType initialValue() { return tnlMaxValue< ResultType>(); }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMin( result, data1[ index ] ); } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, - const ResultType& data ) const + const ResultType& data ) { result = tnlCudaMin( result, data ); }; __device__ void commonReductionOnDevice( volatile ResultType& result, - volatile const ResultType& data ) const + volatile const ResultType& data ) { result = tnlCudaMin( result, data ); }; @@ -293,30 +293,30 @@ class tnlParallelReductionMax ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Max( current, data1[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return tnlMinValue< ResultType>(); }; + __cuda_callable__ ResultType initialValue() { return tnlMinValue< ResultType>(); }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMax( result, data1[ index ] ); } #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, - const ResultType& data ) const + const ResultType& data ) { result = tnlCudaMax( result, data ); }; __device__ void commonReductionOnDevice( volatile ResultType& result, - volatile const ResultType& data ) const + volatile const ResultType& data ) { result = tnlCudaMax( result, data ); }; @@ -336,17 +336,17 @@ class tnlParallelReductionLogicalAnd ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current && data1[ idx ]; }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) true; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) true; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = result && data1[ index ]; } @@ -354,13 +354,13 @@ class tnlParallelReductionLogicalAnd #ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, - const ResultType& data ) const + const ResultType& data ) { result = result && data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, - volatile const ResultType& data ) const + volatile const ResultType& data ) { result = result && data; }; @@ -383,31 +383,31 @@ class tnlParallelReductionLogicalOr ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current || data1[ idx ]; }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) false; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) false; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = result || data1[ index ]; } -#ifdef HAVE_CUDA +#ifdef HAVE_CUDA __device__ void commonReductionOnDevice( ResultType& result, - const ResultType& data ) const + const ResultType& data ) { result = result || data; }; __device__ void commonReductionOnDevice( volatile ResultType& result, - volatile const ResultType& data ) const + volatile const ResultType& data ) { result = result || data; }; @@ -427,17 +427,17 @@ class tnlParallelReductionAbsSum : public tnlParallelReductionSum< Real, Index > ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current + tnlAbs( data1[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result += tnlCudaAbs( data1[ index ] ); } @@ -456,17 +456,17 @@ class tnlParallelReductionAbsMin : public tnlParallelReductionMin< Real, Index > ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Min( current, tnlAbs( data1[ idx ] ) ); }; - __cuda_callable__ ResultType initialValue() const { return tnlMaxValue< ResultType>(); }; + __cuda_callable__ ResultType initialValue() { return tnlMaxValue< ResultType>(); }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMin( result, tnlCudaAbs( data1[ index ] ) ); } @@ -485,17 +485,17 @@ class tnlParallelReductionAbsMax : public tnlParallelReductionMax< Real, Index > ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Max( current, tnlAbs( data1[ idx ] ) ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMax( result, tnlCudaAbs( data1[ index ] ) ); } @@ -514,19 +514,21 @@ class tnlParallelReductionL2Norm : public tnlParallelReductionSum< Real, Index > ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { - return current + data1[ idx ] * data1[ idx ]; + const RealType& aux = data1[ idx ]; + return current + aux * aux; }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { - result += data1[ index ] * data1[ index ]; + const RealType& aux = data1[ index ]; + result += aux * aux; } }; @@ -549,17 +551,17 @@ class tnlParallelReductionLpNorm : public tnlParallelReductionSum< Real, Index > ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current + pow( tnlAbs( data1[ idx ] ), p ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result += tnlCudaPow( tnlCudaAbs( data1[ index ] ), p ); } @@ -582,17 +584,17 @@ class tnlParallelReductionEqualities : public tnlParallelReductionLogicalAnd< bo ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current && ( data1[ idx ] == data2[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) true; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) true; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = result && ( data1[ index ] == data2[ index ] ); } @@ -611,17 +613,17 @@ class tnlParallelReductionInequalities : public tnlParallelReductionLogicalAnd< ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current && ( data1[ idx ] != data2[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) false; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) false; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = result && ( data1[ index ] != data2[ index ] ); } @@ -640,17 +642,17 @@ class tnlParallelReductionScalarProduct : public tnlParallelReductionSum< Real, ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current + ( data1[ idx ] * data2[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ inline void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result += data1[ index ] * data2[ index ]; } @@ -669,17 +671,17 @@ class tnlParallelReductionDiffSum : public tnlParallelReductionSum< Real, Index ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current + ( data1[ idx ] - data2[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result += data1[ index ] - data2[ index ]; } @@ -698,17 +700,17 @@ class tnlParallelReductionDiffMin : public tnlParallelReductionMin< Real, Index ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Min( current, data1[ idx ] - data2[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return tnlMaxValue< ResultType>(); }; + __cuda_callable__ ResultType initialValue() { return tnlMaxValue< ResultType>(); }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMin( result, data1[ index ] - data2[ index ] ); } @@ -727,17 +729,17 @@ class tnlParallelReductionDiffMax : public tnlParallelReductionMax< Real, Index ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Max( current, data1[ idx ] - data2[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMax( result, data1[ index ] - data2[ index ] ); } @@ -756,17 +758,17 @@ class tnlParallelReductionDiffAbsSum : public tnlParallelReductionMax< Real, Ind ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current + tnlAbs( data1[ idx ] - data2[ idx ] ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result += tnlCudaAbs( data1[ index ] - data2[ index ] ); } @@ -785,17 +787,17 @@ class tnlParallelReductionDiffAbsMin : public tnlParallelReductionMin< Real, Ind ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Min( current, tnlAbs( data1[ idx ] - data2[ idx ] ) ); }; - __cuda_callable__ ResultType initialValue() const { return tnlMaxValue< ResultType>(); }; + __cuda_callable__ ResultType initialValue() { return tnlMaxValue< ResultType>(); }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMin( result, tnlCudaAbs( data1[ index ] - data2[ index ] ) ); } @@ -814,17 +816,17 @@ class tnlParallelReductionDiffAbsMax : public tnlParallelReductionMax< Real, Ind ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return Max( current, tnlAbs( data1[ idx ] - data2[ idx ] ) ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result = tnlCudaMax( result, tnlCudaAbs( data1[ index ] - data2[ index ] ) ); } @@ -843,22 +845,26 @@ class tnlParallelReductionDiffL2Norm : public tnlParallelReductionSum< Real, Ind ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { - const RealType aux( data2[ idx ] - data1[ idx ] ); + this->aux = data2[ idx ] - data1[ idx ]; return current + aux * aux; }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { - const RealType aux( data2[ index ] - data1[ index ] ); + this->aux = data2[ index ] - data1[ index ]; result += aux * aux; } + + protected: + + RealType aux; }; template< typename Real, typename Index > @@ -879,17 +885,17 @@ class tnlParallelReductionDiffLpNorm : public tnlParallelReductionSum< Real, Ind ResultType reduceOnHost( const IndexType idx, const ResultType& current, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { return current + pow( tnlAbs( data1[ idx ] - data2[ idx ] ), p ); }; - __cuda_callable__ ResultType initialValue() const { return ( ResultType ) 0; }; + __cuda_callable__ ResultType initialValue() { return ( ResultType ) 0; }; __cuda_callable__ void cudaFirstReduction( ResultType& result, const IndexType index, const RealType* data1, - const RealType* data2 ) const + const RealType* data2 ) { result += tnlCudaPow( tnlCudaAbs( data1[ index ] - data2[ index ] ), p ); } diff --git a/src/core/cuda/tnlCudaReduction.h b/src/core/cuda/tnlCudaReduction.h index 9b7bf7ab2c..d5084cff8b 100644 --- a/src/core/cuda/tnlCudaReduction.h +++ b/src/core/cuda/tnlCudaReduction.h @@ -30,7 +30,7 @@ class tnlCUDAReduction typedef typename Operation::ResultType ResultType; - __device__ static void reduce( const Operation operation, + __device__ static void reduce( Operation& operation, const IndexType size, const RealType* input1, const RealType* input2, @@ -47,7 +47,7 @@ class tnlCUDAReduction< tnlParallelReductionScalarProduct< Real, Index >, blockS typedef typename Operation::RealType RealType; typedef typename Operation::ResultType ResultType; - __device__ static void reduce( const Operation operation, + __device__ static void reduce( Operation operation, const IndexType size, const RealType* input1, const RealType* input2, diff --git a/src/core/cuda/tnlCudaReduction_impl.h b/src/core/cuda/tnlCudaReduction_impl.h index 5187141a1a..5fa295f65e 100644 --- a/src/core/cuda/tnlCudaReduction_impl.h +++ b/src/core/cuda/tnlCudaReduction_impl.h @@ -22,7 +22,7 @@ template< typename Operation, int blockSize > __device__ void tnlCUDAReduction< Operation, blockSize >:: -reduce( const Operation operation, +reduce( Operation& operation, const IndexType size, const RealType* input1, const RealType* input2, @@ -158,7 +158,7 @@ template< typename Real, typename Index, int blockSize > __device__ void tnlCUDAReduction< tnlParallelReductionScalarProduct< Real, Index >, blockSize >:: -reduce( const Operation operation, +reduce( Operation& operation, const IndexType size, const RealType* input1, const RealType* input2, @@ -182,7 +182,7 @@ reduce( const Operation operation, * sequential reduction. */ sdata[ tid ] = ( RealType ) 0; - while( gid + 4 * gridSize < size ) + /*while( gid + 4 * gridSize < size ) { sdata[ tid ] += input1[ gid ] * input2[ gid ]; sdata[ tid ] += input1[ gid + gridSize ] * input2[ gid + gridSize ]; @@ -195,7 +195,7 @@ reduce( const Operation operation, sdata[ tid ] += input1[ gid ] * input2[ gid ]; sdata[ tid ] += input1[ gid + gridSize ] * input2[ gid + gridSize ]; gid += 2*gridSize; - } + }*/ while( gid < size ) { sdata[ tid ] += input1[ gid ] * input2[ gid ]; diff --git a/src/core/vectors/CMakeLists.txt b/src/core/vectors/CMakeLists.txt index 63b3ef39b8..72b06ef4ab 100755 --- a/src/core/vectors/CMakeLists.txt +++ b/src/core/vectors/CMakeLists.txt @@ -18,7 +18,7 @@ set( headers tnlVector.h SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/core/vectors ) set( common_SOURCES - ${CURRENT_DIR}/tnlVectorOperations_impl.cpp + ${CURRENT_DIR}/tnlVectorOperationsHost_impl.cpp ${CURRENT_DIR}/tnlMultiVector_impl.cpp ${CURRENT_DIR}/tnlSharedVector_impl.cpp ${CURRENT_DIR}/tnlVector_impl.cpp @@ -27,14 +27,19 @@ set( common_SOURCES IF( BUILD_CUDA ) set( tnl_core_vectors_CUDA__SOURCES ${common_SOURCES} - ${CURRENT_DIR}/tnlVectorOperations_impl.cu + ${CURRENT_DIR}/tnlVectorOperationsCuda_impl.cu ${CURRENT_DIR}/tnlVector_impl.cu ${CURRENT_DIR}/tnlStaticVector_impl.cu PARENT_SCOPE ) + set( tnl_core_vectors_SOURCES + ${common_SOURCES} + PARENT_SCOPE ) +else() + set( tnl_core_vectors_SOURCES + ${common_SOURCES} + ${CURRENT_DIR}/tnlVectorOperationsCuda_impl.cpp + PARENT_SCOPE ) ENDIF() -set( tnl_core_vectors_SOURCES - ${common_SOURCES} - PARENT_SCOPE ) INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/core/vectors ) \ No newline at end of file diff --git a/src/core/vectors/tnlVectorOperationsCuda_impl.cpp b/src/core/vectors/tnlVectorOperationsCuda_impl.cpp new file mode 100644 index 0000000000..c9b21196d2 --- /dev/null +++ b/src/core/vectors/tnlVectorOperationsCuda_impl.cpp @@ -0,0 +1,328 @@ +/*************************************************************************** + tnlVectorOperationsCuda_impl.cpp - description + ------------------- + begin : Dec 10, 2015 + copyright : (C) 2015 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <core/vectors/tnlVectorOperations.h> + +#ifdef TEMPLATE_EXPLICIT_INSTANTIATION + +/**** + * Max + */ +template int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + +/**** + * Min + */ +template int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + +/**** + * Abs max + */ +template int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + + +/**** + * Abs min + */ +template int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + +/**** + * L2 norm + */ +template int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + +/**** + * L1 norm + */ +template int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + +/**** + * Lp norm + */ +template int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< int, tnlCuda, int >& v, const int& p ); +template long int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long int, tnlCuda, int >& v, const long int& p ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< float, tnlCuda, int >& v, const float& p ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< double, tnlCuda, int >& v, const double& p ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long double, tnlCuda, int >& v, const long double& p ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< int, tnlCuda, long int >& v, const int& p ); +template long int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long int, tnlCuda, long int >& v, const long int& p ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< float, tnlCuda, long int >& v, const float& p ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< double, tnlCuda, long int >& v, const double& p ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long double, tnlCuda, long int >& v, const long double& p ); +#endif +#endif + + + +/**** + * Sum + */ +template int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + +/**** + * Difference max + */ +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); +#endif +#endif + +/**** + * Difference min + */ +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); +#endif +#endif + +/**** + * Difference abs max + */ +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); +#endif +#endif + + +/**** + * Difference abs min + */ +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); +template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); +#endif +#endif + +#endif + + diff --git a/src/core/vectors/tnlVectorOperations_impl.cu b/src/core/vectors/tnlVectorOperationsCuda_impl.cu similarity index 85% rename from src/core/vectors/tnlVectorOperations_impl.cu rename to src/core/vectors/tnlVectorOperationsCuda_impl.cu index b8af43c24d..6190d289a3 100644 --- a/src/core/vectors/tnlVectorOperations_impl.cu +++ b/src/core/vectors/tnlVectorOperationsCuda_impl.cu @@ -1,5 +1,5 @@ /*************************************************************************** - tnlVectorOperations_impl.cu - description + tnlVectorOperationsCuda_impl.cu - description ------------------- begin : Jul 20, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -120,6 +120,55 @@ template long double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlV #endif #endif +/**** + * L2 norm + */ +template int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL2Norm( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif + +/**** + * L1 norm + */ +template int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< int, tnlCuda, int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long int, tnlCuda, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< float, tnlCuda, int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< double, tnlCuda, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long double, tnlCuda, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< int, tnlCuda, long int >& v ); +template long int tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long int, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< float, tnlCuda, long int >& v ); +#endif +template double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< double, tnlCuda, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlCuda >::getVectorL1Norm( const tnlVector< long double, tnlCuda, long int >& v ); +#endif +#endif /**** * Lp norm @@ -146,6 +195,8 @@ template long double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlV #endif #endif + + /**** * Sum */ diff --git a/src/core/vectors/tnlVectorOperations_impl.cpp b/src/core/vectors/tnlVectorOperationsHost_impl.cpp similarity index 52% rename from src/core/vectors/tnlVectorOperations_impl.cpp rename to src/core/vectors/tnlVectorOperationsHost_impl.cpp index dbd2275c17..6b95318297 100644 --- a/src/core/vectors/tnlVectorOperations_impl.cpp +++ b/src/core/vectors/tnlVectorOperationsHost_impl.cpp @@ -1,5 +1,5 @@ /*************************************************************************** - tnlVectorOperations_impl.cpp - description + tnlVectorOperationsHost_impl.cpp - description ------------------- begin : Jul 20, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -119,6 +119,57 @@ template long double tnlVectorOperations< tnlHost >::getVectorAbsMin( const tnlV #endif #endif +/**** + * L1 norm + */ +template int tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< int, tnlHost, int >& v ); +template long int tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< long int, tnlHost, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< float, tnlHost, int >& v ); +#endif +template double tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< double, tnlHost, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< long double, tnlHost, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< int, tnlHost, long int >& v ); +template long int tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< long int, tnlHost, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< float, tnlHost, long int >& v ); +#endif +template double tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< double, tnlHost, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlHost >::getVectorL1Norm( const tnlVector< long double, tnlHost, long int >& v ); +#endif +#endif + +/**** + * L2 norm + */ +template int tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< int, tnlHost, int >& v ); +template long int tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< long int, tnlHost, int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< float, tnlHost, int >& v ); +#endif +template double tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< double, tnlHost, int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< long double, tnlHost, int >& v ); +#endif + +#ifdef INSTANTIATE_LONG_INT +template int tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< int, tnlHost, long int >& v ); +template long int tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< long int, tnlHost, long int >& v ); +#ifdef INSTANTIATE_FLOAT +template float tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< float, tnlHost, long int >& v ); +#endif +template double tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< double, tnlHost, long int >& v ); +#ifdef INSTANTIATE_LONG_DOUBLE +template long double tnlVectorOperations< tnlHost >::getVectorL2Norm( const tnlVector< long double, tnlHost, long int >& v ); +#endif +#endif + + /**** * Lp norm */ @@ -144,6 +195,8 @@ template long double tnlVectorOperations< tnlHost >::getVectorLpNorm( const tnlV #endif #endif + + /**** * Sum */ @@ -269,258 +322,6 @@ template long double tnlVectorOperations< tnlHost >::getVectorDifferenceAbsMin( #endif #endif -/**** - * Max - */ -template int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< int, tnlCuda, int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long int, tnlCuda, int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< float, tnlCuda, int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< double, tnlCuda, int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long double, tnlCuda, int >& v ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< int, tnlCuda, long int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long int, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< float, tnlCuda, long int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< double, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorMax( const tnlVector< long double, tnlCuda, long int >& v ); -#endif -#endif - - -/**** - * Min - */ -template int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< int, tnlCuda, int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long int, tnlCuda, int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< float, tnlCuda, int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< double, tnlCuda, int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long double, tnlCuda, int >& v ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< int, tnlCuda, long int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long int, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< float, tnlCuda, long int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< double, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorMin( const tnlVector< long double, tnlCuda, long int >& v ); -#endif -#endif - -/**** - * Abs max - */ -template int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< int, tnlCuda, int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long int, tnlCuda, int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< float, tnlCuda, int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< double, tnlCuda, int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long double, tnlCuda, int >& v ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< int, tnlCuda, long int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long int, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< float, tnlCuda, long int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< double, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorAbsMax( const tnlVector< long double, tnlCuda, long int >& v ); -#endif -#endif - -/**** - * Abs min - */ -template int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< int, tnlCuda, int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long int, tnlCuda, int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< float, tnlCuda, int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< double, tnlCuda, int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long double, tnlCuda, int >& v ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< int, tnlCuda, long int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long int, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< float, tnlCuda, long int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< double, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorAbsMin( const tnlVector< long double, tnlCuda, long int >& v ); -#endif -#endif - -/**** - * Lp norm - */ -template int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< int, tnlCuda, int >& v, const int& p ); -template long int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long int, tnlCuda, int >& v, const long int& p ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< float, tnlCuda, int >& v, const float& p ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< double, tnlCuda, int >& v, const double& p ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long double, tnlCuda, int >& v, const long double& p ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< int, tnlCuda, long int >& v, const int& p ); -template long int tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long int, tnlCuda, long int >& v, const long int& p ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< float, tnlCuda, long int >& v, const float& p ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< double, tnlCuda, long int >& v, const double& p ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorLpNorm( const tnlVector< long double, tnlCuda, long int >& v, const long double& p ); -#endif -#endif - -/**** - * Sum - */ -template int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< int, tnlCuda, int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long int, tnlCuda, int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< float, tnlCuda, int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< double, tnlCuda, int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long double, tnlCuda, int >& v ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< int, tnlCuda, long int >& v ); -template long int tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long int, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< float, tnlCuda, long int >& v ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< double, tnlCuda, long int >& v ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorSum( const tnlVector< long double, tnlCuda, long int >& v ); -#endif -#endif - - -/**** - * Difference max - */ -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMax( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); -#endif -#endif - - -/**** - * Difference min - */ -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceMin( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); -#endif -#endif - -/**** - * Difference abs max - */ -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMax( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); -#endif -#endif - -/**** - * Difference abs min - */ -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< int, tnlCuda, int >& v1, const tnlVector< int, tnlCuda, int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long int, tnlCuda, int >& v1, const tnlVector< long int, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< float, tnlCuda, int >& v1, const tnlVector< float, tnlCuda, int >& v2); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< double, tnlCuda, int >& v1, const tnlVector< double, tnlCuda, int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long double, tnlCuda, int >& v1, const tnlVector< long double, tnlCuda, int >& v2 ); -#endif - -#ifdef INSTANTIATE_LONG_INT -template int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< int, tnlCuda, long int >& v1, const tnlVector< int, tnlCuda, long int >& v2 ); -template long int tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long int, tnlCuda, long int >& v1, const tnlVector< long int, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_FLOAT -template float tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< float, tnlCuda, long int >& v1, const tnlVector< float, tnlCuda, long int >& v2 ); -#endif -template double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< double, tnlCuda, long int >& v1, const tnlVector< double, tnlCuda, long int >& v2 ); -#ifdef INSTANTIATE_LONG_DOUBLE -template long double tnlVectorOperations< tnlCuda >::getVectorDifferenceAbsMin( const tnlVector< long double, tnlCuda, long int >& v1, const tnlVector< long double, tnlCuda, long int >& v2 ); -#endif -#endif #endif diff --git a/tests/benchmarks/tnl-cuda-benchmarks.h b/tests/benchmarks/tnl-cuda-benchmarks.h index d043077408..6da40ffd0c 100644 --- a/tests/benchmarks/tnl-cuda-benchmarks.h +++ b/tests/benchmarks/tnl-cuda-benchmarks.h @@ -21,6 +21,7 @@ #include <tnlConfig.h> #include <core/vectors/tnlVector.h> #include <core/tnlTimerRT.h> +#include <matrices/tnlSlicedEllpackMatrix.h> #ifdef HAVE_CUBLAS //#include <cublas.h> @@ -115,7 +116,6 @@ int main( int argc, char* argv[] ) timer.start(); for( int i = 0; i < loops; i++ ) resultDevice = deviceVector.scalarProduct( deviceVector ); - cout << "Time: " << timer.getTime() << endl; timer.stop(); bandwidth = 2 * datasetSize / timer.getTime(); cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl; @@ -142,7 +142,6 @@ int main( int argc, char* argv[] ) bandwidth = 2 * datasetSize / timer.getTime(); cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl; #endif -#endif cout << "Benchmarking L2 norm on CPU: "; timer.reset(); @@ -150,7 +149,7 @@ int main( int argc, char* argv[] ) for( int i = 0; i < loops; i++ ) resultHost = hostVector.lpNorm( 2.0 ); timer.stop(); - bandwidth = 2 * datasetSize / timer.getTime(); + bandwidth = datasetSize / timer.getTime(); cout << bandwidth << " GB/sec." << endl; cout << "Benchmarking L2 norm on GPU: " << endl; @@ -158,9 +157,8 @@ int main( int argc, char* argv[] ) timer.start(); for( int i = 0; i < loops; i++ ) resultDevice = deviceVector.lpNorm( 2.0 ); - cout << "Time: " << timer.getTime() << endl; timer.stop(); - bandwidth = 2 * datasetSize / timer.getTime(); + bandwidth = datasetSize / timer.getTime(); cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl; if( resultHost != resultDevice ) { @@ -168,7 +166,7 @@ int main( int argc, char* argv[] ) //return EXIT_FAILURE; } - + /* cout << "Benchmarking prefix-sum on CPU ..." << endl; timer.reset(); timer.start(); @@ -192,8 +190,57 @@ int main( int argc, char* argv[] ) { cerr << "Error in prefix sum at position " << i << ": " << hostVector.getElement( i ) << " != " << deviceVector.getElement( i ) << endl; } - +*/ + /**** + * Sliced Ellpack test + */ + const int elementsPerRow( 5 ); + tnlSlicedEllpackMatrix< double, tnlHost > hostMatrix; + tnlSlicedEllpackMatrix< double, tnlCuda > deviceMatrix; + tnlVector< int, tnlHost, int > hostRowLengths; + tnlVector< int, tnlCuda, int > deviceRowLengths; + hostRowLengths.setSize( size ); + deviceRowLengths.setSize( size ); + hostMatrix.setDimensions( size, size ); + deviceMatrix.setDimensions( size, size ); + hostRowLengths.setValue( elementsPerRow ); + deviceRowLengths.setValue( elementsPerRow ); + hostMatrix.setCompressedRowsLengths( hostRowLengths ); + deviceMatrix.setCompressedRowsLengths( deviceRowLengths ); + int elements( 0 ); + for( int row = 0; row < size; row++ ) + { + int col = Max( 0, row - elementsPerRow / 2 ); + for( int element = 0; element < elementsPerRow; element++ ) + { + if( col + element < size ) + { + hostMatrix.setElement( row, col + element, 1.0 ); + deviceMatrix.setElement( row, col + element, 1.0 ); + elements++; + } + } + } + datasetSize = loops * elements * sizeof( double ) / oneGB; + cout << "Benchmarking SpMV on CPU: "; + timer.reset(); + for( int i = 0; i < loops; i++ ) + hostMatrix.vectorProduct( hostVector, hostVector2 ); + timer.stop(); + double hostTime = timer.getTime(); + bandwidth = 2 * datasetSize / loops / timer.getTime(); + cout << timer.getTime() << " => " << bandwidth << " GB/s" << endl; + + cout << "Benchmarking SpMV on GPU: "; + timer.reset(); + for( int i = 0; i < loops; i++ ) + deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); + timer.stop(); + bandwidth = 2 * datasetSize / loops / timer.getTime(); + cout << timer.getTime() << " => " << bandwidth << " GB/s" << " speedup " << hostTime / timer.getTime() << endl; + return EXIT_SUCCESS; +#endif } #endif /* TNLCUDABENCHMARKS_H_ */ -- GitLab