Loading src/core/cuda/cuda-prefix-sum_impl.cu +8 −8 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, int >& operation, tnlParallelReductionSum< int, int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -32,7 +32,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, int >& operation, tnlParallelReductionSum< float, int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -40,7 +40,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, int >& operation, tnlParallelReductionSum< double, int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -48,7 +48,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, int >& operation, tnlParallelReductionSum< long double, int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -57,7 +57,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, long int >& operation, tnlParallelReductionSum< int, long int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -66,7 +66,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, long int >& operation, tnlParallelReductionSum< float, long int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -74,7 +74,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, long int >& operation, tnlParallelReductionSum< double, long int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -82,7 +82,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, long int >& operation, tnlParallelReductionSum< long double, long int >& operation, const enumPrefixSumType prefixSumType ); #endif #endif Loading src/core/cuda/cuda-prefix-sum_impl.h +13 −13 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ template< typename DataType, typename Operation, typename Index > __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumType, const Operation operation, Operation operation, const Index size, const Index elementsInBlock, const DataType* input, Loading Loading @@ -159,7 +159,7 @@ __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumT template< typename DataType, typename Operation, typename Index > __global__ void cudaSecondPhaseBlockPrefixSum( const Operation operation, __global__ void cudaSecondPhaseBlockPrefixSum( Operation operation, const Index size, const Index elementsInBlock, const Index gridShift, Loading @@ -186,7 +186,7 @@ template< typename DataType, typename Operation, typename Index > bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType, const Operation& operation, Operation& operation, const Index size, const Index blockSize, const Index elementsInBlock, Loading Loading @@ -276,7 +276,7 @@ template< typename DataType, typename Operation, typename Index > bool cudaGridPrefixSum( enumPrefixSumType prefixSumType, const Operation& operation, Operation& operation, const Index size, const Index blockSize, const Index elementsInBlock, Loading Loading @@ -313,7 +313,7 @@ bool cudaPrefixSum( const Index size, const Index blockSize, const DataType *deviceInput, DataType* deviceOutput, const Operation& operation, Operation& operation, const enumPrefixSumType prefixSumType ) { /**** Loading Loading @@ -361,7 +361,7 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, int >& operation, tnlParallelReductionSum< int, int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -369,14 +369,14 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, int >& operation, tnlParallelReductionSum< float, int >& operation, const enumPrefixSumType prefixSumType ); extern template bool cudaPrefixSum( const int size, const int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, int >& operation, tnlParallelReductionSum< double, int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -384,7 +384,7 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, int >& operation, tnlParallelReductionSum< long double, int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -393,7 +393,7 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, long int >& operation, tnlParallelReductionSum< int, long int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -401,14 +401,14 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, long int >& operation, tnlParallelReductionSum< float, long int >& operation, const enumPrefixSumType prefixSumType ); extern template bool cudaPrefixSum( const long int size, const long int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, long int >& operation, tnlParallelReductionSum< double, long int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -416,7 +416,7 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, long int >& operation, tnlParallelReductionSum< long double, long int >& operation, const enumPrefixSumType prefixSumType ); #endif #endif Loading src/core/cuda/cuda-reduction-abs-max_impl.cu +10 −10 Original line number Diff line number Diff line Loading @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > > ( const tnlParallelReductionAbsMax< char, int >& operation, ( tnlParallelReductionAbsMax< char, int >& operation, const typename tnlParallelReductionAbsMax< char, int > :: IndexType size, const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > > ( const tnlParallelReductionAbsMax< int, int >& operation, ( tnlParallelReductionAbsMax< int, int >& operation, const typename tnlParallelReductionAbsMax< int, int > :: IndexType size, const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > > ( const tnlParallelReductionAbsMax< float, int >& operation, ( tnlParallelReductionAbsMax< float, int >& operation, const typename tnlParallelReductionAbsMax< float, int > :: IndexType size, const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > > ( const tnlParallelReductionAbsMax< double, int>& operation, ( tnlParallelReductionAbsMax< double, int>& operation, const typename tnlParallelReductionAbsMax< double, int > :: IndexType size, const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput2, Loading @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > > ( const tnlParallelReductionAbsMax< long double, int>& operation, ( tnlParallelReductionAbsMax< long double, int>& operation, const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size, const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2, Loading @@ -62,7 +62,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, in #endif template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > > ( const tnlParallelReductionAbsMax< char, long int >& operation, ( tnlParallelReductionAbsMax< char, long int >& operation, const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput2, Loading @@ -70,21 +70,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > > ( const tnlParallelReductionAbsMax< int, long int >& operation, ( tnlParallelReductionAbsMax< int, long int >& operation, const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > > ( const tnlParallelReductionAbsMax< float, long int >& operation, ( tnlParallelReductionAbsMax< float, long int >& operation, const typename tnlParallelReductionAbsMax< float, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > > ( const tnlParallelReductionAbsMax< double, long int>& operation, ( tnlParallelReductionAbsMax< double, long int>& operation, const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput2, Loading @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > > ( const tnlParallelReductionAbsMax< long double, long int>& operation, ( tnlParallelReductionAbsMax< long double, long int>& operation, const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput2, Loading src/core/cuda/cuda-reduction-abs-min_impl.cu +10 −10 Original line number Diff line number Diff line Loading @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > > ( const tnlParallelReductionAbsMin< char, int >& operation, ( tnlParallelReductionAbsMin< char, int >& operation, const typename tnlParallelReductionAbsMin< char, int > :: IndexType size, const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > > ( const tnlParallelReductionAbsMin< int, int >& operation, ( tnlParallelReductionAbsMin< int, int >& operation, const typename tnlParallelReductionAbsMin< int, int > :: IndexType size, const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > > ( const tnlParallelReductionAbsMin< float, int >& operation, ( tnlParallelReductionAbsMin< float, int >& operation, const typename tnlParallelReductionAbsMin< float, int > :: IndexType size, const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > > ( const tnlParallelReductionAbsMin< double, int>& operation, ( tnlParallelReductionAbsMin< double, int>& operation, const typename tnlParallelReductionAbsMin< double, int > :: IndexType size, const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput2, Loading @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > > ( const tnlParallelReductionAbsMin< long double, int>& operation, ( tnlParallelReductionAbsMin< long double, int>& operation, const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size, const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput2, Loading @@ -62,7 +62,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, in #endif template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > > ( const tnlParallelReductionAbsMin< char, long int >& operation, ( tnlParallelReductionAbsMin< char, long int >& operation, const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput2, Loading @@ -70,21 +70,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > > ( const tnlParallelReductionAbsMin< int, long int >& operation, ( tnlParallelReductionAbsMin< int, long int >& operation, const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > > ( const tnlParallelReductionAbsMin< float, long int >& operation, ( tnlParallelReductionAbsMin< float, long int >& operation, const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > > ( const tnlParallelReductionAbsMin< double, long int>& operation, ( tnlParallelReductionAbsMin< double, long int>& operation, const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput2, Loading @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > > ( const tnlParallelReductionAbsMin< long double, long int>& operation, ( tnlParallelReductionAbsMin< long double, long int>& operation, const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput2, Loading src/core/cuda/cuda-reduction-abs-sum_impl.cu +10 −10 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
src/core/cuda/cuda-prefix-sum_impl.cu +8 −8 Original line number Diff line number Diff line Loading @@ -23,7 +23,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, int >& operation, tnlParallelReductionSum< int, int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -32,7 +32,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, int >& operation, tnlParallelReductionSum< float, int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -40,7 +40,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, int >& operation, tnlParallelReductionSum< double, int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -48,7 +48,7 @@ template bool cudaPrefixSum( const int size, const int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, int >& operation, tnlParallelReductionSum< long double, int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -57,7 +57,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, long int >& operation, tnlParallelReductionSum< int, long int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -66,7 +66,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, long int >& operation, tnlParallelReductionSum< float, long int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -74,7 +74,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, long int >& operation, tnlParallelReductionSum< double, long int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -82,7 +82,7 @@ template bool cudaPrefixSum( const long int size, const long int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, long int >& operation, tnlParallelReductionSum< long double, long int >& operation, const enumPrefixSumType prefixSumType ); #endif #endif Loading
src/core/cuda/cuda-prefix-sum_impl.h +13 −13 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ template< typename DataType, typename Operation, typename Index > __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumType, const Operation operation, Operation operation, const Index size, const Index elementsInBlock, const DataType* input, Loading Loading @@ -159,7 +159,7 @@ __global__ void cudaFirstPhaseBlockPrefixSum( const enumPrefixSumType prefixSumT template< typename DataType, typename Operation, typename Index > __global__ void cudaSecondPhaseBlockPrefixSum( const Operation operation, __global__ void cudaSecondPhaseBlockPrefixSum( Operation operation, const Index size, const Index elementsInBlock, const Index gridShift, Loading @@ -186,7 +186,7 @@ template< typename DataType, typename Operation, typename Index > bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType, const Operation& operation, Operation& operation, const Index size, const Index blockSize, const Index elementsInBlock, Loading Loading @@ -276,7 +276,7 @@ template< typename DataType, typename Operation, typename Index > bool cudaGridPrefixSum( enumPrefixSumType prefixSumType, const Operation& operation, Operation& operation, const Index size, const Index blockSize, const Index elementsInBlock, Loading Loading @@ -313,7 +313,7 @@ bool cudaPrefixSum( const Index size, const Index blockSize, const DataType *deviceInput, DataType* deviceOutput, const Operation& operation, Operation& operation, const enumPrefixSumType prefixSumType ) { /**** Loading Loading @@ -361,7 +361,7 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, int >& operation, tnlParallelReductionSum< int, int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -369,14 +369,14 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, int >& operation, tnlParallelReductionSum< float, int >& operation, const enumPrefixSumType prefixSumType ); extern template bool cudaPrefixSum( const int size, const int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, int >& operation, tnlParallelReductionSum< double, int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -384,7 +384,7 @@ extern template bool cudaPrefixSum( const int size, const int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, int >& operation, tnlParallelReductionSum< long double, int >& operation, const enumPrefixSumType prefixSumType ); #endif Loading @@ -393,7 +393,7 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const int *deviceInput, int* deviceOutput, const tnlParallelReductionSum< int, long int >& operation, tnlParallelReductionSum< int, long int >& operation, const enumPrefixSumType prefixSumType ); Loading @@ -401,14 +401,14 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const float *deviceInput, float* deviceOutput, const tnlParallelReductionSum< float, long int >& operation, tnlParallelReductionSum< float, long int >& operation, const enumPrefixSumType prefixSumType ); extern template bool cudaPrefixSum( const long int size, const long int blockSize, const double *deviceInput, double* deviceOutput, const tnlParallelReductionSum< double, long int >& operation, tnlParallelReductionSum< double, long int >& operation, const enumPrefixSumType prefixSumType ); #ifdef INSTANTIATE_LONG_DOUBLE Loading @@ -416,7 +416,7 @@ extern template bool cudaPrefixSum( const long int size, const long int blockSize, const long double *deviceInput, long double* deviceOutput, const tnlParallelReductionSum< long double, long int >& operation, tnlParallelReductionSum< long double, long int >& operation, const enumPrefixSumType prefixSumType ); #endif #endif Loading
src/core/cuda/cuda-reduction-abs-max_impl.cu +10 −10 Original line number Diff line number Diff line Loading @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > > ( const tnlParallelReductionAbsMax< char, int >& operation, ( tnlParallelReductionAbsMax< char, int >& operation, const typename tnlParallelReductionAbsMax< char, int > :: IndexType size, const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > > ( const tnlParallelReductionAbsMax< int, int >& operation, ( tnlParallelReductionAbsMax< int, int >& operation, const typename tnlParallelReductionAbsMax< int, int > :: IndexType size, const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > > ( const tnlParallelReductionAbsMax< float, int >& operation, ( tnlParallelReductionAbsMax< float, int >& operation, const typename tnlParallelReductionAbsMax< float, int > :: IndexType size, const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > > ( const tnlParallelReductionAbsMax< double, int>& operation, ( tnlParallelReductionAbsMax< double, int>& operation, const typename tnlParallelReductionAbsMax< double, int > :: IndexType size, const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput2, Loading @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > > ( const tnlParallelReductionAbsMax< long double, int>& operation, ( tnlParallelReductionAbsMax< long double, int>& operation, const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size, const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2, Loading @@ -62,7 +62,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, in #endif template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > > ( const tnlParallelReductionAbsMax< char, long int >& operation, ( tnlParallelReductionAbsMax< char, long int >& operation, const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput2, Loading @@ -70,21 +70,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > > ( const tnlParallelReductionAbsMax< int, long int >& operation, ( tnlParallelReductionAbsMax< int, long int >& operation, const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > > ( const tnlParallelReductionAbsMax< float, long int >& operation, ( tnlParallelReductionAbsMax< float, long int >& operation, const typename tnlParallelReductionAbsMax< float, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > > ( const tnlParallelReductionAbsMax< double, long int>& operation, ( tnlParallelReductionAbsMax< double, long int>& operation, const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput2, Loading @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > > ( const tnlParallelReductionAbsMax< long double, long int>& operation, ( tnlParallelReductionAbsMax< long double, long int>& operation, const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size, const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput2, Loading
src/core/cuda/cuda-reduction-abs-min_impl.cu +10 −10 Original line number Diff line number Diff line Loading @@ -25,28 +25,28 @@ */ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > > ( const tnlParallelReductionAbsMin< char, int >& operation, ( tnlParallelReductionAbsMin< char, int >& operation, const typename tnlParallelReductionAbsMin< char, int > :: IndexType size, const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< char, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > > ( const tnlParallelReductionAbsMin< int, int >& operation, ( tnlParallelReductionAbsMin< int, int >& operation, const typename tnlParallelReductionAbsMin< int, int > :: IndexType size, const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< int, int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > > ( const tnlParallelReductionAbsMin< float, int >& operation, ( tnlParallelReductionAbsMin< float, int >& operation, const typename tnlParallelReductionAbsMin< float, int > :: IndexType size, const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< float, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > > ( const tnlParallelReductionAbsMin< double, int>& operation, ( tnlParallelReductionAbsMin< double, int>& operation, const typename tnlParallelReductionAbsMin< double, int > :: IndexType size, const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput2, Loading @@ -54,7 +54,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > > #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > > ( const tnlParallelReductionAbsMin< long double, int>& operation, ( tnlParallelReductionAbsMin< long double, int>& operation, const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size, const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput2, Loading @@ -62,7 +62,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, in #endif template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > > ( const tnlParallelReductionAbsMin< char, long int >& operation, ( tnlParallelReductionAbsMin< char, long int >& operation, const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput2, Loading @@ -70,21 +70,21 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int #ifdef INSTANTIATE_LONG_INT template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > > ( const tnlParallelReductionAbsMin< int, long int >& operation, ( tnlParallelReductionAbsMin< int, long int >& operation, const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > > ( const tnlParallelReductionAbsMin< float, long int >& operation, ( tnlParallelReductionAbsMin< float, long int >& operation, const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > > ( const tnlParallelReductionAbsMin< double, long int>& operation, ( tnlParallelReductionAbsMin< double, long int>& operation, const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput2, Loading @@ -92,7 +92,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long in #ifdef INSTANTIATE_LONG_DOUBLE template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > > ( const tnlParallelReductionAbsMin< long double, long int>& operation, ( tnlParallelReductionAbsMin< long double, long int>& operation, const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size, const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput1, const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput2, Loading
src/core/cuda/cuda-reduction-abs-sum_impl.cu +10 −10 File changed.Preview size limit exceeded, changes collapsed. Show changes