Loading src/TNL/Containers/Algorithms/CudaMultireductionKernel.h +1 −1 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ static constexpr int Multireduction_maxThreadsPerBlock = 256; // must be a powe template< typename Operation, int blockSizeX > __global__ void __launch_bounds__( Multireduction_maxThreadsPerBlock, Multireduction_minBlocksPerMultiprocessor ) CudaMultireductionKernel( Operation& operation, CudaMultireductionKernel( Operation operation, const typename Operation::IndexType n, const typename Operation::IndexType size, const typename Operation::RealType* input1, Loading src/TNL/Containers/Algorithms/CudaReductionKernel.h +1 −1 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ static constexpr int Reduction_maxThreadsPerBlock = 256; // must be a power of template< typename Operation, int blockSize > __global__ void __launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor ) CudaReductionKernel( Operation& operation, CudaReductionKernel( Operation operation, const typename Operation::IndexType size, const typename Operation::RealType* input1, const typename Operation::RealType* input2, Loading src/TNL/Containers/Algorithms/Reduction_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ reductionOnCudaDevice( Operation& operation, /*** * Transfer the reduced data from device to host. */ ResultType resultArray[ minGPUReductionDataSize ]; ResultType resultArray[ reducedSize ]; if( ! Containers::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ResultType, ResultType, IndexType >( resultArray, deviceAux1, reducedSize ) ) return false; Loading Loading
src/TNL/Containers/Algorithms/CudaMultireductionKernel.h +1 −1 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ static constexpr int Multireduction_maxThreadsPerBlock = 256; // must be a powe template< typename Operation, int blockSizeX > __global__ void __launch_bounds__( Multireduction_maxThreadsPerBlock, Multireduction_minBlocksPerMultiprocessor ) CudaMultireductionKernel( Operation& operation, CudaMultireductionKernel( Operation operation, const typename Operation::IndexType n, const typename Operation::IndexType size, const typename Operation::RealType* input1, Loading
src/TNL/Containers/Algorithms/CudaReductionKernel.h +1 −1 Original line number Diff line number Diff line Loading @@ -39,7 +39,7 @@ static constexpr int Reduction_maxThreadsPerBlock = 256; // must be a power of template< typename Operation, int blockSize > __global__ void __launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor ) CudaReductionKernel( Operation& operation, CudaReductionKernel( Operation operation, const typename Operation::IndexType size, const typename Operation::RealType* input1, const typename Operation::RealType* input2, Loading
src/TNL/Containers/Algorithms/Reduction_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -98,7 +98,7 @@ reductionOnCudaDevice( Operation& operation, /*** * Transfer the reduced data from device to host. */ ResultType resultArray[ minGPUReductionDataSize ]; ResultType resultArray[ reducedSize ]; if( ! Containers::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ResultType, ResultType, IndexType >( resultArray, deviceAux1, reducedSize ) ) return false; Loading