diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h index fcee84e3971c4b4d28c690f7c01573be0bfaf2eb..82b030e1a9198eebee91609db3c384d69e237079 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h @@ -140,7 +140,7 @@ template< int blockSize, __global__ void __launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor ) CudaReductionWithArgumentKernel( const Result zero, - const DataFetcher dataFetcher, + DataFetcher dataFetcher, const Reduction reduction, const Index size, Result* output, @@ -294,7 +294,7 @@ struct CudaReductionKernelLauncher template< typename DataFetcher, typename Reduction > int start( const Reduction& reduction, - const DataFetcher& dataFetcher, + DataFetcher& dataFetcher, const Result& zero, Result*& output ) { @@ -311,7 +311,7 @@ struct CudaReductionKernelLauncher template< typename DataFetcher, typename Reduction > int startWithArgument( const Reduction& reduction, - const DataFetcher& dataFetcher, + DataFetcher& dataFetcher, const Result& zero, Result*& output, Index*& idxOutput ) @@ -395,7 +395,7 @@ struct CudaReductionKernelLauncher typename Reduction > int launch( const Index size, const Reduction& reduction, - const DataFetcher& dataFetcher, + DataFetcher& dataFetcher, const Result& zero, Result* output ) { @@ -498,7 +498,7 @@ struct CudaReductionKernelLauncher typename Reduction > int launchWithArgument( const Index size, const Reduction& reduction, - const DataFetcher& dataFetcher, + DataFetcher& dataFetcher, const Result& zero, Result* output, Index* idxOutput, diff --git a/src/TNL/Containers/Algorithms/CudaScanKernel.h b/src/TNL/Containers/Algorithms/CudaScanKernel.h index 34bfb7b2a36ea62817745a252d3145edec43d963..a8c3548757668df966b094a9da19e37b88ab7ed8 100644 --- a/src/TNL/Containers/Algorithms/CudaScanKernel.h +++ b/src/TNL/Containers/Algorithms/CudaScanKernel.h @@ -160,13 +160,13 @@ template< typename Real, typename Index > __global__ void cudaSecondPhaseBlockScan( Reduction reduction, - const Index size, - const int elementsInBlock, - const Index gridIdx, - const Index maxGridSize, - const Real* auxArray, - Real* data, - Real shift ) + const Index size, + const int elementsInBlock, + const Index gridIdx, + const Index maxGridSize, + const Real* auxArray, + Real* data, + Real shift ) { if( gridIdx > 0 || blockIdx.x > 0 ) shift = reduction( shift, auxArray[ gridIdx * maxGridSize + blockIdx.x - 1 ] ); diff --git a/src/TNL/Containers/Algorithms/DistributedScan.h b/src/TNL/Containers/Algorithms/DistributedScan.h index 6466b6bd5f6f6d38d58f6766ac7e8fc3ba040697..44fd425b92efcd5ab047111271e9f5b8a319e080 100644 --- a/src/TNL/Containers/Algorithms/DistributedScan.h +++ b/src/TNL/Containers/Algorithms/DistributedScan.h @@ -1,5 +1,5 @@ /*************************************************************************** - Scan.h - description + DistributedScan.h - description ------------------- begin : Aug 16, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. diff --git a/src/TNL/Containers/Algorithms/Scan.h b/src/TNL/Containers/Algorithms/Scan.h index ee313caf73d1d091dd0d3c460cc91d33912dfc97..5587c627307da329db40eef54b41ba3c2d4e994a 100644 --- a/src/TNL/Containers/Algorithms/Scan.h +++ b/src/TNL/Containers/Algorithms/Scan.h @@ -50,7 +50,7 @@ enum class ScanType { * See \ref Scan< Devices::Host, Type > and \ref Scan< Devices::Cuda, Type >. */ template< typename Device, - ScanType Type = ScanType::Inclusive > + ScanType Type = ScanType::Inclusive > struct Scan; /** @@ -93,7 +93,7 @@ struct Scan; * **Note: Segmented scan is not implemented for CUDA yet.** */ template< typename Device, - ScanType Type = ScanType::Inclusive > + ScanType Type = ScanType::Inclusive > struct SegmentedScan;