Fixed ScanType variable. (0a736f95) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/CudaScanKernel.h

+13 −13

Original line number	Diff line number	Diff line
		@@ -27,7 +27,7 @@ template< typename Real,
		typename Reduction,
		typename Index >
		__global__ void
		-cudaFirstPhaseBlockScan( const ScanType ScanType,
		+cudaFirstPhaseBlockScan( const ScanType scanType,
		Reduction reduction,
		const Real zero,
		const Index size,
		@@ -48,7 +48,7 @@ cudaFirstPhaseBlockScan( const ScanType ScanType,
		*/
		const int blockOffset = blockIdx.x * elementsInBlock;
		int idx = threadIdx.x;
		-if( ScanType == ScanType::Exclusive )
		+if( scanType == ScanType::Exclusive )
		{
		if( idx == 0 )
		sharedData[ 0 ] = zero;
		@@ -145,7 +145,7 @@ cudaFirstPhaseBlockScan( const ScanType ScanType,

		if( threadIdx.x == 0 )
		{
		-if( ScanType == ScanType::Exclusive )
		+if( scanType == ScanType::Exclusive )
		{
		auxArray[ blockIdx.x ] = reduction( sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ],
		sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] );
		@@ -179,7 +179,7 @@ cudaSecondPhaseBlockScan( Reduction reduction,
		}
		}

		-template< ScanType ScanType,
		+template< ScanType scanType,
		typename Real,
		typename Index >
		struct CudaScanKernelLauncher
		@@ -271,7 +271,7 @@ struct CudaScanKernelLauncher
		elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2;
		const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( Real );
		cudaFirstPhaseBlockScan<<< cudaGridSize, cudaBlockSize, sharedMemory >>>
		-( ScanType,
		+( scanType,
		reduction,
		zero,
		currentSize,
		@@ -306,7 +306,7 @@ struct CudaScanKernelLauncher
		}

		/****
		-* \brief Performs the seocond phase of prefix sum.
		+* \brief Performs the second phase of prefix sum.
		*
		* \param size Number of elements to be scanned.
		* \param deviceOutput Pointer to output array on GPU.