Replaced static member variables in CudaPrefixSumKernelLauncher with static getters (1fe62640) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/CudaPrefixSumKernel.h

+17 −24

Original line number	Diff line number	Diff line
		@@ -297,8 +297,7 @@ struct CudaPrefixSumKernelLauncher
		*/
		const Index elementsInBlock = 8 * blockSize;
		const Index numberOfBlocks = roundUpDivision( size, elementsInBlock );
		//const auto maxGridSize = 3; //Devices::Cuda::getMaxGridSize();
		const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize );
		const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() );
		Real gridShift = zero;
		//std::cerr << "numberOfgrids = " << numberOfGrids << std::endl;

		@@ -310,10 +309,10 @@ struct CudaPrefixSumKernelLauncher
		/****
		* Compute current grid size and size of data to be scanned
		*/
		const Index gridOffset = gridIdx * maxGridSize * elementsInBlock;
		const Index gridOffset = gridIdx * maxGridSize() * elementsInBlock;
		Index currentSize = size - gridOffset;
		if( currentSize / elementsInBlock > maxGridSize )
		currentSize = maxGridSize * elementsInBlock;
		if( currentSize / elementsInBlock > maxGridSize() )
		currentSize = maxGridSize() * elementsInBlock;

		//std::cerr << "GridIdx = " << gridIdx << " grid size = " << currentSize << std::endl;
		cudaRecursivePrefixSum( prefixSumType,
		@@ -331,37 +330,31 @@ struct CudaPrefixSumKernelLauncher
		* Store the number of CUDA grids for the purpose of unit testing, i.e.
		* to check if we test the algorithm with more than one CUDA grid.
		*/
		gridsCount = numberOfGrids;
		gridsCount() = numberOfGrids;
		}

		/****
		* The following allows setting a smaller maxGridSize so that we can force
		* the prefix sum in CUDA to run with more than one grid in unit tests.
		*/
		static void setMaxGridSize( int newMaxGridSize ) {
		maxGridSize = newMaxGridSize;
		static int& maxGridSize()
		{
		static int maxGridSize = Devices::Cuda::getMaxGridSize();
		return maxGridSize;
		}

		static void resetMaxGridSize() {
		maxGridSize = Devices::Cuda::getMaxGridSize();
		static void resetMaxGridSize()
		{
		maxGridSize() = Devices::Cuda::getMaxGridSize();
		}

		static int maxGridSize;

		static int gridsCount;
		static int& gridsCount()
		{
		static int gridsCount = -1;
		return gridsCount;
		}
		};

		template< PrefixSumType prefixSumType,
		typename Real,
		typename Index >
		int CudaPrefixSumKernelLauncher< prefixSumType, Real, Index >::maxGridSize = Devices::Cuda::getMaxGridSize();

		template< PrefixSumType prefixSumType,
		typename Real,
		typename Index >
		int CudaPrefixSumKernelLauncher< prefixSumType, Real, Index >::gridsCount = -1;


		#endif

		} // namespace Algorithms

src/TNL/Devices/Host.h

+7 −5

Original line number	Diff line number	Diff line
		@@ -113,17 +113,19 @@ public:
		}

		protected:
		static bool& ompEnabled() {
		static bool& ompEnabled()
		{
		#ifdef HAVE_OPENMP
		static bool ompEnabled( true );
		static bool ompEnabled = true;
		#else
		static bool ompEnabled( false );
		static bool ompEnabled = false;
		#endif
		return ompEnabled;
		}

		static int& maxThreadsCount() {
		static int maxThreadsCount( -1 );
		static int& maxThreadsCount()
		{
		static int maxThreadsCount = -1;
		return maxThreadsCount;
		}
		};

src/UnitTests/Containers/VectorPrefixSumTest.h

+13 −13

Original line number	Diff line number	Diff line
		@@ -74,11 +74,11 @@ TYPED_TEST( VectorTest, prefixSum )
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef HAVE_CUDA
		Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::setMaxGridSize( 3 );
		Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::maxGridSize() = 3;
		v = 0;
		v_host = -1;
		v.prefixSum();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v;
		for( int i = 0; i < size; i++ )
		EXPECT_EQ( v_host[ i ], 0 );
		@@ -86,7 +86,7 @@ TYPED_TEST( VectorTest, prefixSum )
		setLinearSequence( v );
		v_host = -1;
		v.prefixSum();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v;
		for( int i = 1; i < size; i++ )
		EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i );
		@@ -94,7 +94,7 @@ TYPED_TEST( VectorTest, prefixSum )
		setConstantSequence( v, 1 );
		v_host = -1;
		v_view.prefixSum();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v_view;
		for( int i = 0; i < size; i++ )
		EXPECT_EQ( v_host[ i ], i + 1 );
		@@ -102,7 +102,7 @@ TYPED_TEST( VectorTest, prefixSum )
		v = 0;
		v_host = -1;
		v_view.prefixSum();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v_view;
		for( int i = 0; i < size; i++ )
		EXPECT_EQ( v_host[ i ], 0 );
		@@ -110,7 +110,7 @@ TYPED_TEST( VectorTest, prefixSum )
		setLinearSequence( v );
		v_host = -1;
		v_view.prefixSum();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v_view;
		for( int i = 1; i < size; i++ )
		EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i );
		@@ -184,12 +184,12 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef HAVE_CUDA
		Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::setMaxGridSize( 3 );
		Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::maxGridSize() = 3;

		setConstantSequence( v, 1 );
		v_host = -1;
		v.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v;
		for( int i = 0; i < size; i++ )
		EXPECT_EQ( v_host[ i ], i );
		@@ -197,7 +197,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
		v.setValue( 0 );
		v_host = -1;
		v.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v;
		for( int i = 0; i < size; i++ )
		EXPECT_EQ( v_host[ i ], 0 );
		@@ -205,7 +205,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
		setLinearSequence( v );
		v_host = -1;
		v.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v;
		for( int i = 1; i < size; i++ )
		EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 );
		@@ -213,7 +213,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
		setConstantSequence( v, 1 );
		v_host = -1;
		v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v_view;
		for( int i = 0; i < size; i++ )
		EXPECT_EQ( v_host[ i ], i );
		@@ -221,7 +221,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
		v.setValue( 0 );
		v_host = -1;
		v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v_view;
		for( int i = 0; i < size; i++ )
		EXPECT_EQ( v_host[ i ], 0 );
		@@ -229,7 +229,7 @@ TYPED_TEST( VectorTest, exclusivePrefixSum )
		setLinearSequence( v );
		v_host = -1;
		v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >();
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 );
		EXPECT_GT( ( Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
		v_host = v_view;
		for( int i = 1; i < size; i++ )
		EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 );