This project is archived. Its data is read-only.

Commit c4cc606a authored Dec 16, 2021 by Jakub Klinkovský

Fixed segment reduction so that it also works for empty matrices

parent ce8f92fc

src/TNL/Algorithms/Segments/EllpackView.hpp

+4 −0

Original line number	Diff line number	Diff line
		@@ -105,6 +105,8 @@ struct EllpackCudaReductionDispatcher
		exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize )
		{
		#ifdef HAVE_CUDA
		if( last <= first )
		return;
		const Index segmentsCount = last - first;
		const Index threadsCount = segmentsCount * 32;
		const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 );
		@@ -128,6 +130,8 @@ struct EllpackCudaReductionDispatcher< Index, Fetch, Reduction, ResultKeeper, Re
		exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize )
		{
		#ifdef HAVE_CUDA
		if( last <= first )
		return;
		const Index segmentsCount = last - first;
		const Index threadsCount = segmentsCount * 32;
		const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 );

src/TNL/Algorithms/Segments/Kernels/CSRHybridKernel.hpp

+2 −0

Original line number	Diff line number	Diff line
		@@ -245,6 +245,8 @@ reduceSegments( const OffsetsView& offsets,
		TNL_ASSERT_LE( this->threadsPerSegment, ThreadsInBlock, "" );

		#ifdef HAVE_CUDA
		if( last <= first )
		return;
		const size_t threadsCount = this->threadsPerSegment * ( last - first );
		dim3 blocksCount, gridsCount, blockSize( ThreadsInBlock );
		TNL::Cuda::setupThreads( blockSize, blocksCount, gridsCount, threadsCount );

src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp

+3 −0

Original line number	Diff line number	Diff line
		@@ -424,6 +424,9 @@ struct CSRLightKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reduce, Kee
		const Index threadsPerSegment )
		{
		#ifdef HAVE_CUDA
		if( last <= first )
		return;

		const size_t threads = 128;
		Index blocks, groupSize;

src/TNL/Algorithms/Segments/Kernels/CSRVectorKernel.hpp

+3 −0

Original line number	Diff line number	Diff line
		@@ -140,6 +140,9 @@ reduceSegments( const OffsetsView& offsets,
		Args... args )
		{
		#ifdef HAVE_CUDA
		if( last <= first )
		return;

		const Index warpsCount = last - first;
		const size_t threadsCount = warpsCount * TNL::Cuda::getWarpSize();
		dim3 blocksCount, gridsCount, blockSize( 256 );

src/UnitTests/Matrices/SparseMatrixTest.hpp

+3 −0

Original line number	Diff line number	Diff line
		@@ -243,6 +243,9 @@ void test_SetDimensions()

		EXPECT_EQ( m.getRows(), 9 );
		EXPECT_EQ( m.getColumns(), 8 );

		// test empty matrix
		m.setDimensions( 0, 0 );
		}

		template< typename Matrix >