Loading src/TNL/Algorithms/Segments/EllpackView.hpp +4 −0 Original line number Diff line number Diff line Loading @@ -105,6 +105,8 @@ struct EllpackCudaReductionDispatcher exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize ) { #ifdef HAVE_CUDA if( last <= first ) return; const Index segmentsCount = last - first; const Index threadsCount = segmentsCount * 32; const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 ); Loading @@ -128,6 +130,8 @@ struct EllpackCudaReductionDispatcher< Index, Fetch, Reduction, ResultKeeper, Re exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize ) { #ifdef HAVE_CUDA if( last <= first ) return; const Index segmentsCount = last - first; const Index threadsCount = segmentsCount * 32; const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 ); Loading src/TNL/Algorithms/Segments/Kernels/CSRHybridKernel.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -245,6 +245,8 @@ reduceSegments( const OffsetsView& offsets, TNL_ASSERT_LE( this->threadsPerSegment, ThreadsInBlock, "" ); #ifdef HAVE_CUDA if( last <= first ) return; const size_t threadsCount = this->threadsPerSegment * ( last - first ); dim3 blocksCount, gridsCount, blockSize( ThreadsInBlock ); TNL::Cuda::setupThreads( blockSize, blocksCount, gridsCount, threadsCount ); Loading src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -424,6 +424,9 @@ struct CSRLightKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reduce, Kee const Index threadsPerSegment ) { #ifdef HAVE_CUDA if( last <= first ) return; const size_t threads = 128; Index blocks, groupSize; Loading src/TNL/Algorithms/Segments/Kernels/CSRVectorKernel.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -140,6 +140,9 @@ reduceSegments( const OffsetsView& offsets, Args... args ) { #ifdef HAVE_CUDA if( last <= first ) return; const Index warpsCount = last - first; const size_t threadsCount = warpsCount * TNL::Cuda::getWarpSize(); dim3 blocksCount, gridsCount, blockSize( 256 ); Loading src/UnitTests/Matrices/SparseMatrixTest.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -243,6 +243,9 @@ void test_SetDimensions() EXPECT_EQ( m.getRows(), 9 ); EXPECT_EQ( m.getColumns(), 8 ); // test empty matrix m.setDimensions( 0, 0 ); } template< typename Matrix > Loading Loading
src/TNL/Algorithms/Segments/EllpackView.hpp +4 −0 Original line number Diff line number Diff line Loading @@ -105,6 +105,8 @@ struct EllpackCudaReductionDispatcher exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize ) { #ifdef HAVE_CUDA if( last <= first ) return; const Index segmentsCount = last - first; const Index threadsCount = segmentsCount * 32; const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 ); Loading @@ -128,6 +130,8 @@ struct EllpackCudaReductionDispatcher< Index, Fetch, Reduction, ResultKeeper, Re exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize ) { #ifdef HAVE_CUDA if( last <= first ) return; const Index segmentsCount = last - first; const Index threadsCount = segmentsCount * 32; const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 ); Loading
src/TNL/Algorithms/Segments/Kernels/CSRHybridKernel.hpp +2 −0 Original line number Diff line number Diff line Loading @@ -245,6 +245,8 @@ reduceSegments( const OffsetsView& offsets, TNL_ASSERT_LE( this->threadsPerSegment, ThreadsInBlock, "" ); #ifdef HAVE_CUDA if( last <= first ) return; const size_t threadsCount = this->threadsPerSegment * ( last - first ); dim3 blocksCount, gridsCount, blockSize( ThreadsInBlock ); TNL::Cuda::setupThreads( blockSize, blocksCount, gridsCount, threadsCount ); Loading
src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -424,6 +424,9 @@ struct CSRLightKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reduce, Kee const Index threadsPerSegment ) { #ifdef HAVE_CUDA if( last <= first ) return; const size_t threads = 128; Index blocks, groupSize; Loading
src/TNL/Algorithms/Segments/Kernels/CSRVectorKernel.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -140,6 +140,9 @@ reduceSegments( const OffsetsView& offsets, Args... args ) { #ifdef HAVE_CUDA if( last <= first ) return; const Index warpsCount = last - first; const size_t threadsCount = warpsCount * TNL::Cuda::getWarpSize(); dim3 blocksCount, gridsCount, blockSize( 256 ); Loading
src/UnitTests/Matrices/SparseMatrixTest.hpp +3 −0 Original line number Diff line number Diff line Loading @@ -243,6 +243,9 @@ void test_SetDimensions() EXPECT_EQ( m.getRows(), 9 ); EXPECT_EQ( m.getColumns(), 8 ); // test empty matrix m.setDimensions( 0, 0 ); } template< typename Matrix > Loading