Commit c4cc606a authored by Jakub Klinkovský
Browse files

Fixed reduction of segments to work even for empty matrices

parent ce8f92fc
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -105,6 +105,8 @@ struct EllpackCudaReductionDispatcher
   exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize )
   {
   #ifdef HAVE_CUDA
      if( last <= first )
         return;
      const Index segmentsCount = last - first;
      const Index threadsCount = segmentsCount * 32;
      const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 );
@@ -128,6 +130,8 @@ struct EllpackCudaReductionDispatcher< Index, Fetch, Reduction, ResultKeeper, Re
   exec( Index first, Index last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Index segmentSize )
   {
   #ifdef HAVE_CUDA
      if( last <= first )
         return;
      const Index segmentsCount = last - first;
      const Index threadsCount = segmentsCount * 32;
      const Index blocksCount = Cuda::getNumberOfBlocks( threadsCount, 256 );
+2 −0
Original line number Diff line number Diff line
@@ -245,6 +245,8 @@ reduceSegments( const OffsetsView& offsets,
    TNL_ASSERT_LE( this->threadsPerSegment, ThreadsInBlock, "" );

#ifdef HAVE_CUDA
    if( last <= first )
       return;
    const size_t threadsCount = this->threadsPerSegment * ( last - first );
    dim3 blocksCount, gridsCount, blockSize( ThreadsInBlock );
    TNL::Cuda::setupThreads( blockSize, blocksCount, gridsCount, threadsCount );
+3 −0
Original line number Diff line number Diff line
@@ -424,6 +424,9 @@ struct CSRLightKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reduce, Kee
                       const Index threadsPerSegment )
   {
#ifdef HAVE_CUDA
    if( last <= first )
       return;

      const size_t threads = 128;
      Index blocks, groupSize;

+3 −0
Original line number Diff line number Diff line
@@ -140,6 +140,9 @@ reduceSegments( const OffsetsView& offsets,
                         Args... args )
{
#ifdef HAVE_CUDA
    if( last <= first )
       return;

    const Index warpsCount = last - first;
    const size_t threadsCount = warpsCount * TNL::Cuda::getWarpSize();
    dim3 blocksCount, gridsCount, blockSize( 256 );
+3 −0
Original line number Diff line number Diff line
@@ -243,6 +243,9 @@ void test_SetDimensions()

   EXPECT_EQ( m.getRows(), 9 );
   EXPECT_EQ( m.getColumns(), 8 );

   // test empty matrix
   m.setDimensions( 0, 0 );
}

template< typename Matrix >