Commit f4bab3c9 authored by Lukas Cejka, committed by Tomáš Oberhuber
Browse files

Implemented fix for case when roundToMultiple would give a number smaller than...

Implemented a fix for the case where roundToMultiple would return a number smaller than the number of rows, which caused out-of-bounds indexing on CUDA.
parent 4e1309ce
Loading
Loading
Loading
Loading
+17 −3
Original line number Diff line number Diff line
@@ -73,11 +73,25 @@ void EllpackSymmetricGraph< Real, Device, Index >::setDimensions( const IndexTyp
   TNL_ASSERT( rows > 0 && columns > 0,
              std::cerr << "rows = " << rows
                   << " columns = " << columns << std::endl );
   
   this->rows = rows;
   this->columns = columns;
   
   if( std::is_same< DeviceType, Devices::Cuda >::value )
      this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() );
   {
       this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
       
       if( this->rows - this->alignedRows > 0 )
       {
           IndexType missingRows = this->rows - this->alignedRows;
           missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() );
           this->alignedRows +=  missingRows;
           
//           this->alignedRows += roundToMultiple( this->rows - this->alignedRows, Devices::Cuda::getWarpSize() );
       }
   }
   else this->alignedRows = rows;
   
   if( this->rowLengths != 0 )
       allocateElements();
}