Skip to content
Snippets Groups Projects
Commit 4e1309ce authored by Lukas Cejka's avatar Lukas Cejka Committed by Tomáš Oberhuber
Browse files

Implemented fix for case when roundToMultiple would give a number smaller than...

Fixed the case where roundToMultiple returns a number smaller than the number of rows, which caused an out-of-bounds index on CUDA
parent 1ac6bcbf
No related branches found
No related tags found
1 merge request: !45 Matrices revision
...@@ -57,13 +57,27 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row ...@@ -57,13 +57,27 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row
TNL_ASSERT( rows > 0 && columns > 0, TNL_ASSERT( rows > 0 && columns > 0,
std::cerr << "rows = " << rows std::cerr << "rows = " << rows
<< " columns = " << columns <<std::endl ); << " columns = " << columns <<std::endl );
this->rows = rows; this->rows = rows;
this->columns = columns; this->columns = columns;
if( std::is_same< DeviceType, Devices::Cuda >::value ) if( std::is_same< DeviceType, Devices::Cuda >::value )
this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); {
this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
if( this->rows - this->alignedRows > 0 )
{
IndexType missingRows = this->rows - this->alignedRows;
missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() );
this->alignedRows += missingRows;
// this->alignedRows += roundToMultiple( this->rows - this->alignedRows, Devices::Cuda::getWarpSize() );
}
}
else this->alignedRows = rows; else this->alignedRows = rows;
if( this->rowLengths != 0 ) if( this->rowLengths != 0 )
allocateElements(); allocateElements();
} }
template< typename Real, template< typename Real,
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment