Skip to content
Snippets Groups Projects
Commit 4e1309ce authored by Lukas Cejka, committed by Tomáš Oberhuber
Browse files

Implemented fix for case when roundToMultiple would give a number smaller than...

Implemented a fix for the case where roundToMultiple would return a number smaller than the number of rows, which caused an out-of-bounds index on CUDA.
parent 1ac6bcbf
No related branches found
No related tags found
1 merge request: !45 "Matrices revision"
......@@ -57,13 +57,27 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row
TNL_ASSERT( rows > 0 && columns > 0,
std::cerr << "rows = " << rows
<< " columns = " << columns <<std::endl );
this->rows = rows;
this->columns = columns;
this->columns = columns;
if( std::is_same< DeviceType, Devices::Cuda >::value )
this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() );
{
this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
if( this->rows - this->alignedRows > 0 )
{
IndexType missingRows = this->rows - this->alignedRows;
missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() );
this->alignedRows += missingRows;
// this->alignedRows += roundToMultiple( this->rows - this->alignedRows, Devices::Cuda::getWarpSize() );
}
}
else this->alignedRows = rows;
if( this->rowLengths != 0 )
allocateElements();
allocateElements();
}
template< typename Real,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment