diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index 9b12ab1f70f5315c87008d090eab0a3190d1df71..1aa9b51a6cbdbaa8c596db148b8dbceae62066fd 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -73,13 +73,27 @@ void EllpackSymmetricGraph< Real, Device, Index >::setDimensions( const IndexTyp TNL_ASSERT( rows > 0 && columns > 0, std::cerr << "rows = " << rows << " columns = " << columns << std::endl ); + this->rows = rows; - this->columns = columns; + this->columns = columns; + if( std::is_same< DeviceType, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); + { + this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() ); + + if( this->rows - this->alignedRows > 0 ) + { + IndexType missingRows = this->rows - this->alignedRows; + missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() ); + this->alignedRows += missingRows; + +// this->alignedRows += roundToMultiple( this->rows - this->alignedRows, Devices::Cuda::getWarpSize() ); + } + } else this->alignedRows = rows; + if( this->rowLengths != 0 ) - allocateElements(); + allocateElements(); } template< typename Real,