diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h index fa8952b4ff22c376c0d9ae118ea933173833ed99..f64cef4c5b909428511e006d15d820c5c83d0f27 100644 --- a/src/TNL/Matrices/EllpackSymmetric_impl.h +++ b/src/TNL/Matrices/EllpackSymmetric_impl.h @@ -57,13 +57,27 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row TNL_ASSERT( rows > 0 && columns > 0, std::cerr << "rows = " << rows << " columns = " << columns <<std::endl ); + this->rows = rows; - this->columns = columns; + this->columns = columns; + if( std::is_same< DeviceType, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); + { + this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() ); + + if( this->rows - this->alignedRows > 0 ) + { + IndexType missingRows = this->rows - this->alignedRows; + missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() ); + this->alignedRows += missingRows; + +// this->alignedRows += roundToMultiple( this->rows - this->alignedRows, Devices::Cuda::getWarpSize() ); + } + } else this->alignedRows = rows; + if( this->rowLengths != 0 ) - allocateElements(); + allocateElements(); } template< typename Real,