From f4bab3c9bec2d594dc9ae583977c97fd16596b5d Mon Sep 17 00:00:00 2001 From: Lukas Cejka <lukas.ostatek@gmail.com> Date: Mon, 24 Jun 2019 23:28:39 +0200 Subject: [PATCH] Implemented fix for case when roundToMultiple would give a number smaller than the number of rows, thus causing index to be out of bounds on CUDA --- src/TNL/Matrices/EllpackSymmetricGraph_impl.h | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index 9b12ab1f70..1aa9b51a6c 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -73,13 +73,27 @@ void EllpackSymmetricGraph< Real, Device, Index >::setDimensions( const IndexTyp TNL_ASSERT( rows > 0 && columns > 0, std::cerr << "rows = " << rows << " columns = " << columns << std::endl ); + this->rows = rows; - this->columns = columns; + this->columns = columns; + if( std::is_same< DeviceType, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); + { + this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() ); + + if( this->rows - this->alignedRows > 0 ) + { + IndexType missingRows = this->rows - this->alignedRows; + missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() ); + this->alignedRows += missingRows; + +// this->alignedRows += roundToMultiple( this->rows - this->alignedRows, Devices::Cuda::getWarpSize() ); + } + } else this->alignedRows = rows; + if( this->rowLengths != 0 ) - allocateElements(); + allocateElements(); } template< typename Real, -- GitLab