diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1c8f367c00c1111ab9cd15611bd7756a148dd57a..5882f50c99e56ca1a178c21507c8bef0e42684c7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -46,7 +46,7 @@ stages:
     - export CTEST_OUTPUT_ON_FAILURE=1
     - export CTEST_PARALLEL_LEVEL=4
     # enforce (more or less) warning-free builds
-    - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized"
+    - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
     - mkdir -p "./builddir/$CI_JOB_NAME"
     - pushd "./builddir/$CI_JOB_NAME"
     - cmake ../..
diff --git a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp
index a823f0ecb05c1b5c6bf94fbd9aaea62e13465284..ea4b92b61ba5d52fdc6ea98f656d25a97db02ab9 100644
--- a/src/TNL/Algorithms/MemoryOperationsCuda.hpp
+++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp
@@ -92,11 +92,12 @@ copyFromIterator( DestinationElement* destination,
                    SourceIterator last )
 {
    using BaseType = typename std::remove_cv< DestinationElement >::type;
-   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize );
+   std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
    Index copiedElements = 0;
    while( copiedElements < destinationSize && first != last ) {
       Index i = 0;
-      while( i < Cuda::getTransferBufferSize() && first != last )
+      while( i < buffer_size && first != last )
          buffer[ i++ ] = *first++;
       MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
       copiedElements += i;
diff --git a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
index c0e75f2fae166776b64110adafaa346427462371..48e5ad64750c5dc8b7a84a9b4346b345e6ff3f1a 100644
--- a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
+++ b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
@@ -137,18 +137,19 @@ copy( DestinationElement* destination,
    else
    {
       using BaseType = typename std::remove_cv< SourceElement >::type;
-      std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
-      Index i( 0 );
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), size );
+      std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
+      Index i = 0;
       while( i < size )
       {
          if( cudaMemcpy( (void*) buffer.get(),
                          (void*) &source[ i ],
-                         TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ),
+                         TNL::min( size - i, buffer_size ) * sizeof(SourceElement),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
             std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
          TNL_CHECK_CUDA_DEVICE;
-         Index j( 0 );
-         while( j < Cuda::getTransferBufferSize() && i + j < size )
+         int j = 0;
+         while( j < buffer_size && i + j < size )
          {
             destination[ i + j ] = buffer[ j ];
             j++;
@@ -180,14 +181,15 @@ compare( const Element1* destination,
    TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
    TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
 #ifdef HAVE_CUDA
-   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] };
-   Index compared( 0 );
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(Element2), size );
+   std::unique_ptr< Element2[] > host_buffer{ new Element2[ buffer_size ] };
+   Index compared = 0;
    while( compared < size )
    {
-      Index transfer = min( size - compared, Cuda::getTransferBufferSize() );
+      const int transfer = TNL::min( size - compared, buffer_size );
       if( cudaMemcpy( (void*) host_buffer.get(),
                       (void*) &source[ compared ],
-                      transfer * sizeof( Element2 ),
+                      transfer * sizeof(Element2),
                       cudaMemcpyDeviceToHost ) != cudaSuccess )
          std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
       TNL_CHECK_CUDA_DEVICE;
@@ -230,12 +232,13 @@ copy( DestinationElement* destination,
    }
    else
    {
-      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] };
-      Index i( 0 );
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(DestinationElement), size );
+      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ buffer_size ] };
+      Index i = 0;
       while( i < size )
       {
-         Index j( 0 );
-         while( j < Cuda::getTransferBufferSize() && i + j < size )
+         int j = 0;
+         while( j < buffer_size && i + j < size )
          {
             buffer[ j ] = source[ i + j ];
             j++;
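For reference, a minimal standalone sketch of the staging-buffer pattern these hunks adjust: `Cuda::getTransferBufferSize()` is treated as a byte budget, converted to an element count, and capped at the number of elements actually being copied, then data is moved through the host buffer chunk by chunk. The function name `copyDeviceToHostChunked` and the constant `kTransferBufferBytes` are illustrative stand-ins, not TNL API; the real implementation is the patched code in `MultiDeviceMemoryOperations.h`.

```cpp
#include <cuda_runtime.h>
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <memory>

// Stand-in for Cuda::getTransferBufferSize(): a byte count, not an element count.
constexpr std::size_t kTransferBufferBytes = 1 << 20;  // 1 MiB staging buffer

// Copies `size` elements from device memory to host memory via a bounded host buffer.
template< typename Element, typename Index >
void copyDeviceToHostChunked( Element* destination, const Element* source, Index size )
{
   // Cap the staging buffer at min( bytes / sizeof(Element), size ) elements,
   // mirroring the buffer_size computation introduced by the patch.
   const std::size_t buffer_size =
      std::min< std::size_t >( kTransferBufferBytes / sizeof(Element), (std::size_t) size );
   std::unique_ptr< Element[] > buffer{ new Element[ buffer_size ] };

   Index i = 0;
   while( i < size ) {
      const std::size_t transfer =
         std::min< std::size_t >( buffer_size, (std::size_t)( size - i ) );
      // Stage one chunk on the host...
      if( cudaMemcpy( buffer.get(), source + i, transfer * sizeof(Element),
                      cudaMemcpyDeviceToHost ) != cudaSuccess )
         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
      // ...then copy it element by element into the destination range.
      for( std::size_t j = 0; j < transfer; j++ )
         destination[ i + j ] = buffer[ j ];
      i += (Index) transfer;
   }
}
```

The division by `sizeof(Element)` keeps the staging buffer within the byte budget, and the additional cap at `size` avoids allocating more elements than will ever be transferred.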