diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1c8f367c00c1111ab9cd15611bd7756a148dd57a..5882f50c99e56ca1a178c21507c8bef0e42684c7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -46,7 +46,8 @@ stages:
         - export CTEST_OUTPUT_ON_FAILURE=1
         - export CTEST_PARALLEL_LEVEL=4
         # enforce (more or less) warning-free builds
-        - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized"
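+        # -Wno-error=vla keeps variable-length-array diagnostics as warnings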
+        - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
         - mkdir -p "./builddir/$CI_JOB_NAME"
         - pushd "./builddir/$CI_JOB_NAME"
         - cmake ../..
diff --git a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp
index a823f0ecb05c1b5c6bf94fbd9aaea62e13465284..ea4b92b61ba5d52fdc6ea98f656d25a97db02ab9 100644
--- a/src/TNL/Algorithms/MemoryOperationsCuda.hpp
+++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp
@@ -92,11 +92,13 @@ copyFromIterator( DestinationElement* destination,
                   SourceIterator last )
 {
    using BaseType = typename std::remove_cv< DestinationElement >::type;
-   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
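+   // getTransferBufferSize() is a byte count, so size the host staging buffer in elements, capped at destinationSize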
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof( BaseType ), destinationSize );
+   std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
    Index copiedElements = 0;
    while( copiedElements < destinationSize && first != last ) {
       Index i = 0;
-      while( i < Cuda::getTransferBufferSize() && first != last )
+      while( i < buffer_size && first != last )
          buffer[ i++ ] = *first++;
       MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
       copiedElements += i;
diff --git a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
index c0e75f2fae166776b64110adafaa346427462371..48e5ad64750c5dc8b7a84a9b4346b345e6ff3f1a 100644
--- a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
+++ b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h
@@ -137,18 +137,21 @@ copy( DestinationElement* destination,
    else
    {
       using BaseType = typename std::remove_cv< SourceElement >::type;
-      std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
-      Index i( 0 );
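+      // allocate the host staging buffer in elements rather than bytes, and no larger than the copy itself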
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof( BaseType ), size );
+      std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
+      Index i = 0;
       while( i < size )
       {
          if( cudaMemcpy( (void*) buffer.get(),
                          (void*) &source[ i ],
-                         TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ),
+                         TNL::min( size - i, buffer_size ) * sizeof( SourceElement ),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
             std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
          TNL_CHECK_CUDA_DEVICE;
-         Index j( 0 );
-         while( j < Cuda::getTransferBufferSize() && i + j < size )
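+         // unpack the staged chunk into the destination, converting each element to DestinationElement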
+         int j = 0;
+         while( j < buffer_size && i + j < size )
          {
             destination[ i + j ] = buffer[ j ];
             j++;
@@ -180,14 +183,16 @@ compare( const Element1* destination,
    TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
    TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
 #ifdef HAVE_CUDA
-   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] };
-   Index compared( 0 );
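+   // compare the ranges chunk by chunk through a host staging buffer sized in elements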
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof( Element2 ), size );
+   std::unique_ptr< Element2[] > host_buffer{ new Element2[ buffer_size ] };
+   Index compared = 0;
    while( compared < size )
    {
-      Index transfer = min( size - compared, Cuda::getTransferBufferSize() );
+      const int transfer = TNL::min( size - compared, buffer_size );
       if( cudaMemcpy( (void*) host_buffer.get(),
                       (void*) &source[ compared ],
                       transfer * sizeof( Element2 ),
                       cudaMemcpyDeviceToHost ) != cudaSuccess )
          std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
       TNL_CHECK_CUDA_DEVICE;
@@ -230,12 +235,15 @@ copy( DestinationElement* destination,
    }
    else
    {
-      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] };
-      Index i( 0 );
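+      // stage the transfer through a host buffer sized in elements and capped at the copy size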
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof( DestinationElement ), size );
+      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ buffer_size ] };
+      Index i = 0;
       while( i < size )
       {
-         Index j( 0 );
-         while( j < Cuda::getTransferBufferSize() && i + j < size )
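+         // fill the staging buffer with the next chunk of source elements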
+         int j = 0;
+         while( j < buffer_size && i + j < size )
          {
             buffer[ j ] = source[ i + j ];
             j++;