.gitlab-ci.yml  +1 −1

@@ -46,7 +46,7 @@ stages:
    - export CTEST_OUTPUT_ON_FAILURE=1
    - export CTEST_PARALLEL_LEVEL=4
    # enforce (more or less) warning-free builds
-   - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized"
+   - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
    - mkdir -p "./builddir/$CI_JOB_NAME"
    - pushd "./builddir/$CI_JOB_NAME"
    - cmake ../..
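For illustration, this is the kind of construct the new flag exempts from -Werror. A hypothetical snippet, not code from this repository: GCC and Clang accept C-style variable-length arrays in C++ as an extension and diagnose them with -Wvla; under -Werror that diagnostic would fail the build, while -Wno-error=vla downgrades it back to a plain warning.

// Hypothetical example of a -Wvla diagnostic; not TNL code.
void fill( int n )
{
   double tmp[ n ];   // triggers -Wvla: n is not a compile-time constant
   for( int i = 0; i < n; i++ )
      tmp[ i ] = 0.0;
}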
src/TNL/Algorithms/MemoryOperationsCuda.hpp  +3 −2

@@ -92,11 +92,12 @@ copyFromIterator( DestinationElement* destination,
                   SourceIterator last )
 {
    using BaseType = typename std::remove_cv< DestinationElement >::type;
-   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize );
+   std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
    Index copiedElements = 0;
    while( copiedElements < destinationSize && first != last ) {
       Index i = 0;
-      while( i < Cuda::getTransferBufferSize() && first != last )
+      while( i < buffer_size && first != last )
          buffer[ i++ ] = *first++;
       MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
       copiedElements += i;
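The pattern being fixed here can be sketched in isolation. A minimal standalone sketch, not TNL's API: chunked_host_to_device, STAGING_BYTES, and the plain cudaMemcpy call are assumptions standing in for TNL::min, Cuda::getTransferBufferSize(), and MultiDeviceMemoryOperations::copy. The point of the fix: getTransferBufferSize() returns a byte count, so the staging buffer must be sized in elements (bytes / sizeof(Element)) and can also be capped by the destination size; the old code used the byte count as an element count.

#include <algorithm>
#include <cstddef>
#include <memory>
#include <cuda_runtime.h>

// Host-side staging buffer size in bytes (stand-in for
// Cuda::getTransferBufferSize(), which is also a byte count).
constexpr std::size_t STAGING_BYTES = 1u << 20;

// Copy [first, last) from a host iterator into device memory in chunks.
template< typename Element, typename Iterator >
void chunked_host_to_device( Element* device_dst, std::size_t dst_size,
                             Iterator first, Iterator last )
{
   // The fix in this diff: size the staging buffer in elements,
   // not bytes, and never larger than the destination.
   const std::size_t buffer_size = std::min( STAGING_BYTES / sizeof(Element), dst_size );
   std::unique_ptr< Element[] > buffer{ new Element[ buffer_size ] };

   std::size_t copied = 0;
   while( copied < dst_size && first != last ) {
      // Fill at most one buffer worth of elements from the iterator...
      std::size_t i = 0;
      while( i < buffer_size && first != last )
         buffer[ i++ ] = *first++;
      // ...and push that chunk to the device.
      cudaMemcpy( device_dst + copied, buffer.get(),
                  i * sizeof(Element), cudaMemcpyHostToDevice );
      copied += i;
   }
}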
src/TNL/Algorithms/MultiDeviceMemoryOperations.h  +16 −13

@@ -137,18 +137,19 @@ copy( DestinationElement* destination,
    else {
       using BaseType = typename std::remove_cv< SourceElement >::type;
-      std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
-      Index i( 0 );
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), size );
+      std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
+      Index i = 0;
       while( i < size )
       {
          if( cudaMemcpy( (void*) buffer.get(),
                          (void*) &source[ i ],
-                         TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ),
+                         TNL::min( size - i, buffer_size ) * sizeof(SourceElement),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
             std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
          TNL_CHECK_CUDA_DEVICE;
-         Index j( 0 );
-         while( j < Cuda::getTransferBufferSize() && i + j < size )
+         int j = 0;
+         while( j < buffer_size && i + j < size )
          {
             destination[ i + j ] = buffer[ j ];
             j++;

@@ -180,11 +181,12 @@ compare( const Element1* destination,
    TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
    TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
 #ifdef HAVE_CUDA
-   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] };
-   Index compared( 0 );
+   const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(Element2), size );
+   std::unique_ptr< Element2[] > host_buffer{ new Element2[ buffer_size ] };
+   Index compared = 0;
    while( compared < size )
    {
-      Index transfer = min( size - compared, Cuda::getTransferBufferSize() );
+      const int transfer = TNL::min( size - compared, buffer_size );
       if( cudaMemcpy( (void*) host_buffer.get(),
                       (void*) &source[ compared ],
                       transfer * sizeof(Element2),

@@ -230,12 +232,13 @@ copy( DestinationElement* destination,
    }
    else {
-      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] };
-      Index i( 0 );
+      const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(DestinationElement), size );
+      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ buffer_size ] };
+      Index i = 0;
       while( i < size )
       {
-         Index j( 0 );
-         while( j < Cuda::getTransferBufferSize() && i + j < size )
+         int j = 0;
+         while( j < buffer_size && i + j < size )
          {
             buffer[ j ] = source[ i + j ];
             j++;
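The compare hunk follows the same chunked device-to-host pattern; a hedged standalone sketch of it, under the same element-count sizing. The name compare_device_host and the 1 MiB constant are hypothetical stand-ins, not TNL identifiers.

#include <algorithm>
#include <memory>
#include <cuda_runtime.h>

// Compare host_data against device_data element-by-element, staging the
// device data through a bounded host buffer one chunk at a time.
template< typename Element, typename Index >
bool compare_device_host( const Element* host_data, const Element* device_data, Index size )
{
   // Buffer size in elements: byte budget / element size, capped by size.
   const Index buffer_size = std::min< Index >( (1u << 20) / sizeof(Element), size );
   std::unique_ptr< Element[] > host_buffer{ new Element[ buffer_size ] };
   Index compared = 0;
   while( compared < size ) {
      // Transfer at most one buffer worth of elements per iteration.
      const Index transfer = std::min( size - compared, buffer_size );
      if( cudaMemcpy( host_buffer.get(), device_data + compared,
                      transfer * sizeof(Element), cudaMemcpyDeviceToHost ) != cudaSuccess )
         return false;
      if( ! std::equal( host_buffer.get(), host_buffer.get() + transfer, host_data + compared ) )
         return false;
      compared += transfer;
   }
   return true;
}

The common thread of all three hunks: buffer_size is now an element count derived from the byte-valued Cuda::getTransferBufferSize(), so the staging buffers are no longer over-allocated by a factor of sizeof(Element), and the chunk loops no longer read past the intended chunk boundary for multi-byte element types.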