Fixed handling of Cuda::getTransferBufferSize() in memory operations (9615d107) · Commits · TNL / tnl-dev

.gitlab-ci.yml

+1 −1

Original line number	Diff line number	Diff line
		@@ -46,7 +46,7 @@ stages:
		- export CTEST_OUTPUT_ON_FAILURE=1
		- export CTEST_PARALLEL_LEVEL=4
		# enforce (more or less) warning-free builds
		- - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized"
		+ - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
		- mkdir -p "./builddir/$CI_JOB_NAME"
		- pushd "./builddir/$CI_JOB_NAME"
		- cmake ../..

src/TNL/Algorithms/MemoryOperationsCuda.hpp

+3 −2

Original line number	Diff line number	Diff line
		@@ -92,11 +92,12 @@ copyFromIterator( DestinationElement* destination,
		SourceIterator last )
		{
		using BaseType = typename std::remove_cv< DestinationElement >::type;
		- std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
		+ const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize );
		+ std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
		Index copiedElements = 0;
		while( copiedElements < destinationSize && first != last ) {
		Index i = 0;
		- while( i < Cuda::getTransferBufferSize() && first != last )
		+ while( i < buffer_size && first != last )
		buffer[ i++ ] = *first++;
		MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
		copiedElements += i;

src/TNL/Algorithms/MultiDeviceMemoryOperations.h

+16 −13

Original line number	Diff line number	Diff line
		@@ -137,18 +137,19 @@ copy( DestinationElement* destination,
		else
		{
		using BaseType = typename std::remove_cv< SourceElement >::type;
		- std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
		- Index i( 0 );
		+ const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), size );
		+ std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
		+ Index i = 0;
		while( i < size )
		{
		if( cudaMemcpy( (void*) buffer.get(),
		(void*) &source[ i ],
		- TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ),
		+ TNL::min( size - i, buffer_size ) * sizeof(SourceElement),
		cudaMemcpyDeviceToHost ) != cudaSuccess )
		std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
		TNL_CHECK_CUDA_DEVICE;
		- Index j( 0 );
		- while( j < Cuda::getTransferBufferSize() && i + j < size )
		+ int j = 0;
		+ while( j < buffer_size && i + j < size )
		{
		destination[ i + j ] = buffer[ j ];
		j++;
		@@ -180,11 +181,12 @@ compare( const Element1* destination,
		TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
		#ifdef HAVE_CUDA
		- std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] };
		- Index compared( 0 );
		+ const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(Element2), size );
		+ std::unique_ptr< Element2[] > host_buffer{ new Element2[ buffer_size ] };
		+ Index compared = 0;
		while( compared < size )
		{
		- Index transfer = min( size - compared, Cuda::getTransferBufferSize() );
		+ const int transfer = TNL::min( size - compared, buffer_size );
		if( cudaMemcpy( (void*) host_buffer.get(),
		(void*) &source[ compared ],
		transfer * sizeof(Element2),
		@@ -230,12 +232,13 @@ copy( DestinationElement* destination,
		}
		else
		{
		- std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] };
		- Index i( 0 );
		+ const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(DestinationElement), size );
		+ std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ buffer_size ] };
		+ Index i = 0;
		while( i < size )
		{
		- Index j( 0 );
		- while( j < Cuda::getTransferBufferSize() && i + j < size )
		+ int j = 0;
		+ while( j < buffer_size && i + j < size )
		{
		buffer[ j ] = source[ i + j ];
		j++;