src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp  +9 −9

@@ -106,11 +106,11 @@ copyFromIterator( DestinationElement* destination,
                    SourceIterator last )
 {
    using BaseType = typename std::remove_cv< DestinationElement >::type;
-   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] };
+   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
    Index copiedElements = 0;
    while( copiedElements < destinationSize && first != last ) {
       Index i = 0;
-      while( i < Devices::Cuda::getGPUTransferBufferSize() && first != last )
+      while( i < Cuda::getTransferBufferSize() && first != last )
          buffer[ i++ ] = *first++;
       ArrayOperations< Devices::Cuda, Devices::Host >::copy( &destination[ copiedElements ], buffer.get(), i );
       copiedElements += i;

@@ -197,18 +197,18 @@ copy( DestinationElement* destination,
    else {
       using BaseType = typename std::remove_cv< SourceElement >::type;
-      std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] };
+      std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
       Index i( 0 );
       while( i < size )
       {
          if( cudaMemcpy( (void*) buffer.get(),
                          (void*) &source[ i ],
-                         TNL::min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ),
+                         TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
             std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
          TNL_CHECK_CUDA_DEVICE;
          Index j( 0 );
-         while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
+         while( j < Cuda::getTransferBufferSize() && i + j < size )
          {
             destination[ i + j ] = buffer[ j ];
             j++;

@@ -239,11 +239,11 @@ compare( const Element1* destination,
    TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
    TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
 #ifdef HAVE_CUDA
-   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] };
+   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] };
    Index compared( 0 );
    while( compared < size )
    {
-      Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() );
+      Index transfer = min( size - compared, Cuda::getTransferBufferSize() );
       if( cudaMemcpy( (void*) host_buffer.get(),
                       (void*) &source[ compared ],
                       transfer * sizeof( Element2 ),

@@ -288,12 +288,12 @@ copy( DestinationElement* destination,
    }
    else
    {
-      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] };
+      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] };
       Index i( 0 );
       while( i < size )
       {
          Index j( 0 );
-         while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
+         while( j < Cuda::getTransferBufferSize() && i + j < size )
          {
            buffer[ j ] = source[ i + j ];
            j++;
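All four hunks above use the same staging pattern: a host-side buffer of Cuda::getTransferBufferSize() elements is allocated once and the data is moved through it chunk by chunk, so host scratch memory stays bounded regardless of the array size. A minimal standalone sketch of the device-to-host direction, assuming only the CUDA runtime API (copyChunked and the local getTransferBufferSize stand-in are illustrative names, not TNL code):

   #include <algorithm>
   #include <iostream>
   #include <memory>
   #include <cuda_runtime.h>

   // Illustrative stand-in for TNL's Cuda::getTransferBufferSize().
   inline constexpr int getTransferBufferSize()
   {
      return 1 << 20;
   }

   // Copies `size` elements from device memory to host memory, staging them
   // through a reusable buffer of 2^20 elements.
   template< typename Element, typename Index >
   void copyChunked( Element* destination, const Element* source, Index size )
   {
      std::unique_ptr< Element[] > buffer{ new Element[ getTransferBufferSize() ] };
      Index i = 0;
      while( i < size ) {
         const Index transfer = std::min( size - i, (Index) getTransferBufferSize() );
         if( cudaMemcpy( (void*) buffer.get(),
                         (const void*) &source[ i ],
                         transfer * sizeof( Element ),
                         cudaMemcpyDeviceToHost ) != cudaSuccess )
            std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
         // Hand the staged chunk over to its final host location; in the TNL
         // code above this step may also convert between element types.
         for( Index j = 0; j < transfer; j++ )
            destination[ i + j ] = buffer[ j ];
         i += transfer;
      }
   }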
src/TNL/Cuda/LaunchHelpers.h  +8 −0

@@ -30,6 +30,14 @@ inline constexpr int getWarpSize()
    return 32;
 }
 
+// When we transfer data between the GPU and the CPU we use 1 MiB buffer. This
+// size should ensure good performance.
+// We use the same buffer size even for retyping data during IO operations.
+inline constexpr int getTransferBufferSize()
+{
+   return 1 << 20;
+}
+
 #ifdef HAVE_CUDA
 __device__ inline int getGlobalThreadIdx( const int gridIdx = 0,
                                           const int gridSize = getMaxGridSize() )
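For call sites, the change is a rename plus a namespace move: the helper stays constexpr but now lives as a free function in the TNL::Cuda namespace instead of a static member of the Devices::Cuda class. A sketch of the difference, assuming code inside namespace TNL (the static_assert is illustrative only, not part of the patch):

   // Before this patch: static member of the Devices::Cuda class.
   // const int n = Devices::Cuda::getGPUTransferBufferSize();

   // After this patch: free function in the TNL::Cuda namespace.
   const int n = Cuda::getTransferBufferSize();

   // The function remains constexpr, so it can still size arrays and
   // appear in constant expressions.
   static_assert( Cuda::getTransferBufferSize() == (1 << 20),
                  "transfer buffer holds 2^20 elements" );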
src/TNL/Devices/Cuda.h  +0 −10

@@ -24,16 +24,6 @@ public:
    static inline bool setup( const Config::ParameterContainer& parameters,
                              const String& prefix = "" );
-
-   static inline constexpr int getGPUTransferBufferSize();
-
-   ////
-   // When we transfer data between the GPU and the CPU we use 5 MB buffer. This
-   // size should ensure good performance -- see.
-   // http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer .
-   // We use the same buffer size even for retyping data during IO operations.
-   //
-   // static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
 };
 
 } // namespace Devices
src/TNL/Devices/Cuda_impl.h  +0 −5

@@ -51,10 +51,5 @@ Cuda::setup( const Config::ParameterContainer& parameters,
    return true;
 }
 
-inline constexpr int Cuda::getGPUTransferBufferSize()
-{
-   return 1 << 20;
-}
-
 } // namespace Devices
 } // namespace TNL
src/TNL/File.h  +0 −8

@@ -168,14 +168,6 @@ class File
    std::fstream file;
    String fileName;
-
-   ////
-   // When we transfer data between the GPU and the CPU we use 5 MB buffer. This
-   // size should ensure good performance -- see.
-   // http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer .
-   // We use the same buffer size even for retyping data during IO operations.
-   //
-   // static constexpr std::streamsize TransferBufferSize = 5 * 2<<20;
 };
 
 /**