Skip to content
Snippets Groups Projects
Commit a1a054bf authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Removed duplicate TransferBufferSize constants

Also set the buffer size to 1 MiB, because larger buffer size slows down
memory copies significantly (e.g. MeshTest would take about 10x longer).

Addresses #26
parent 15b5e2c4
No related branches found
No related tags found
1 merge request: !42 Refactoring for execution policies
......@@ -106,11 +106,11 @@ copyFromIterator( DestinationElement* destination,
SourceIterator last )
{
using BaseType = typename std::remove_cv< DestinationElement >::type;
std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] };
std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
Index copiedElements = 0;
while( copiedElements < destinationSize && first != last ) {
Index i = 0;
while( i < Devices::Cuda::getGPUTransferBufferSize() && first != last )
while( i < Cuda::getTransferBufferSize() && first != last )
buffer[ i++ ] = *first++;
ArrayOperations< Devices::Cuda, Devices::Host >::copy( &destination[ copiedElements ], buffer.get(), i );
copiedElements += i;
......@@ -197,18 +197,18 @@ copy( DestinationElement* destination,
else
{
using BaseType = typename std::remove_cv< SourceElement >::type;
std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] };
std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
Index i( 0 );
while( i < size )
{
if( cudaMemcpy( (void*) buffer.get(),
(void*) &source[ i ],
TNL::min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ),
TNL::min( size - i, Cuda::getTransferBufferSize() ) * sizeof( SourceElement ),
cudaMemcpyDeviceToHost ) != cudaSuccess )
std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
TNL_CHECK_CUDA_DEVICE;
Index j( 0 );
while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
while( j < Cuda::getTransferBufferSize() && i + j < size )
{
destination[ i + j ] = buffer[ j ];
j++;
......@@ -239,11 +239,11 @@ compare( const Element1* destination,
TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
#ifdef HAVE_CUDA
std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] };
std::unique_ptr< Element2[] > host_buffer{ new Element2[ Cuda::getTransferBufferSize() ] };
Index compared( 0 );
while( compared < size )
{
Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() );
Index transfer = min( size - compared, Cuda::getTransferBufferSize() );
if( cudaMemcpy( (void*) host_buffer.get(),
(void*) &source[ compared ],
transfer * sizeof( Element2 ),
......@@ -288,12 +288,12 @@ copy( DestinationElement* destination,
}
else
{
std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] };
std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Cuda::getTransferBufferSize() ] };
Index i( 0 );
while( i < size )
{
Index j( 0 );
while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
while( j < Cuda::getTransferBufferSize() && i + j < size )
{
buffer[ j ] = source[ i + j ];
j++;
......
......@@ -30,6 +30,14 @@ inline constexpr int getWarpSize()
return 32;
}
// Size of the buffer used for data transfers between the GPU and the CPU.
// A 1 MiB buffer should ensure good performance (larger buffers were observed
// to slow down memory copies significantly). The same buffer size is also
// used for retyping data during IO operations.
// NOTE(review): some call sites treat this value as a byte count (File_impl
// divides by sizeof(Type)) while others use it directly as an element count
// (ArrayOperations allocates this many elements) -- confirm intended unit.
inline constexpr int getTransferBufferSize()
{
   return 1024 * 1024;  // 1 MiB
}
#ifdef HAVE_CUDA
__device__ inline int getGlobalThreadIdx( const int gridIdx = 0,
const int gridSize = getMaxGridSize() )
......
......@@ -24,16 +24,6 @@ public:
static inline bool setup( const Config::ParameterContainer& parameters,
const String& prefix = "" );
static inline constexpr int getGPUTransferBufferSize();
////
// When we transfer data between the GPU and the CPU we use 5 MB buffer. This
// size should ensure good performance -- see.
// http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer .
// We use the same buffer size even for retyping data during IO operations.
//
static constexpr std::size_t TransferBufferSize = 5 * 2<<20;
};
} // namespace Devices
......
......@@ -51,10 +51,5 @@ Cuda::setup( const Config::ParameterContainer& parameters,
return true;
}
inline constexpr int Cuda::getGPUTransferBufferSize()
{
return 1 << 20;
}
} // namespace Devices
} // namespace TNL
......@@ -168,14 +168,6 @@ class File
std::fstream file;
String fileName;
////
// When we transfer data between the GPU and the CPU we use 5 MB buffer. This
// size should ensure good performance -- see.
// http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer .
// We use the same buffer size even for retyping data during IO operations.
//
static constexpr std::streamsize TransferBufferSize = 5 * 2<<20;
};
/**
......
......@@ -18,6 +18,7 @@
#include <TNL/File.h>
#include <TNL/Assert.h>
#include <TNL/Cuda/CheckDevice.h>
#include <TNL/Cuda/LaunchHelpers.h>
#include <TNL/Exceptions/CudaSupportMissing.h>
#include <TNL/Exceptions/FileSerializationError.h>
#include <TNL/Exceptions/FileDeserializationError.h>
......@@ -101,7 +102,7 @@ void File::load_impl( Type* buffer, std::streamsize elements )
file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements );
else
{
const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements );
const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements );
using BaseType = typename std::remove_cv< SourceType >::type;
std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
std::streamsize readElements = 0;
......@@ -124,7 +125,7 @@ template< typename Type,
void File::load_impl( Type* buffer, std::streamsize elements )
{
#ifdef HAVE_CUDA
const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements );
using BaseType = typename std::remove_cv< Type >::type;
std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
......@@ -145,7 +146,7 @@ void File::load_impl( Type* buffer, std::streamsize elements )
}
else
{
const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements );
const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements );
using BaseType = typename std::remove_cv< SourceType >::type;
std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
......@@ -192,7 +193,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements )
file.write( reinterpret_cast<const char*>(buffer), sizeof(Type) * elements );
else
{
const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements );
const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(TargetType), elements );
using BaseType = typename std::remove_cv< TargetType >::type;
std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
std::streamsize writtenElements = 0;
......@@ -216,7 +217,7 @@ template< typename Type,
void File::save_impl( const Type* buffer, std::streamsize elements )
{
#ifdef HAVE_CUDA
const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements );
using BaseType = typename std::remove_cv< Type >::type;
std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
......@@ -237,7 +238,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements )
}
else
{
const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements );
const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(TargetType), elements );
using BaseType = typename std::remove_cv< TargetType >::type;
std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment