From 3889eb072c4824b664edd470b68e35cf1a9a177c Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Wed, 17 Jul 2013 15:14:51 +0200 Subject: [PATCH] Refactoring the array operations. --- install | 2 +- src/core/arrays/tnlArrayOperations.h | 4 +- src/core/tnlCuda.h | 2 + src/implementation/core/CMakeLists.txt | 3 +- .../arrays/tnlArrayOperationsCuda_impl.cu | 60 ++--- .../core/arrays/tnlArrayOperationsCuda_impl.h | 72 ++--- .../core/arrays/tnlArrayOperationsHost_impl.h | 46 ++-- .../core/cuda/cuda-reduction_impl.h | 15 +- src/implementation/core/tnlCuda_impl.h | 5 + tests/unit-tests/CMakeLists.txt | 4 +- .../core/arrays/tnlArrayOperationsTester.h | 145 +++++----- tests/unit-tests/core/cuda/CMakeLists.txt | 7 +- .../core/cuda/memory-operations-test.cu | 26 -- .../core/cuda/tnlCudaMemoryOperationsTester.h | 248 ------------------ .../core/cuda/tnlCudaReductionTester.h | 48 ++-- .../core/vectors/tnlVectorOperationsTester.h | 21 +- 16 files changed, 232 insertions(+), 476 deletions(-) delete mode 100644 tests/unit-tests/core/cuda/memory-operations-test.cu delete mode 100644 tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h diff --git a/install b/install index 029a1902e7..2b9489d97e 100755 --- a/install +++ b/install @@ -2,7 +2,7 @@ TARGET=TNL INSTALL_PREFIX=${HOME}/local -WITH_CUDA=no +WITH_CUDA=yes WITH_CUSPARSE=no CUDA_ARCHITECTURE=2.0 TEMPLATE_EXPLICIT_INSTANTIATION=yes diff --git a/src/core/arrays/tnlArrayOperations.h b/src/core/arrays/tnlArrayOperations.h index 51e72a9516..485b095473 100644 --- a/src/core/arrays/tnlArrayOperations.h +++ b/src/core/arrays/tnlArrayOperations.h @@ -63,12 +63,12 @@ class tnlArrayOperations< tnlHost > const SourceElement* source, const Index size ); - /*template< typename Element, + template< typename Element, typename DestinationDevice, typename Index > static bool copyMemory( Element* destination, const Element* source, - const Index size );*/ + const Index size ); template< typename Element1, 
typename DestinationDevice, diff --git a/src/core/tnlCuda.h b/src/core/tnlCuda.h index 8d4c423ac4..2496fbcb45 100644 --- a/src/core/tnlCuda.h +++ b/src/core/tnlCuda.h @@ -38,6 +38,8 @@ class tnlCuda static void setMaxBlockSize( int newMaxBlockSize ); + static int getGPUTransferBufferSize(); + protected: static int maxGridSize, maxBlockSize; diff --git a/src/implementation/core/CMakeLists.txt b/src/implementation/core/CMakeLists.txt index 18aa614027..7a3c04eb04 100755 --- a/src/implementation/core/CMakeLists.txt +++ b/src/implementation/core/CMakeLists.txt @@ -2,8 +2,7 @@ ADD_SUBDIRECTORY( arrays ) ADD_SUBDIRECTORY( cuda ) ADD_SUBDIRECTORY( vectors ) -SET( headers memory-operations.h - tnlCuda_impl.h +SET( headers tnlCuda_impl.h tnlHost_impl.h tnlLogger_impl.h ) diff --git a/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.cu b/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.cu index 84245c9737..aa57f8434b 100644 --- a/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.cu +++ b/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.cu @@ -46,12 +46,12 @@ template void tnlArrayOperations< tnlCuda >::setMemoryElement< float >( fl template void tnlArrayOperations< tnlCuda >::setMemoryElement< double >( double* data, const double& value ); template void tnlArrayOperations< tnlCuda >::setMemoryElement< long double >( long double* data, const long double& value ); -template char tnlArrayOperations< tnlCuda >::getMemoryElement< char >( char* data ); -template int tnlArrayOperations< tnlCuda >::getMemoryElement< int >( int* data ); -template long int tnlArrayOperations< tnlCuda >::getMemoryElement< long int >( long int* data ); -template float tnlArrayOperations< tnlCuda >::getMemoryElement< float >( float* data ); -template double tnlArrayOperations< tnlCuda >::getMemoryElement< double >( double* data ); -template long double tnlArrayOperations< tnlCuda >::getMemoryElement< long double >( long double* data ); +template char 
tnlArrayOperations< tnlCuda >::getMemoryElement< char >( const char* data ); +template int tnlArrayOperations< tnlCuda >::getMemoryElement< int >( const int* data ); +template long int tnlArrayOperations< tnlCuda >::getMemoryElement< long int >( const long int* data ); +template float tnlArrayOperations< tnlCuda >::getMemoryElement< float >( const float* data ); +template double tnlArrayOperations< tnlCuda >::getMemoryElement< double >( const double* data ); +template long double tnlArrayOperations< tnlCuda >::getMemoryElement< long double >( const long double* data ); template char& tnlArrayOperations< tnlCuda >::getArrayElementReference< char, int >( char* data, const int i ); template int& tnlArrayOperations< tnlCuda >::getArrayElementReference< int, int >( int* data, const int i ); @@ -106,30 +106,30 @@ template bool tnlArrayOperations< tnlCuda >::copyMemory< float, tnlCuda, template bool tnlArrayOperations< tnlCuda >::copyMemory< double, tnlCuda, double, long int >( double* destination, const double* source, const long int size ); template bool tnlArrayOperations< tnlCuda >::copyMemory< long double, tnlCuda, long double, long int >( long double* destination, const long double* source, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlHost, int >( const char* data1, const char* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< int, tnlHost, int >( const int* data1, const int* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlHost, int >( const long int* data1, const long int* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlHost, int >( const float* data1, const float* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlHost, int >( const double* data1, const double* data2, const int size ); -template bool tnlArrayOperations< tnlCuda 
>::compareMemory< long double, tnlHost, int >( const long double* data1, const long double* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlHost, long int >( const char* data1, const char* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< int, tnlHost, long int >( const int* data1, const int* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlHost, long int >( const long int* data1, const long int* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlHost, long int >( const float* data1, const float* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlHost, long int >( const double* data1, const double* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< long double, tnlHost, long int >( const long double* data1, const long double* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlCuda, int >( const char* data1, const char* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< int, tnlCuda, int >( const int* data1, const int* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlCuda, int >( const long int* data1, const long int* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlCuda, int >( const float* data1, const float* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlCuda, int >( const double* data1, const double* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< long double, tnlCuda, int >( const long double* data1, const long double* data2, const int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlCuda, 
long int >( const char* data1, const char* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< int, tnlCuda, long int >( const int* data1, const int* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlCuda, long int >( const long int* data1, const long int* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlCuda, long int >( const float* data1, const float* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlCuda, long int >( const double* data1, const double* data2, const long int size ); -template bool tnlArrayOperations< tnlCuda >::compareMemory< long double, tnlCuda, long int >( const long double* data1, const long double* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlHost, char, int >( const char* data1, const char* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< int, tnlHost, int, int >( const int* data1, const int* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlHost, long int, int >( const long int* data1, const long int* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlHost, float, int >( const float* data1, const float* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlHost, double, int >( const double* data1, const double* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< long double, tnlHost, long double, int >( const long double* data1, const long double* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlHost, char, long int >( const char* data1, const char* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< int, 
tnlHost, int, long int >( const int* data1, const int* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlHost, long int, long int >( const long int* data1, const long int* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlHost, float, long int >( const float* data1, const float* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlHost, double, long int >( const double* data1, const double* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< long double, tnlHost, long double, long int >( const long double* data1, const long double* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlCuda, char, int >( const char* data1, const char* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< int, tnlCuda, int, int >( const int* data1, const int* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlCuda, long int, int >( const long int* data1, const long int* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlCuda, float, int >( const float* data1, const float* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlCuda, double, int >( const double* data1, const double* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< long double, tnlCuda, long double, int >( const long double* data1, const long double* data2, const int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< char, tnlCuda, char, long int >( const char* data1, const char* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< int, tnlCuda, int, long int >( const int* data1, const int* data2, const long int size ); +template 
bool tnlArrayOperations< tnlCuda >::compareMemory< long int, tnlCuda, long int, long int >( const long int* data1, const long int* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< float, tnlCuda, float, long int >( const float* data1, const float* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< double, tnlCuda, double, long int >( const double* data1, const double* data2, const long int size ); +template bool tnlArrayOperations< tnlCuda >::compareMemory< long double, tnlCuda, long double, long int >( const long double* data1, const long double* data2, const long int size ); template bool tnlArrayOperations< tnlCuda >::setMemory< char, int >( char* destination, const char& value, const int size ); template bool tnlArrayOperations< tnlCuda >::setMemory< int, int >( int* destination, const int& value, const int size ); diff --git a/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.h b/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.h index f8ccb1c89b..aef5373a42 100644 --- a/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.h +++ b/src/implementation/core/arrays/tnlArrayOperationsCuda_impl.h @@ -18,6 +18,7 @@ #ifndef TNLARRAYOPERATIONSCUDA_IMPL_H_ #define TNLARRAYOPERATIONSCUDA_IMPL_H_ +#include <core/mfuncs.h> template< typename Element, typename Index > bool tnlArrayOperations< tnlCuda > :: allocateMemory( Element*& data, @@ -50,14 +51,14 @@ template< typename Element > void tnlArrayOperations< tnlCuda > :: setMemoryElement( Element* data, const Element& value ) { - setMemoryCuda( data, value, 1, tnlCuda::getMaxGridSize() ); + tnlArrayOperations< tnlCuda >::setMemory( data, value, 1 ); } template< typename Element > Element tnlArrayOperations< tnlCuda > :: getMemoryElement( const Element* data ) { Element result; - copyMemoryCudaToHost( &result, data, 1 ); + tnlArrayOperations< tnlCuda >::copyMemory< Element, tnlHost, Element, int >( &result, data, 1 ); 
return result; } @@ -78,9 +79,9 @@ const Element& tnlArrayOperations< tnlCuda > :: getArrayElementReference(const E #ifdef HAVE_CUDA template< typename Element, typename Index > -__global__ void tnlArrayOperations< tnlCuda > :: setArrayValueCudaKernel( Element* data, - const Index size, - const Element value ) +__global__ void setArrayValueCudaKernel( Element* data, + const Index size, + const Element value ) { Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x; const Index maxGridSize = blockDim. x * gridDim. x; @@ -180,29 +181,34 @@ bool tnlArrayOperations< tnlCuda > :: copyMemory( DestinationElement* destinatio { if( DestinationDevice :: getDevice() == tnlHostDevice ) { - SourceElement* buffer = new SourceElement[ tnlGPUvsCPUTransferBufferSize ]; - if( ! buffer ) - { - cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." << endl; - return false; - } - Index i( 0 ); - while( i < size ) - { - if( ! copyMemoryCudaToHost( buffer, - &source[ i ], - Min( size - i, tnlGPUvsCPUTransferBufferSize ) ) ) + #ifdef HAVE_CUDA + SourceElement* buffer = new SourceElement[ tnlCuda::getGPUTransferBufferSize() ]; + if( ! buffer ) { - delete[] buffer; + cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." 
<< endl; return false; } - Index j( 0 ); - while( j < tnlGPUvsCPUTransferBufferSize && i + j < size ) - destination[ i + j ] = buffer[ j++ ]; - i += j; - } - delete[] buffer; - return true; + Index i( 0 ); + while( i < size ) + { + if( cudaMemcpy( buffer, + &source[ i ], + Min( size - i, tnlCuda::getGPUTransferBufferSize() ), cudaMemcpyDeviceToHost ) != cudaSuccess ) + { + checkCudaDevice; + delete[] buffer; + return false; + } + Index j( 0 ); + while( j < tnlCuda::getGPUTransferBufferSize() && i + j < size ) + destination[ i + j ] = buffer[ j++ ]; + i += j; + } + delete[] buffer; + #else + cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; + return false; + #endif } if( DestinationDevice::getDevice() == tnlCudaDevice ) { @@ -210,7 +216,7 @@ bool tnlArrayOperations< tnlCuda > :: copyMemory( DestinationElement* destinatio dim3 blockSize( 0 ), gridSize( 0 ); blockSize. x = 256; Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); - gridSize. x = Min( blocksNumber, maxGridSize ); + gridSize. x = Min( blocksNumber, tnlCuda::getMaxGridSize() ); copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); return checkCudaDevice; #else @@ -218,7 +224,7 @@ bool tnlArrayOperations< tnlCuda > :: copyMemory( DestinationElement* destinatio return false; #endif } - + return true; } template< typename Element1, @@ -226,13 +232,13 @@ template< typename Element1, typename Element2, typename Index > bool tnlArrayOperations< tnlCuda > :: compareMemory( const Element1* destination, - const Element2* source, - const Index size ) + const Element2* source, + const Index size ) { if( DestinationDevice::getDevice() == tnlHostDevice ) { #ifdef HAVE_CUDA - Element2* host_buffer = new Element2[ tnlGPUvsCPUTransferBufferSize ]; + Element2* host_buffer = new Element2[ tnlCuda::getGPUTransferBufferSize() ]; if( ! 
host_buffer ) { cerr << "I am sorry but I cannot allocate supporting buffer on the host for comparing data between CUDA GPU and CPU." << endl; @@ -241,9 +247,9 @@ bool tnlArrayOperations< tnlCuda > :: compareMemory( const Element1* destination Index compared( 0 ); while( compared < size ) { - Index transfer = Min( size - compared, tnlGPUvsCPUTransferBufferSize ); + Index transfer = Min( size - compared, tnlCuda::getGPUTransferBufferSize() ); if( cudaMemcpy( ( void* ) host_buffer, - ( void* ) & ( deviceData[ compared ] ), + ( void* ) & ( source[ compared ] ), transfer * sizeof( Element2 ), cudaMemcpyDeviceToHost ) != cudaSuccess ) { @@ -254,7 +260,7 @@ bool tnlArrayOperations< tnlCuda > :: compareMemory( const Element1* destination } Index bufferIndex( 0 ); while( bufferIndex < transfer && - host_buffer[ bufferIndex ] == hostData[ compared ] ) + host_buffer[ bufferIndex ] == destination[ compared ] ) { bufferIndex ++; compared ++; diff --git a/src/implementation/core/arrays/tnlArrayOperationsHost_impl.h b/src/implementation/core/arrays/tnlArrayOperationsHost_impl.h index fd70f11443..a8a321d94e 100644 --- a/src/implementation/core/arrays/tnlArrayOperationsHost_impl.h +++ b/src/implementation/core/arrays/tnlArrayOperationsHost_impl.h @@ -18,6 +18,8 @@ #ifndef TNLARRAYOPERATIONSHOST_IMPL_H_ #define TNLARRAYOPERATIONSHOST_IMPL_H_ +#include <core/cuda/device-check.h> + template< typename Element, typename Index > bool tnlArrayOperations< tnlHost > :: allocateMemory( Element*& data, const Index size ) @@ -83,29 +85,33 @@ bool tnlArrayOperations< tnlHost > :: copyMemory( DestinationElement* destinatio destination[ i ] = ( DestinationElement) source[ i ]; if( DestinationDevice :: getDevice() == tnlCudaDevice ) { - DestinationElement* buffer = new DestinationElement[ tnlGPUvsCPUTransferBufferSize ]; - if( ! buffer ) - { - cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." 
<< endl; - return false; - } - Index i( 0 ); - while( i < size ) - { - Index j( 0 ); - while( j < tnlGPUvsCPUTransferBufferSize && i + j < size ) - buffer[ j ] = source[ i + j++ ]; - if( ! copyMemoryHostToCuda( &destination[ i ], - buffer, - j ) ) + #ifdef HAVE_CUDA + DestinationElement* buffer = new DestinationElement[ tnlCuda::getGPUTransferBufferSize() ]; + if( ! buffer ) { - delete[] buffer; + cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." << endl; return false; } - i += j; - } - delete[] buffer; - return true; + Index i( 0 ); + while( i < size ) + { + Index j( 0 ); + while( j < tnlCuda::getGPUTransferBufferSize() && i + j < size ) + buffer[ j ] = source[ i + j++ ]; + if( cudaMemcpy( &destination[ i ], buffer, j, cudaMemcpyHostToDevice ) != cudaSuccess ) + { + checkCudaDevice; + delete[] buffer; + return false; + } + i += j; + } + delete[] buffer; + return true; + #else + cerr << "The CUDA support is missing on this system." << endl; + return false; + #endif } return true; } diff --git a/src/implementation/core/cuda/cuda-reduction_impl.h b/src/implementation/core/cuda/cuda-reduction_impl.h index 6355790aab..d227c5bd30 100644 --- a/src/implementation/core/cuda/cuda-reduction_impl.h +++ b/src/implementation/core/cuda/cuda-reduction_impl.h @@ -24,7 +24,7 @@ #include <iostream> #include <core/tnlAssert.h> #include <core/cuda/reduction-operations.h> -#include <implementation/core/memory-operations.h> +#include <core/arrays/tnlArrayOperations.h> using namespace std; @@ -296,7 +296,7 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation, gridSize. x = Min( ( IndexType ) ( size / blockSize. x + 1 ) / 2, desGridSize ); if( ! output && - ! allocateMemoryCuda( output, :: Max( ( IndexType ) 1, size / desBlockSize ) ) ) + ! tnlArrayOperations< tnlCuda >::allocateMemory( output, :: Max( ( IndexType ) 1, size / desBlockSize ) ) ) return false; IndexType shmem = blockSize. 
x * sizeof( ResultType ); @@ -372,9 +372,10 @@ bool reductionOnCudaDevice( const Operation& operation, RealType hostArray2[ maxGPUReductionDataSize ]; if( size <= maxGPUReductionDataSize ) { - if( ! copyMemoryCudaToHost( hostArray1, deviceInput1, size ) ) + if( ! tnlArrayOperations< tnlCuda >::copyMemory< RealType, tnlCuda, RealType, IndexType >( hostArray1, deviceInput1, size ) ) return false; - if( deviceInput2 && ! copyMemoryCudaToHost( hostArray2, deviceInput2, size ) ) + if( deviceInput2 && ! + tnlArrayOperations< tnlCuda >::copyMemory< RealType, tnlHost, RealType, IndexType >( hostArray2, deviceInput2, size ) ) return false; result = operation. initialValueOnHost( 0, hostArray1, hostArray2 ); for( IndexType i = 1; i < size; i ++ ) @@ -407,7 +408,7 @@ bool reductionOnCudaDevice( const Operation& operation, * Transfer the reduced data from device to host. */ ResultType resultArray[ maxGPUReductionDataSize ]; - if( ! copyMemoryCudaToHost( resultArray, deviceAux1, reducedSize ) ) + if( ! tnlArrayOperations< tnlCuda >::copyMemory< ResultType, tnlHost, ResultType, IndexType >( resultArray, deviceAux1, reducedSize ) ) return false; /*** @@ -422,9 +423,9 @@ bool reductionOnCudaDevice( const Operation& operation, /**** * Free the memory allocated on the device. */ - if( deviceAux1 && ! freeMemoryCuda( deviceAux1 ) ) + if( deviceAux1 && ! tnlArrayOperations< tnlCuda >::freeMemory( deviceAux1 ) ) return false; - if( deviceAux2 && ! freeMemoryCuda( deviceAux2 ) ) + if( deviceAux2 && ! 
tnlArrayOperations< tnlCuda >::freeMemory( deviceAux2 ) ) return false; return true; #else diff --git a/src/implementation/core/tnlCuda_impl.h b/src/implementation/core/tnlCuda_impl.h index b2c7eb584a..33b7dce9c9 100644 --- a/src/implementation/core/tnlCuda_impl.h +++ b/src/implementation/core/tnlCuda_impl.h @@ -48,5 +48,10 @@ inline void tnlCuda :: setMaxBlockSize( int newMaxBlockSize ) maxBlockSize = newMaxBlockSize; } +inline int tnlCuda::getGPUTransferBufferSize() +{ + return 1 << 20; +} + #endif /* TNLCUDA_IMPL_H_ */ diff --git a/tests/unit-tests/CMakeLists.txt b/tests/unit-tests/CMakeLists.txt index c99208f449..2e21edbe24 100755 --- a/tests/unit-tests/CMakeLists.txt +++ b/tests/unit-tests/CMakeLists.txt @@ -41,11 +41,11 @@ if( BUILD_CUDA ) SET_TESTS_PROPERTIES ( core/cuda/tnl-reduction-test${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-device-check-test${mpiExt}${debugExt} ) ADD_TEST( core/arrays/tnlArrayOperationsTest-cuda${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnlArrayOperationsTest-cuda${mpiExt}${debugExt} ) - SET_TESTS_PROPERTIES ( core/cuda/tnlArrayOperationsTest-cuda${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-reduction-test${mpiExt}${debugExt} ) + SET_TESTS_PROPERTIES ( core/arrays/tnlArrayOperationsTest-cuda${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-reduction-test${mpiExt}${debugExt} ) ADD_TEST( core/vectors/tnlCudaVectorOperationsTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnlCudaVectorOperationsTest${mpiExt}${debugExt} ) SET_TESTS_PROPERTIES ( core/vectors/tnlCudaVectorOperationsTest${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnlArrayOperationsTest-cuda${mpiExt}${debugExt} ) endif() ADD_TEST( tnl-unit-tests${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnl-unit-tests${mpiExt}${debugExt} ) - \ No newline at end of file + diff --git a/tests/unit-tests/core/arrays/tnlArrayOperationsTester.h b/tests/unit-tests/core/arrays/tnlArrayOperationsTester.h index 737bbbbcfd..beb2d56194 100644 --- 
a/tests/unit-tests/core/arrays/tnlArrayOperationsTester.h +++ b/tests/unit-tests/core/arrays/tnlArrayOperationsTester.h @@ -28,6 +28,7 @@ #include <cppunit/Message.h> #include <core/arrays/tnlArrayOperations.h> +#include <core/cuda/device-check.h> template< typename Element, typename Device > class tnlArrayOperationsTester{}; @@ -81,10 +82,10 @@ class tnlArrayOperationsTester< Element, tnlHost > : public CppUnit :: TestCase void allocationTest() { Element* data; - tnlArrayOperations< tnlHost > :: allocateMemory( data, getTestSize() ); + tnlArrayOperations< tnlHost >::allocateMemory( data, getTestSize() ); CPPUNIT_ASSERT( data != 0 ); - freeMemoryCuda( data ); + tnlArrayOperations< tnlHost >::freeMemory( data ); }; void memorySetTest() @@ -220,10 +221,10 @@ class tnlArrayOperationsTester< Element, tnlCuda > : public CppUnit :: TestCase void allocationTest() { int* data; - allocateMemoryCuda( data, getTestSize() ); + tnlArrayOperations< tnlCuda >::allocateMemory( data, getTestSize() ); CPPUNIT_ASSERT( checkCudaDevice ); - freeMemoryCuda( data ); + tnlArrayOperations< tnlCuda >::freeMemory( data ); CPPUNIT_ASSERT( checkCudaDevice ); }; @@ -231,37 +232,37 @@ class tnlArrayOperationsTester< Element, tnlCuda > : public CppUnit :: TestCase { const int size = 1024; int *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 0, size ); - setMemoryCuda( deviceData, 13, size, maxCudaGridSize ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData, 0, size ); + tnlArrayOperations< tnlCuda >::setMemory( deviceData, 13, size ); CPPUNIT_ASSERT( checkCudaDevice ); - copyMemoryCudaToHost( hostData, deviceData, size ); + tnlArrayOperations< tnlCuda >::copyMemory< int, tnlHost, int, int >( hostData, deviceData, size ); CPPUNIT_ASSERT( checkCudaDevice ); for( int i = 0; i < 
size; i ++ ) CPPUNIT_ASSERT( hostData[ i ] == 13 ); - freeMemoryHost( hostData ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlCuda >::freeMemory( hostData ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); }; void bigMemorySetTest() { const int size( getTestSize() ); int *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 0, size ); - setMemoryCuda( deviceData, 13, size, maxCudaGridSize ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData, 0, size ); + tnlArrayOperations< tnlCuda >::setMemory( deviceData, 13, size ); CPPUNIT_ASSERT( checkCudaDevice ); - copyMemoryCudaToHost( hostData, deviceData, size ); + tnlArrayOperations< tnlCuda >::copyMemory< int, tnlHost, int, int >( hostData, deviceData, size ); CPPUNIT_ASSERT( checkCudaDevice ); for( int i = 0; i < size; i += 100 ) { if( hostData[ i ] != 13 ) CPPUNIT_ASSERT( hostData[ i ] == 13 ); } - freeMemoryHost( hostData ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlHost >::freeMemory( hostData ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); }; void copyMemoryTest() @@ -269,16 +270,16 @@ class tnlArrayOperationsTester< Element, tnlCuda > : public CppUnit :: TestCase const int size = getTestSize(); int *hostData1, *hostData2, *deviceData; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData, hostData1, size ); - copyMemoryCudaToHost( hostData2, deviceData, size ); - CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData1, size ); + 
tnlArrayOperations< tnlHost >::allocateMemory( hostData2, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData1, 13, size ); + tnlArrayOperations< tnlHost >::copyMemory< int, tnlCuda, int, int >( deviceData, hostData1, size ); + tnlArrayOperations< tnlCuda >::copyMemory< int, tnlHost, int, int >( hostData2, deviceData, size ); + CPPUNIT_ASSERT( ( tnlArrayOperations< tnlHost >::compareMemory< int, tnlHost, int, int >( hostData1, hostData2, size) ) ); + tnlArrayOperations< tnlHost >::freeMemory( hostData1 ); + tnlArrayOperations< tnlHost >::freeMemory( hostData2 ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); }; void copyMemoryWithConversionHostToCudaTest() @@ -286,17 +287,17 @@ class tnlArrayOperationsTester< Element, tnlCuda > : public CppUnit :: TestCase const int size = getTestSize(); int *hostData1; float *hostData2, *deviceData; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData, hostData1, size ); - copyMemoryCudaToHost( hostData2, deviceData, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData1, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData2, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData1, 13, size ); + tnlArrayOperations< tnlHost >::copyMemory< float, tnlCuda, int, int >( deviceData, hostData1, size ); + tnlArrayOperations< tnlCuda >::copyMemory< float, tnlHost, float, int >( hostData2, deviceData, size ); for( int i = 0; i < size; i ++ ) CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlHost >::freeMemory( hostData1 ); + tnlArrayOperations< tnlHost >::freeMemory( hostData2 ); + 
tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); }; void copyMemoryWithConversionCudaToHostTest() @@ -304,17 +305,17 @@ class tnlArrayOperationsTester< Element, tnlCuda > : public CppUnit :: TestCase const int size = getTestSize(); int *hostData1, *deviceData; float *hostData2; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData, hostData1, size ); - copyMemoryCudaToHost( hostData2, deviceData, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData1, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData2, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData1, 13, size ); + tnlArrayOperations< tnlHost >::copyMemory< int, tnlCuda, int, int >( deviceData, hostData1, size ); + tnlArrayOperations< tnlCuda >::copyMemory< float, tnlHost, int, int >( hostData2, deviceData, size ); for( int i = 0; i < size; i ++ ) CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlHost >::freeMemory( hostData1 ); + tnlArrayOperations< tnlHost >::freeMemory( hostData2 ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); }; void copyMemoryWithConversionCudaToCudaTest() @@ -322,33 +323,33 @@ class tnlArrayOperationsTester< Element, tnlCuda > : public CppUnit :: TestCase const int size = getTestSize(); int *hostData1, *deviceData1; float *hostData2, *deviceData2; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData1, size ); - allocateMemoryCuda( deviceData2, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData1, hostData1, size ); - copyMemoryCudaToCuda( deviceData2, deviceData1, size, maxCudaGridSize ); - 
copyMemoryCudaToHost( hostData2, deviceData2, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData1, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData2, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData1, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData2, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData1, 13, size ); + tnlArrayOperations< tnlHost >::copyMemory< int, tnlCuda, int, int >( deviceData1, hostData1, size ); + tnlArrayOperations< tnlCuda >::copyMemory< float, tnlCuda, int, int >( deviceData2, deviceData1, size ); + tnlArrayOperations< tnlCuda >::copyMemory< float, tnlHost, float, int >( hostData2, deviceData2, size ); for( int i = 0; i < size; i ++ ) CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData1 ); - freeMemoryCuda( deviceData2 ); + tnlArrayOperations< tnlHost >::freeMemory( hostData1 ); + tnlArrayOperations< tnlHost >::freeMemory( hostData2 ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData1 ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData2 ); }; void compareMemoryHostCudaTest() { const int size = getTestSize(); int *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 7, size ); - setMemoryCuda( deviceData, 8, size, maxCudaGridSize ); - CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) ); - setMemoryCuda( deviceData, 7, size, maxCudaGridSize ); - CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData, 7, size ); + tnlArrayOperations< tnlCuda >::setMemory( deviceData, 8, size ); + CPPUNIT_ASSERT( ( ! 
tnlArrayOperations< tnlCuda >::compareMemory< int, tnlHost, int, int >( hostData, deviceData, size ) ) ); + tnlArrayOperations< tnlCuda >::setMemory( deviceData, 7, size ); + CPPUNIT_ASSERT( ( tnlArrayOperations< tnlCuda >::compareMemory< int, tnlHost, int, int >( hostData, deviceData, size ) ) ); }; void compareMemoryWithConversionHostCudaTest() @@ -356,13 +357,13 @@ class tnlArrayOperationsTester< Element, tnlCuda > : public CppUnit :: TestCase const int size = getTestSize(); int *hostData; float *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 7, size ); - setMemoryCuda( deviceData, ( float ) 8.0, size, maxCudaGridSize ); - CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) ); - setMemoryCuda( deviceData, ( float ) 7.0, size, maxCudaGridSize ); - CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); + tnlArrayOperations< tnlHost >::setMemory( hostData, 7, size ); + tnlArrayOperations< tnlCuda >::setMemory( deviceData, ( float ) 8.0, size ); + CPPUNIT_ASSERT( ( ! 
tnlArrayOperations< tnlCuda >::compareMemory< int, tnlHost, float, int >( hostData, deviceData, size ) ) ); + tnlArrayOperations< tnlCuda >::setMemory( deviceData, ( float ) 7.0, size ); + CPPUNIT_ASSERT( ( tnlArrayOperations< tnlCuda >::compareMemory< int, tnlHost, float, int >( hostData, deviceData, size ) ) ); }; }; diff --git a/tests/unit-tests/core/cuda/CMakeLists.txt b/tests/unit-tests/core/cuda/CMakeLists.txt index 3355318a36..36154e7580 100755 --- a/tests/unit-tests/core/cuda/CMakeLists.txt +++ b/tests/unit-tests/core/cuda/CMakeLists.txt @@ -1,5 +1,4 @@ set( headers tnlCudaDeviceCheckTester.h - tnlCudaMemoryOperationsTester.h tnlCudaReductionTester.h ) if( BUILD_CUDA ) @@ -7,10 +6,6 @@ if( BUILD_CUDA ) TARGET_LINK_LIBRARIES( tnl-device-check-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} tnl${mpiExt}${debugExt}-0.1 ) - CUDA_ADD_EXECUTABLE( tnl-memory-operations-test${mpiExt}${debugExt} ${headers} memory-operations-test.cu ) - TARGET_LINK_LIBRARIES( tnl-memory-operations-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} - tnl${mpiExt}${debugExt}-0.1 ) - CUDA_ADD_EXECUTABLE( tnl-reduction-test${mpiExt}${debugExt} ${headers} reduction-test.cu ) TARGET_LINK_LIBRARIES( tnl-reduction-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} tnl${mpiExt}${debugExt}-0.1 ) @@ -19,4 +14,4 @@ if( BUILD_CUDA ) endif() - \ No newline at end of file + diff --git a/tests/unit-tests/core/cuda/memory-operations-test.cu b/tests/unit-tests/core/cuda/memory-operations-test.cu deleted file mode 100644 index 8fe5f88f20..0000000000 --- a/tests/unit-tests/core/cuda/memory-operations-test.cu +++ /dev/null @@ -1,26 +0,0 @@ -/*************************************************************************** - memory-operations-test.cu - description - ------------------- - begin : Mar 20, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - 
-/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#include "tnlCudaMemoryOperationsTester.h" -#include "../../tnlUnitTestStarter.h" - -int main( int argc, char* argv[] ) -{ - if( ! tnlUnitTestStarter :: run< tnlCudaMemoryOperationsTester >() ) - return EXIT_FAILURE; - return EXIT_SUCCESS; -} \ No newline at end of file diff --git a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h deleted file mode 100644 index f9e98d72d9..0000000000 --- a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h +++ /dev/null @@ -1,248 +0,0 @@ -/*************************************************************************** - tnlCudaMemoryOperationsTester.h - description - ------------------- - begin : Mar 20, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef TNLCUDAMEMORYOPERATIONSTESTER_H_ -#define TNLCUDAMEMORYOPERATIONSTESTER_H_ - -#include <tnlConfig.h> - -#ifdef HAVE_CPPUNIT -#include <cppunit/TestSuite.h> -#include <cppunit/TestResult.h> -#include <cppunit/TestCaller.h> -#include <cppunit/TestCase.h> -#include <cppunit/Message.h> -#include <core/cuda/device-check.h> -#include <implementation/core/memory-operations.h> - -class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase -{ - public: - tnlCudaMemoryOperationsTester(){}; - - virtual - ~tnlCudaMemoryOperationsTester(){}; - - static CppUnit :: Test* suite() - { - CppUnit :: TestSuite* suiteOfTests = new CppUnit :: TestSuite( "tnlCudaMemoryOperationsTester" ); - CppUnit :: TestResult result; - - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "allocationTest", - &tnlCudaMemoryOperationsTester :: allocationTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "smallMemorySetTest", - &tnlCudaMemoryOperationsTester :: smallMemorySetTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "bigMemorySetTest", - &tnlCudaMemoryOperationsTester :: bigMemorySetTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "copyMemoryTest", - &tnlCudaMemoryOperationsTester :: copyMemoryTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "copyMemoryWithConversionHostToCudaTest", - &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionHostToCudaTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "copyMemoryWithConversionCudaToHostTest", - &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToHostTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - 
"copyMemoryWithConversionCudaToCudaTest", - &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToCudaTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "compareMemoryHostCudaTest", - &tnlCudaMemoryOperationsTester :: compareMemoryHostCudaTest ) - ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "compareMemoryWithConevrsionHostCudaTest", - &tnlCudaMemoryOperationsTester :: compareMemoryWithConversionHostCudaTest ) - ); - - return suiteOfTests; - }; - - int getTestSize() - { - const int cudaGridSize = 256; - return 1.5 * cudaGridSize * maxCudaBlockSize; - //return 1 << 22; - }; - - void allocationTest() - { - int* data; - allocateMemoryCuda( data, getTestSize() ); - CPPUNIT_ASSERT( checkCudaDevice ); - - freeMemoryCuda( data ); - CPPUNIT_ASSERT( checkCudaDevice ); - }; - - void smallMemorySetTest() - { - const int size = 1024; - int *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 0, size ); - setMemoryCuda( deviceData, 13, size, maxCudaGridSize ); - CPPUNIT_ASSERT( checkCudaDevice ); - copyMemoryCudaToHost( hostData, deviceData, size ); - CPPUNIT_ASSERT( checkCudaDevice ); - for( int i = 0; i < size; i ++ ) - CPPUNIT_ASSERT( hostData[ i ] == 13 ); - freeMemoryHost( hostData ); - freeMemoryCuda( deviceData ); - }; - - void bigMemorySetTest() - { - const int size( getTestSize() ); - int *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 0, size ); - setMemoryCuda( deviceData, 13, size, maxCudaGridSize ); - CPPUNIT_ASSERT( checkCudaDevice ); - copyMemoryCudaToHost( hostData, deviceData, size ); - CPPUNIT_ASSERT( checkCudaDevice ); - for( int i = 0; i < size; i += 100 ) - { - if( hostData[ i ] != 13 ) - CPPUNIT_ASSERT( hostData[ i ] == 13 ); - } - freeMemoryHost( hostData ); - freeMemoryCuda( deviceData 
); - }; - - void copyMemoryTest() - { - const int size = getTestSize(); - - int *hostData1, *hostData2, *deviceData; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData, hostData1, size ); - copyMemoryCudaToHost( hostData2, deviceData, size ); - CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData ); - }; - - void copyMemoryWithConversionHostToCudaTest() - { - const int size = getTestSize(); - int *hostData1; - float *hostData2, *deviceData; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData, hostData1, size ); - copyMemoryCudaToHost( hostData2, deviceData, size ); - for( int i = 0; i < size; i ++ ) - CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData ); - }; - - void copyMemoryWithConversionCudaToHostTest() - { - const int size = getTestSize(); - int *hostData1, *deviceData; - float *hostData2; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData, hostData1, size ); - copyMemoryCudaToHost( hostData2, deviceData, size ); - for( int i = 0; i < size; i ++ ) - CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData ); - }; - - void copyMemoryWithConversionCudaToCudaTest() - { - const int size = getTestSize(); - int *hostData1, *deviceData1; - float *hostData2, *deviceData2; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - 
allocateMemoryCuda( deviceData1, size ); - allocateMemoryCuda( deviceData2, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData1, hostData1, size ); - copyMemoryCudaToCuda( deviceData2, deviceData1, size, maxCudaGridSize ); - copyMemoryCudaToHost( hostData2, deviceData2, size ); - for( int i = 0; i < size; i ++ ) - CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData1 ); - freeMemoryCuda( deviceData2 ); - }; - - void compareMemoryHostCudaTest() - { - const int size = getTestSize(); - int *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 7, size ); - setMemoryCuda( deviceData, 8, size, maxCudaGridSize ); - CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) ); - setMemoryCuda( deviceData, 7, size, maxCudaGridSize ); - CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) ); - }; - - void compareMemoryWithConversionHostCudaTest() - { - const int size = getTestSize(); - int *hostData; - float *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData, 7, size ); - setMemoryCuda( deviceData, ( float ) 8.0, size, maxCudaGridSize ); - CPPUNIT_ASSERT( ! 
compareMemoryHostCuda( hostData, deviceData, size ) ); - setMemoryCuda( deviceData, ( float ) 7.0, size, maxCudaGridSize ); - CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) ); - }; - - -}; - -#else -class tnlCudaMemoryOperationsTester -{}; -#endif /* HAVE_CPPUNIT */ - -#endif /* TNLCUDAMEMORYOPERATIONSTESTER_H_ */ diff --git a/tests/unit-tests/core/cuda/tnlCudaReductionTester.h b/tests/unit-tests/core/cuda/tnlCudaReductionTester.h index 9324d9dedf..9ac39964ad 100644 --- a/tests/unit-tests/core/cuda/tnlCudaReductionTester.h +++ b/tests/unit-tests/core/cuda/tnlCudaReductionTester.h @@ -99,7 +99,7 @@ class tnlCudaReductionTester : public CppUnit :: TestCase { for( int i = 0; i < size; i ++ ) hostData[ i ] = value; - copyMemoryHostToCuda( deviceData, hostData, size ); + tnlArrayOperations< tnlHost >::copyMemory< RealType, tnlCuda, RealType, int >( deviceData, hostData, size ); CPPUNIT_ASSERT( checkCudaDevice ); } @@ -108,8 +108,8 @@ class tnlCudaReductionTester : public CppUnit :: TestCase { const int shortSequence( 128 ); RealType *hostData, *deviceData; - allocateMemoryHost( hostData, shortSequence ); - allocateMemoryCuda( deviceData, shortSequence ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData, shortSequence ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, shortSequence ); CPPUNIT_ASSERT( checkCudaDevice ); RealType result; @@ -152,18 +152,18 @@ class tnlCudaReductionTester : public CppUnit :: TestCase CPPUNIT_ASSERT( result == shortSequence ); - freeMemoryHost( hostData ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlHost >::freeMemory( hostData ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); CPPUNIT_ASSERT( checkCudaDevice ); - }; + } template< typename RealType > void longConstantSequenceTest() { const int longSequence( 172892 ); RealType *hostData, *deviceData; - allocateMemoryHost( hostData, longSequence ); - allocateMemoryCuda( deviceData, longSequence ); + tnlArrayOperations< tnlHost 
>::allocateMemory( hostData, longSequence ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, longSequence ); CPPUNIT_ASSERT( checkCudaDevice ); RealType result; @@ -240,18 +240,18 @@ class tnlCudaReductionTester : public CppUnit :: TestCase CPPUNIT_ASSERT( result == 8 * longSequence ); - freeMemoryHost( hostData ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlHost >::freeMemory( hostData ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); CPPUNIT_ASSERT( checkCudaDevice ); - }; + } template< typename RealType > void linearSequenceTest() { const int size( 10245 ); RealType *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData, size ); + tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); CPPUNIT_ASSERT( checkCudaDevice ); RealType sum( 0.0 ); @@ -260,7 +260,7 @@ class tnlCudaReductionTester : public CppUnit :: TestCase hostData[ i ] = -i - 1; sum += hostData[ i ]; } - copyMemoryHostToCuda( deviceData, hostData, size ); + tnlArrayOperations< tnlHost >::copyMemory< RealType, tnlCuda, RealType, int >( deviceData, hostData, size ); CPPUNIT_ASSERT( checkCudaDevice ); tnlParallelReductionSum< RealType, int > sumOperation; RealType result; @@ -292,18 +292,18 @@ ( reductionOnCudaDevice( absMaxOperation, size, deviceData, ( RealType* ) 0, result ) ) ); CPPUNIT_ASSERT( result == size ); - freeMemoryHost( hostData ); - freeMemoryCuda( deviceData ); + tnlArrayOperations< tnlHost >::freeMemory( hostData ); + tnlArrayOperations< tnlCuda >::freeMemory( deviceData ); CPPUNIT_ASSERT( checkCudaDevice ); - }; + } template< typename Type > void shortLogicalOperationsTest() { int size( 125 ); Type *hostData, *deviceData; - allocateMemoryHost( hostData, size ); - allocateMemoryCuda( deviceData, size ); + tnlArrayOperations< tnlHost >::allocateMemory( hostData, size ); + 
tnlArrayOperations< tnlCuda >::allocateMemory( deviceData, size ); CPPUNIT_ASSERT( checkCudaDevice ); for( int i = 0; i < size; i ++ ) @@ -503,7 +503,7 @@ class tnlCudaReductionTester : public CppUnit :: TestCase CPPUNIT_ASSERT( ( reductionOnCudaDevice( inequalityOperation, size, deviceData1, deviceData2, result ) ) ); CPPUNIT_ASSERT( result == true ); - }; + } template< typename Type > void shortSdotTest() @@ -536,7 +536,7 @@ class tnlCudaReductionTester : public CppUnit :: TestCase CPPUNIT_ASSERT( ( reductionOnCudaDevice( sdotOperation, size, deviceData1, deviceData2, result ) ) ); CPPUNIT_ASSERT( result == sdot ); - }; + } template< typename Type > @@ -570,7 +570,7 @@ class tnlCudaReductionTester : public CppUnit :: TestCase CPPUNIT_ASSERT( ( reductionOnCudaDevice( sdotOperation, size, deviceData1, deviceData2, result ) ) ); CPPUNIT_ASSERT( result == sdot ); - }; + } template< typename Type > void shortDiffTest() @@ -686,7 +686,7 @@ class tnlCudaReductionTester : public CppUnit :: TestCase freeMemoryCuda( deviceZeros ); freeMemoryCuda( deviceOnes ); freeMemoryCuda( deviceLinear ); - }; + } template< typename Type > @@ -804,7 +804,7 @@ class tnlCudaReductionTester : public CppUnit :: TestCase freeMemoryCuda( deviceZeros ); freeMemoryCuda( deviceOnes ); freeMemoryCuda( deviceLinear ); - }; + } }; diff --git a/tests/unit-tests/core/vectors/tnlVectorOperationsTester.h b/tests/unit-tests/core/vectors/tnlVectorOperationsTester.h index 667257884e..4593fba5b7 100644 --- a/tests/unit-tests/core/vectors/tnlVectorOperationsTester.h +++ b/tests/unit-tests/core/vectors/tnlVectorOperationsTester.h @@ -100,7 +100,12 @@ class tnlVectorOperationsTester : public CppUnit :: TestCase for( int i = 0; i < a. getSize(); i ++ ) a. getData()[ i ] = i; - copyMemoryHostToCuda( deviceVector. getData(), + tnlArrayOperations< tnlHost >:: + copyMemory< typename Vector::RealType, + tnlCuda, + typename Vector::RealType, + typename Vector::IndexType > + ( deviceVector. getData(), a. 
getData(), a. getSize() ); CPPUNIT_ASSERT( checkCudaDevice ); @@ -115,7 +120,12 @@ class tnlVectorOperationsTester : public CppUnit :: TestCase for( int i = 0; i < a. getSize(); i ++ ) a. getData()[ i ] = 1; - copyMemoryHostToCuda( deviceVector. getData(), + tnlArrayOperations< tnlHost >:: + copyMemory< typename Vector::RealType, + tnlCuda, + typename Vector::RealType, + typename Vector::IndexType > + ( deviceVector. getData(), a. getData(), a. getSize() ); CPPUNIT_ASSERT( checkCudaDevice ); @@ -130,7 +140,12 @@ class tnlVectorOperationsTester : public CppUnit :: TestCase for( int i = 0; i < a. getSize(); i ++ ) a. getData()[ i ] = -i; - copyMemoryHostToCuda( deviceVector. getData(), + tnlArrayOperations< tnlHost >:: + copyMemory< typename Vector::RealType, + tnlCuda, + typename Vector::RealType, + typename Vector::IndexType > + ( deviceVector. getData(), a. getData(), a. getSize() ); CPPUNIT_ASSERT( checkCudaDevice ); -- GitLab