diff --git a/src/implementation/core/cuda/cuda-reduction_impl.h b/src/implementation/core/cuda/cuda-reduction_impl.h index 741b613bf139dbaafea0de4ab07537fa4bccadc3..6355790aab7ca262e7f328f2e16c87f6fdefeb68 100644 --- a/src/implementation/core/cuda/cuda-reduction_impl.h +++ b/src/implementation/core/cuda/cuda-reduction_impl.h @@ -343,10 +343,8 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation, break; case 1: tnlAssert( false, cerr << "blockSize should not be 1." << endl ); - break; default: tnlAssert( false, cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); - break; } return gridSize. x; } diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h index 3dfd2ee83de9d03561de7bd2b5f079c95d8cbfed..aa8b6d15c3d060e036c0d84d89724a6948682589 100644 --- a/src/implementation/core/memory-operations.h +++ b/src/implementation/core/memory-operations.h @@ -108,13 +108,11 @@ bool setMemoryCuda( Element* data, Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); gridSize. x = Min( blocksNumber, maxGridSize ); setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value ); - return checkCudaDevice; #else cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; return false; #endif - } template< typename DestinationElement, typename SourceElement, typename Index > @@ -166,7 +164,6 @@ bool copyMemoryHostToCuda( DestinationElement* destination, const SourceElement* source, const Index size ) { -#ifdef HAVE_CUDA DestinationElement* buffer = new DestinationElement[ tnlGPUvsCPUTransferBufferSize ]; if( ! buffer ) { @@ -179,8 +176,8 @@ bool copyMemoryHostToCuda( DestinationElement* destination, Index j( 0 ); while( j < tnlGPUvsCPUTransferBufferSize && i + j < size ) buffer[ j ] = source[ i + j++ ]; - if( ! copyMemoryHostTuCuda( buffer, - &destination[ i ], + if( ! copyMemoryHostToCuda( &destination[ i ], + buffer, j ) ) { delete[] buffer; @@ -190,10 +187,6 @@ bool copyMemoryHostToCuda( DestinationElement* destination, } delete[] buffer; return true; -#else - cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; - return false; -#endif } template< typename Element, typename Index > @@ -234,8 +227,8 @@ bool copyMemoryCudaToHost( DestinationElement* destination, Index i( 0 ); while( i < size ) { - if( ! copyMemoryCudaToHost( &source[ i ], - buffer, + if( ! copyMemoryCudaToHost( buffer, + &source[ i ], Min( size - i, tnlGPUvsCPUTransferBufferSize ) ) ) { delete[] buffer; @@ -268,10 +261,63 @@ bool copyMemoryCudaToCuda( Element* destination, #endif } -template< typename Element, typename Index > +#ifdef HAVE_CUDA +template< typename DestinationElement, + typename SourceElement, + typename Index > +__global__ void copyMemoryCudaToCudaKernel( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x; + const Index maxGridSize = blockDim. x * gridDim. x; + while( elementIdx < size ) + { + destination[ elementIdx ] = source[ elementIdx ]; + elementIdx += maxGridSize; + } +} +#endif + + +template< typename DestinationElement, + typename SourceElement, + typename Index > +bool copyMemoryCudaToCuda( DestinationElement* destination, + const SourceElement* source, + const Index size, + const Index maxGridSize ) +{ +#ifdef HAVE_CUDA + dim3 blockSize( 0 ), gridSize( 0 ); + blockSize. x = 256; + Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); + gridSize. x = Min( blocksNumber, maxGridSize ); + copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); + return checkCudaDevice; +#else + cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; + return false; +#endif +} + +template< typename Element, + typename Index > bool compareMemoryHost( const Element* data1, const Element* data2, const Index size ) +{ + if( memcmp( data1, data2, size * sizeof( Element ) ) != 0 ) + return false; + return true; +} + +template< typename Element1, + typename Element2, + typename Index > +bool compareMemoryHost( const Element1* data1, + const Element2* data2, + const Index size ) { for( Index i = 0; i < size; i ++ ) if( data1[ i ] != data2[ i ] ) @@ -279,15 +325,15 @@ bool compareMemoryHost( const Element* data1, return true; } -template< typename Element, typename Index > -bool compareMemoryHostCuda( const Element* hostData, - const Element* deviceData, - const Index size ) +template< typename Element1, + typename Element2, + typename Index > +bool compareMemoryHostCuda( const Element1* hostData, + const Element2* deviceData, + const Index size ) { #ifdef HAVE_CUDA - Index host_buffer_size = :: Min( ( Index ) ( tnlGPUvsCPUTransferBufferSize / sizeof( Element ) ), - size ); - Element* host_buffer = new Element[ host_buffer_size ]; + Element2* host_buffer = new Element2[ tnlGPUvsCPUTransferBufferSize ]; if( ! host_buffer ) { cerr << "I am sorry but I cannot allocate supporting buffer on the host for comparing data between CUDA GPU and CPU." << endl; @@ -296,10 +342,10 @@ bool compareMemoryHostCuda( const Element* hostData, Index compared( 0 ); while( compared < size ) { - Index transfer = Min( size - compared, host_buffer_size ); + Index transfer = Min( size - compared, tnlGPUvsCPUTransferBufferSize ); if( cudaMemcpy( ( void* ) host_buffer, ( void* ) & ( deviceData[ compared ] ), - transfer * sizeof( Element ), + transfer * sizeof( Element2 ), cudaMemcpyDeviceToHost ) != cudaSuccess ) { cerr << "Transfer of data from the device failed." << endl; @@ -328,7 +374,8 @@ bool compareMemoryHostCuda( const Element* hostData, #endif } -template< typename Element, typename Index > +template< typename Element, + typename Index > bool compareMemoryCuda( const Element* deviceData1, const Element* deviceData2, const Index size ) diff --git a/tests/unit-tests/CMakeLists.txt b/tests/unit-tests/CMakeLists.txt index 0f5e338e650c094b88ae986ffed0482935615e4e..80fa599c77340d33cd597f470714c9a71d10e2d5 100755 --- a/tests/unit-tests/CMakeLists.txt +++ b/tests/unit-tests/CMakeLists.txt @@ -39,7 +39,7 @@ if( BUILD_CUDA ) ADD_TEST( core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnl-memory-operations-test${mpiExt}${debugExt} ) SET_TESTS_PROPERTIES ( core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-device-check-test${mpiExt}${debugExt} ) ADD_TEST( core/cuda/tnl-reduction-test${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnl-reduction-test${mpiExt}${debugExt} ) - SET_TESTS_PROPERTIES ( core/cuda/tnl-reduction-test${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} ) + SET_TESTS_PROPERTIES ( core/cuda/tnl-reduction-test${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} ) ADD_TEST( core/cuda/tnlCudaVectorOperationsTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnlCudaVectorOperationsTest${mpiExt}${debugExt} ) SET_TESTS_PROPERTIES ( core/cuda/tnlCudaVectorOperationsTest${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-reduction-test${mpiExt}${debugExt} ) diff --git a/tests/unit-tests/core/CMakeLists.txt b/tests/unit-tests/core/CMakeLists.txt index ab23f61b0aaa1c8635748e5d930a8e92758ffded..dbad7047b16293927d2ec5242ce18ee4e372ee57 100755 --- a/tests/unit-tests/core/CMakeLists.txt +++ b/tests/unit-tests/core/CMakeLists.txt @@ -19,17 +19,5 @@ TARGET_LINK_LIBRARIES( tnlArrayTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} tnl${mpiExt}${debugExt}-0.1 ) if( BUILD_CUDA ) -# CUDA_ADD_EXECUTABLE( tnl-device-check-test${mpiExt}${debugExt} ${headers} device-check-test.cu ) -# TARGET_LINK_LIBRARIES( tnl-device-check-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} -# tnl${mpiExt}${debugExt}-0.1 ) -# -# CUDA_ADD_EXECUTABLE( tnl-memory-operations-test${mpiExt}${debugExt} ${headers} memory-operations-test.cu ) -# TARGET_LINK_LIBRARIES( tnl-memory-operations-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} -# tnl${mpiExt}${debugExt}-0.1 ) -# -# CUDA_ADD_EXECUTABLE( tnl-reduction-test${mpiExt}${debugExt} ${headers} reduction-test.cu ) -# TARGET_LINK_LIBRARIES( tnl-reduction-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} -# tnl${mpiExt}${debugExt}-0.1 ) -# endif() \ No newline at end of file diff --git a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h index 6be0197eb1996c4f3475a65acac13f9539c4ac15..57522c234eecf1488f9b3f9918e328230adee4ad 100644 --- a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h +++ b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h @@ -46,10 +46,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase "allocationTest", &tnlCudaMemoryOperationsTester :: allocationTest ) ); - suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( - "copyTest", - &tnlCudaMemoryOperationsTester :: allocationTest ) - ); suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( "smallMemorySetTest", &tnlCudaMemoryOperationsTester :: smallMemorySetTest ) @@ -58,6 +54,30 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase "bigMemorySetTest", &tnlCudaMemoryOperationsTester :: bigMemorySetTest ) ); + suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( + "copyMemoryTest", + &tnlCudaMemoryOperationsTester :: copyMemoryTest ) + ); + suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( + "copyMemoryWithConversionHostToCudaTest", + &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionHostToCudaTest ) + ); + suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( + "copyMemoryWithConversionCudaToHostTest", + &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToHostTest ) + ); + suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( + "copyMemoryWithConversionCudaToCudaTest", + &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToCudaTest ) + ); + suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( + "compareMemoryHostCudaTest", + &tnlCudaMemoryOperationsTester :: compareMemoryHostCudaTest ) + ); + suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( + "compareMemoryWithConevrsionHostCudaTest", + &tnlCudaMemoryOperationsTester :: compareMemoryWithConversionHostCudaTest ) + ); return suiteOfTests; }; @@ -72,22 +92,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase CPPUNIT_ASSERT( checkCudaDevice ); }; - void copyTest() - { - const int size( 1 << 22 ); - int *hostData1, *hostData2, *deviceData; - allocateMemoryHost( hostData1, size ); - allocateMemoryHost( hostData2, size ); - allocateMemoryCuda( deviceData, size ); - setMemoryHost( hostData1, 13, size ); - copyMemoryHostToCuda( deviceData, hostData1, size ); - copyMemoryCudaToHost( hostData2, deviceData, size ); - CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) ); - freeMemoryHost( hostData1 ); - freeMemoryHost( hostData2 ); - freeMemoryCuda( deviceData ); - }; - void smallMemorySetTest() { const int size( 1024 ); @@ -108,7 +112,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase void bigMemorySetTest() { const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize ); - cout << "Size = " << size << endl; int *hostData, *deviceData; allocateMemoryHost( hostData, size ); allocateMemoryCuda( deviceData, size ); @@ -120,13 +123,113 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase for( int i = 0; i < size; i += 100 ) { if( hostData[ i ] != 13 ) - cout << " i = " << i << " " << hostData[ i ] << endl; CPPUNIT_ASSERT( hostData[ i ] == 13 ); } freeMemoryHost( hostData ); freeMemoryCuda( deviceData ); }; + void copyMemoryTest() + { + const int size( 1 << 22 ); + int *hostData1, *hostData2, *deviceData; + allocateMemoryHost( hostData1, size ); + allocateMemoryHost( hostData2, size ); + allocateMemoryCuda( deviceData, size ); + setMemoryHost( hostData1, 13, size ); + copyMemoryHostToCuda( deviceData, hostData1, size ); + copyMemoryCudaToHost( hostData2, deviceData, size ); + CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) ); + freeMemoryHost( hostData1 ); + freeMemoryHost( hostData2 ); + freeMemoryCuda( deviceData ); + }; + + void copyMemoryWithConversionHostToCudaTest() + { + const int size( 1 << 22 ); + int *hostData1; + float *hostData2, *deviceData; + allocateMemoryHost( hostData1, size ); + allocateMemoryHost( hostData2, size ); + allocateMemoryCuda( deviceData, size ); + setMemoryHost( hostData1, 13, size ); + copyMemoryHostToCuda( deviceData, hostData1, size ); + copyMemoryCudaToHost( hostData2, deviceData, size ); + for( int i = 0; i < size; i ++ ) + CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); + freeMemoryHost( hostData1 ); + freeMemoryHost( hostData2 ); + freeMemoryCuda( deviceData ); + }; + + void copyMemoryWithConversionCudaToHostTest() + { + const int size( 1 << 22 ); + int *hostData1, *deviceData; + float *hostData2; + allocateMemoryHost( hostData1, size ); + allocateMemoryHost( hostData2, size ); + allocateMemoryCuda( deviceData, size ); + setMemoryHost( hostData1, 13, size ); + copyMemoryHostToCuda( deviceData, hostData1, size ); + copyMemoryCudaToHost( hostData2, deviceData, size ); + for( int i = 0; i < size; i ++ ) + CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); + freeMemoryHost( hostData1 ); + freeMemoryHost( hostData2 ); + freeMemoryCuda( deviceData ); + }; + + void copyMemoryWithConversionCudaToCudaTest() + { + const int size( 1 << 22 ); + int *hostData1, *deviceData1; + float *hostData2, *deviceData2; + allocateMemoryHost( hostData1, size ); + allocateMemoryHost( hostData2, size ); + allocateMemoryCuda( deviceData1, size ); + allocateMemoryCuda( deviceData2, size ); + setMemoryHost( hostData1, 13, size ); + copyMemoryHostToCuda( deviceData1, hostData1, size ); + copyMemoryCudaToCuda( deviceData2, deviceData1, size, maxCudaGridSize ); + copyMemoryCudaToHost( hostData2, deviceData2, size ); + for( int i = 0; i < size; i ++ ) + CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] ); + freeMemoryHost( hostData1 ); + freeMemoryHost( hostData2 ); + freeMemoryCuda( deviceData1 ); + freeMemoryCuda( deviceData2 ); + }; + + void compareMemoryHostCudaTest() + { + const int size( 1 << 22 ); + int *hostData, *deviceData; + allocateMemoryHost( hostData, size ); + allocateMemoryCuda( deviceData, size ); + setMemoryHost( hostData, 7, size ); + setMemoryCuda( deviceData, 8, size, maxCudaGridSize ); + CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) ); + setMemoryCuda( deviceData, 7, size, maxCudaGridSize ); + CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) ); + }; + + void compareMemoryWithConversionHostCudaTest() + { + const int size( 1 << 22 ); + int *hostData; + float *deviceData; + allocateMemoryHost( hostData, size ); + allocateMemoryCuda( deviceData, size ); + setMemoryHost( hostData, 7, size ); + setMemoryCuda( deviceData, ( float ) 8.0, size, maxCudaGridSize ); + CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) ); + setMemoryCuda( deviceData, ( float ) 7.0, size, maxCudaGridSize ); + CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) ); + }; + + }; #else diff --git a/tests/unit-tests/core/tnlArrayTest.cpp b/tests/unit-tests/core/tnlArrayTest.cpp index 4184e8c35650aa9764442ae0afca73eec4a3a345..2902b47838bfc72f471e690291637aebfdbebd8f 100644 --- a/tests/unit-tests/core/tnlArrayTest.cpp +++ b/tests/unit-tests/core/tnlArrayTest.cpp @@ -15,14 +15,17 @@ * * ***************************************************************************/ +#include <tnlConfig.h> +#include <core/tnlHost.h> #include <cstdlib> + #include "tnlArrayTester.h" #include "../tnlUnitTestStarter.h" int main( int argc, char* argv[] ) { #ifdef HAVE_CPPUNIT -/* if( ! tnlUnitTestStarter :: run< tnlArrayTester< char, tnlHost, int > >() || + if( ! tnlUnitTestStarter :: run< tnlArrayTester< char, tnlHost, int > >() || ! tnlUnitTestStarter :: run< tnlArrayTester< int, tnlHost, int > >() || ! tnlUnitTestStarter :: run< tnlArrayTester< long int, tnlHost, int > >() || ! tnlUnitTestStarter :: run< tnlArrayTester< float, tnlHost, int > >() || @@ -34,7 +37,7 @@ int main( int argc, char* argv[] ) ! tnlUnitTestStarter :: run< tnlArrayTester< float, tnlHost, long int > >() || ! tnlUnitTestStarter :: run< tnlArrayTester< double, tnlHost, long int > >() || ! tnlUnitTestStarter :: run< tnlArrayTester< long double, tnlHost, long int > >() ) - return EXIT_FAILURE;*/ + return EXIT_FAILURE; return EXIT_SUCCESS; #else return EXIT_FAILURE; diff --git a/tests/unit-tests/core/tnlArrayTester.h b/tests/unit-tests/core/tnlArrayTester.h index 9bb9e8b0177fdf4a466048f88e1db3192056382e..42859c99c0dc96c259e4f36cffcc4b27ccd2c8aa 100644 --- a/tests/unit-tests/core/tnlArrayTester.h +++ b/tests/unit-tests/core/tnlArrayTester.h @@ -213,7 +213,9 @@ class tnlArrayTester : public CppUnit :: TestCase }; }; - +#else /* HAVE_CPPUNIT */ +template< typename ElementType, typename Device, typename IndexType > +class tnlArrayTester{}; #endif /* HAVE_CPPUNIT */ #endif /* TNLARRAYMANAGERTESTER_H_ */