Commit d0b3ee26 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Implementing CUDA memory operations and unit tests.

parent 91ab8e1a
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -343,10 +343,8 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation,
            break;
         case   1:
            tnlAssert( false, cerr << "blockSize should not be 1." << endl );
            break;
         default:
            tnlAssert( false, cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." );
            break;
      }
   return gridSize. x;
}
+69 −22
Original line number Diff line number Diff line
@@ -108,13 +108,11 @@ bool setMemoryCuda( Element* data,
   Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
   gridSize. x = Min( blocksNumber, maxGridSize );
   setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );

   return checkCudaDevice;
#else
      cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
      return false;
#endif

}

template< typename DestinationElement, typename SourceElement, typename Index >
@@ -166,7 +164,6 @@ bool copyMemoryHostToCuda( DestinationElement* destination,
                           const SourceElement* source,
                           const Index size )
{
#ifdef HAVE_CUDA
   DestinationElement* buffer = new DestinationElement[ tnlGPUvsCPUTransferBufferSize ];
   if( ! buffer )
   {
@@ -179,8 +176,8 @@ bool copyMemoryHostToCuda( DestinationElement* destination,
      Index j( 0 );
      while( j < tnlGPUvsCPUTransferBufferSize && i + j < size )
         buffer[ j ] = source[ i + j++ ];
      if( ! copyMemoryHostTuCuda( buffer,
                                  &destination[ i ],
      if( ! copyMemoryHostToCuda( &destination[ i ],
                                  buffer,
                                  j ) )
      {
         delete[] buffer;
@@ -190,10 +187,6 @@ bool copyMemoryHostToCuda( DestinationElement* destination,
   }
   delete[] buffer;
   return true;
#else
   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
   return false;
#endif
}

template< typename Element, typename Index >
@@ -234,8 +227,8 @@ bool copyMemoryCudaToHost( DestinationElement* destination,
   Index i( 0 );
   while( i < size )
   {
      if( ! copyMemoryCudaToHost( &source[ i ],
                                  buffer,
      if( ! copyMemoryCudaToHost( buffer,
                                  &source[ i ],
                                  Min( size - i, tnlGPUvsCPUTransferBufferSize ) ) )
      {
         delete[] buffer;
@@ -268,10 +261,63 @@ bool copyMemoryCudaToCuda( Element* destination,
#endif
}

template< typename Element, typename Index >
#ifdef HAVE_CUDA
template< typename DestinationElement,
          typename SourceElement,
          typename Index >
__global__ void copyMemoryCudaToCudaKernel( DestinationElement* destination,
                                            const SourceElement* source,
                                            const Index size )
{
   Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x;
   const Index maxGridSize = blockDim. x * gridDim. x;
   while( elementIdx < size )
   {
      destination[ elementIdx ] = source[ elementIdx ];
      elementIdx += maxGridSize;
   }
}
#endif


template< typename DestinationElement,
          typename SourceElement,
          typename Index >
bool copyMemoryCudaToCuda( DestinationElement* destination,
                           const SourceElement* source,
                           const Index size,
                           const Index maxGridSize )
{
#ifdef HAVE_CUDA
   dim3 blockSize( 0 ), gridSize( 0 );
   blockSize. x = 256;
   Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
   gridSize. x = Min( blocksNumber, maxGridSize );
   copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
   return checkCudaDevice;
#else
      cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
      return false;
#endif
}

template< typename Element,
          typename Index >
bool compareMemoryHost( const Element* data1,
                        const Element* data2,
                        const Index size )
{
   if( memcmp( data1, data2, size * sizeof( Element ) ) != 0 )
      return false;
   return true;
}

template< typename Element1,
          typename Element2,
          typename Index >
bool compareMemoryHost( const Element1* data1,
                        const Element2* data2,
                        const Index size )
{
   for( Index i = 0; i < size; i ++ )
      if( data1[ i ] != data2[ i ] )
@@ -279,15 +325,15 @@ bool compareMemoryHost( const Element* data1,
   return true;
}

template< typename Element, typename Index >
bool compareMemoryHostCuda( const Element* hostData,
                               const Element* deviceData,
template< typename Element1,
          typename Element2,
          typename Index >
bool compareMemoryHostCuda( const Element1* hostData,
                            const Element2* deviceData,
                            const Index size )
{
#ifdef HAVE_CUDA
   Index host_buffer_size = :: Min( ( Index ) ( tnlGPUvsCPUTransferBufferSize / sizeof( Element ) ),
                                                size );
   Element* host_buffer = new Element[ host_buffer_size ];
   Element2* host_buffer = new Element2[ tnlGPUvsCPUTransferBufferSize ];
   if( ! host_buffer )
   {
      cerr << "I am sorry but I cannot allocate supporting buffer on the host for comparing data between CUDA GPU and CPU." << endl;
@@ -296,10 +342,10 @@ bool compareMemoryHostCuda( const Element* hostData,
   Index compared( 0 );
   while( compared < size )
   {
      Index transfer = Min( size - compared, host_buffer_size );
      Index transfer = Min( size - compared, tnlGPUvsCPUTransferBufferSize );
      if( cudaMemcpy( ( void* ) host_buffer,
                      ( void* ) & ( deviceData[ compared ] ),
                      transfer * sizeof( Element ),
                      transfer * sizeof( Element2 ),
                      cudaMemcpyDeviceToHost ) != cudaSuccess )
      {
         cerr << "Transfer of data from the device failed." << endl;
@@ -328,7 +374,8 @@ bool compareMemoryHostCuda( const Element* hostData,
#endif
}

template< typename Element, typename Index >
template< typename Element,
          typename Index >
bool compareMemoryCuda( const Element* deviceData1,
                        const Element* deviceData2,
                        const Index size )
+0 −12
Original line number Diff line number Diff line
@@ -19,17 +19,5 @@ TARGET_LINK_LIBRARIES( tnlArrayTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                           tnl${mpiExt}${debugExt}-0.1 )
             
if( BUILD_CUDA )
#    CUDA_ADD_EXECUTABLE( tnl-device-check-test${mpiExt}${debugExt} ${headers} device-check-test.cu )
#    TARGET_LINK_LIBRARIES( tnl-device-check-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
#                                                              tnl${mpiExt}${debugExt}-0.1 )
#
#    CUDA_ADD_EXECUTABLE( tnl-memory-operations-test${mpiExt}${debugExt} ${headers} memory-operations-test.cu )
#    TARGET_LINK_LIBRARIES( tnl-memory-operations-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
#                                                              tnl${mpiExt}${debugExt}-0.1 )
#    
#    CUDA_ADD_EXECUTABLE( tnl-reduction-test${mpiExt}${debugExt} ${headers} reduction-test.cu )
#    TARGET_LINK_LIBRARIES( tnl-reduction-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
#                                                              tnl${mpiExt}${debugExt}-0.1 )                                                                
#                                                                                                                                                                                                                                                            
endif()
                                                                       
 No newline at end of file
+125 −22
Original line number Diff line number Diff line
@@ -46,10 +46,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                "allocationTest",
                                &tnlCudaMemoryOperationsTester :: allocationTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "copyTest",
                                &tnlCudaMemoryOperationsTester :: allocationTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "smallMemorySetTest",
                                &tnlCudaMemoryOperationsTester :: smallMemorySetTest )
@@ -58,6 +54,30 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                "bigMemorySetTest",
                                &tnlCudaMemoryOperationsTester :: bigMemorySetTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "copyMemoryTest",
                                &tnlCudaMemoryOperationsTester :: copyMemoryTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "copyMemoryWithConversionHostToCudaTest",
                                &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionHostToCudaTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "copyMemoryWithConversionCudaToHostTest",
                                &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToHostTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "copyMemoryWithConversionCudaToCudaTest",
                                &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToCudaTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "compareMemoryHostCudaTest",
                                &tnlCudaMemoryOperationsTester :: compareMemoryHostCudaTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "compareMemoryWithConevrsionHostCudaTest",
                                &tnlCudaMemoryOperationsTester :: compareMemoryWithConversionHostCudaTest )
                               );

      return suiteOfTests;
   };
@@ -72,22 +92,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      CPPUNIT_ASSERT( checkCudaDevice );
   };

   void copyTest()
   {
      const int size( 1 << 22 );
      int *hostData1, *hostData2, *deviceData;
      allocateMemoryHost( hostData1, size );
      allocateMemoryHost( hostData2, size );
      allocateMemoryCuda( deviceData, size );
      setMemoryHost( hostData1, 13, size );
      copyMemoryHostToCuda( deviceData, hostData1, size );
      copyMemoryCudaToHost( hostData2, deviceData, size );
      CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) );
      freeMemoryHost( hostData1 );
      freeMemoryHost( hostData2 );
      freeMemoryCuda( deviceData );
   };

   void smallMemorySetTest()
   {
      const int size( 1024 );
@@ -108,7 +112,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
   void bigMemorySetTest()
   {
      const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize );
      cout << "Size = " << size << endl;
      int *hostData, *deviceData;
      allocateMemoryHost( hostData, size );
      allocateMemoryCuda( deviceData, size );
@@ -120,13 +123,113 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      for( int i = 0; i < size; i += 100 )
      {
         if( hostData[ i ] != 13 )
            cout << " i = " << i << " " << hostData[ i ] << endl;
         CPPUNIT_ASSERT( hostData[ i ] == 13 );
      }
      freeMemoryHost( hostData );
      freeMemoryCuda( deviceData );
   };

   void copyMemoryTest()
   {
      const int size( 1 << 22 );
      int *hostData1, *hostData2, *deviceData;
      allocateMemoryHost( hostData1, size );
      allocateMemoryHost( hostData2, size );
      allocateMemoryCuda( deviceData, size );
      setMemoryHost( hostData1, 13, size );
      copyMemoryHostToCuda( deviceData, hostData1, size );
      copyMemoryCudaToHost( hostData2, deviceData, size );
      CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) );
      freeMemoryHost( hostData1 );
      freeMemoryHost( hostData2 );
      freeMemoryCuda( deviceData );
   };

   void copyMemoryWithConversionHostToCudaTest()
   {
      const int size( 1 << 22 );
      int *hostData1;
      float *hostData2, *deviceData;
      allocateMemoryHost( hostData1, size );
      allocateMemoryHost( hostData2, size );
      allocateMemoryCuda( deviceData, size );
      setMemoryHost( hostData1, 13, size );
      copyMemoryHostToCuda( deviceData, hostData1, size );
      copyMemoryCudaToHost( hostData2, deviceData, size );
      for( int i = 0; i < size; i ++ )
         CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] );
      freeMemoryHost( hostData1 );
      freeMemoryHost( hostData2 );
      freeMemoryCuda( deviceData );
   };

   void copyMemoryWithConversionCudaToHostTest()
   {
      const int size( 1 << 22 );
      int *hostData1, *deviceData;
      float *hostData2;
      allocateMemoryHost( hostData1, size );
      allocateMemoryHost( hostData2, size );
      allocateMemoryCuda( deviceData, size );
      setMemoryHost( hostData1, 13, size );
      copyMemoryHostToCuda( deviceData, hostData1, size );
      copyMemoryCudaToHost( hostData2, deviceData, size );
      for( int i = 0; i < size; i ++ )
         CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] );
      freeMemoryHost( hostData1 );
      freeMemoryHost( hostData2 );
      freeMemoryCuda( deviceData );
   };

   void copyMemoryWithConversionCudaToCudaTest()
   {
      const int size( 1 << 22 );
      int *hostData1, *deviceData1;
      float *hostData2, *deviceData2;
      allocateMemoryHost( hostData1, size );
      allocateMemoryHost( hostData2, size );
      allocateMemoryCuda( deviceData1, size );
      allocateMemoryCuda( deviceData2, size );
      setMemoryHost( hostData1, 13, size );
      copyMemoryHostToCuda( deviceData1, hostData1, size );
      copyMemoryCudaToCuda( deviceData2, deviceData1, size, maxCudaGridSize );
      copyMemoryCudaToHost( hostData2, deviceData2, size );
      for( int i = 0; i < size; i ++ )
         CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] );
      freeMemoryHost( hostData1 );
      freeMemoryHost( hostData2 );
      freeMemoryCuda( deviceData1 );
      freeMemoryCuda( deviceData2 );
   };

   void compareMemoryHostCudaTest()
   {
      const int size( 1 << 22 );
      int *hostData, *deviceData;
      allocateMemoryHost( hostData, size );
      allocateMemoryCuda( deviceData, size );
      setMemoryHost( hostData, 7, size );
      setMemoryCuda( deviceData, 8, size, maxCudaGridSize );
      CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) );
      setMemoryCuda( deviceData, 7, size, maxCudaGridSize );
      CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) );
   };

   void compareMemoryWithConversionHostCudaTest()
   {
      const int size( 1 << 22 );
      int *hostData;
      float *deviceData;
      allocateMemoryHost( hostData, size );
      allocateMemoryCuda( deviceData, size );
      setMemoryHost( hostData, 7, size );
      setMemoryCuda( deviceData, ( float ) 8.0, size, maxCudaGridSize );
      CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) );
      setMemoryCuda( deviceData, ( float ) 7.0, size, maxCudaGridSize );
      CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) );
   };


};

#else
+5 −2
Original line number Diff line number Diff line
@@ -15,14 +15,17 @@
 *                                                                         *
 ***************************************************************************/

#include <tnlConfig.h>
#include <core/tnlHost.h>
#include <cstdlib>

#include "tnlArrayTester.h"
#include "../tnlUnitTestStarter.h"

int main( int argc, char* argv[] )
{
#ifdef HAVE_CPPUNIT
/*   if( ! tnlUnitTestStarter :: run< tnlArrayTester< char, tnlHost, int > >() ||
   if( ! tnlUnitTestStarter :: run< tnlArrayTester< char, tnlHost, int > >() ||
       ! tnlUnitTestStarter :: run< tnlArrayTester< int, tnlHost, int > >() ||
       ! tnlUnitTestStarter :: run< tnlArrayTester< long int, tnlHost, int > >() ||
       ! tnlUnitTestStarter :: run< tnlArrayTester< float, tnlHost, int > >() ||
@@ -34,7 +37,7 @@ int main( int argc, char* argv[] )
       ! tnlUnitTestStarter :: run< tnlArrayTester< float, tnlHost, long int > >() ||
       ! tnlUnitTestStarter :: run< tnlArrayTester< double, tnlHost, long int > >() ||
       ! tnlUnitTestStarter :: run< tnlArrayTester< long double, tnlHost, long int > >() )
     return EXIT_FAILURE;*/
     return EXIT_FAILURE;
   return EXIT_SUCCESS;
#else
   return EXIT_FAILURE;
Loading