Commit b682fcfe authored by Tomáš Oberhuber
Browse files

Debugging memory set value CUDA kernel.

parent 695575ce
Loading
Loading
Loading
Loading
+17 −6
Original line number Diff line number Diff line
@@ -70,11 +70,21 @@ bool setMemoryHost( Element* data,
}

#ifdef HAVE_CUDA
// NOTE(review): this span looks like a rendered diff in which removed and
// added lines are interleaved (two template headers, two parameter-list
// tails, two stores inside the loop) -- confirm which lines belong to the
// current revision before compiling.
//
// CUDA kernel that stores `value` into elements of `data`.
// In the loop below a thread starts at
//    blockDim.x * blockIdx.x * elementsPerThread + threadIdx.x
// and advances by blockDim.x, so it writes at most `elementsPerThread`
// elements and never an index >= `size`.
template< typename Element >
template< typename Element, typename Index >
__global__ void setVectorValueCudaKernel( Element* data,
                                          const Element value )
                                          const Index size,
                                          const Element value,
                                          const Index elementsPerThread )
{
   // First element handled by this thread; each block owns a contiguous
   // range of blockDim.x * elementsPerThread elements.
   Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x;
   Index elementsProcessed( 0 );
   // Stop after elementsPerThread writes or once the index leaves the array.
   while( elementsProcessed < elementsPerThread &&
          elementIdx < size )
   {
   // NOTE(review): this store does not use elementIdx and is not guarded by
   // `size` on its own -- presumably the pre-change line of the diff; confirm.
   data[ blockIdx. x * blockDim. x + threadIdx. x ] = value;
      data[ elementIdx ] = value;
      // Step to this thread's next element within the block's range.
      elementIdx += blockDim. x;
      elementsProcessed ++;
   }
}
#endif

@@ -85,11 +95,12 @@ bool setMemoryCuda( Element* data,
{
#ifdef HAVE_CUDA
      dim3 blockSize, gridSize;
      blockSize. x = 512;
      blockSize. x = 32;
      int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
      int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize );

      setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );
      gridSize. x = Min( blocksNumber, maxCudaGridSize );
      cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl;
      // Execution configuration is <<< grid, block >>>: gridSize holds the number
      // of blocks and blockSize the threads per block (size was divided by
      // blockSize. x above), so the grid dimension must come first.
      setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );

      return checkCudaDevice;
#else
+27 −3
Original line number Diff line number Diff line
@@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                "allocationTest",
                                &tnlCudaMemoryOperationsTester :: allocationTest )
                               );
      // Register the host -> device -> host copy round-trip test.
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "copyTest",
                                &tnlCudaMemoryOperationsTester :: copyTest )
                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "smallMemorySetTest",
                                &tnlCudaMemoryOperationsTester :: smallMemorySetTest )
@@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      CPPUNIT_ASSERT( checkCudaDevice );
   };

   /**
    * Round-trip copy test: fills a host buffer with 13, copies it to the
    * device and back into a second host buffer, and checks that both host
    * buffers hold the same data.
    */
   void copyTest()
   {
      const int size( 1 << 20 );
      int *hostData1, *hostData2, *deviceData;
      allocateMemoryHost( hostData1, size );
      allocateMemoryHost( hostData2, size );
      allocateMemoryCuda( deviceData, size );
      setMemoryHost( hostData1, 13, size );
      copyMemoryHostToCuda( deviceData, hostData1, size );
      copyMemoryCudaToHost( hostData2, deviceData, size );
      // Evaluate the comparison before asserting: a failing CPPUNIT_ASSERT
      // throws, which would skip the frees below and leak all three buffers.
      const bool buffersMatch = compareMemoryHost( hostData1, hostData2, size );
      freeMemoryHost( hostData1 );
      freeMemoryHost( hostData2 );
      freeMemoryCuda( deviceData );
      CPPUNIT_ASSERT( buffersMatch );
   };

   void smallMemorySetTest()
   {
      const int size( 100 );
      const int size( 1024 );
      int *hostData, *deviceData;
      allocateMemoryHost( hostData, size );
      allocateMemoryCuda( deviceData, size );
@@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      CPPUNIT_ASSERT( checkCudaDevice );
      for( int i = 0; i < size; i ++ )
         CPPUNIT_ASSERT( hostData[ i ] == 13 );
      freeMemoryHost( hostData );
      freeMemoryCuda( deviceData );
   };

   void bigMemorySetTest()
   {
      const int size( 2.7 * maxCudaGridSize * maxCudaBlockSize );
      const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize );
      cout << "Size = " << size << endl;
      int *hostData, *deviceData;
      allocateMemoryHost( hostData, size );
@@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      for( int i = 0; i < size; i ++ )
      {
         if( hostData[ i ] != 13 )
            cout << " i = " << i << endl;
            cout << " i = " << i << " " << hostData[ i ] << endl;
         CPPUNIT_ASSERT( hostData[ i ] == 13 );
      }
      freeMemoryHost( hostData );
      freeMemoryCuda( deviceData );
   };

};