Debugging memory set value CUDA kernel.

b682fcfe · Tomáš Oberhuber · 695575ce · b682fcfe · b682fcfe
Commit b682fcfe authored 12 years ago by Tomáš Oberhuber
--- a/src/implementation/core/memory-operations.h
+++ b/src/implementation/core/memory-operations.h
@@ -70,11 +70,21 @@ bool setMemoryHost( Element* data,
 }
 #ifdef HAVE_CUDA
-template< typename Element >
+// Sets elements of `data` to `value`. Block b covers the contiguous range that
+// starts at blockDim.x * b * elementsPerThread; inside that range thread t
+// starts at offset t and advances by blockDim.x, writing at most
+// elementsPerThread elements and never an index >= size.
+template< typename Element, typename Index >
 __global__ void setVectorValueCudaKernel( Element* data,
-                                          const Element value )
+                                          const Index size,
+                                          const Element value,
+                                          const Index elementsPerThread )
 {
-   data[ blockIdx. x * blockDim. x + threadIdx. x ] = value;
+   // First element written by this thread: start of the block's range plus
+   // the thread's offset within the block.
+   Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x;
+   Index elementsProcessed( 0 );
+   // Stop after elementsPerThread writes or at the end of the array,
+   // whichever comes first.
+   while( elementsProcessed < elementsPerThread &&
+          elementIdx < size )
+   {
+      data[ elementIdx ] = value;
+      // Step by the block width so neighbouring threads keep writing
+      // neighbouring elements.
+      elementIdx += blockDim. x;
+      elementsProcessed ++;
+   }
 }
 #endif
@@ -85,11 +95,12 @@ bool setMemoryCuda( Element* data,
 {
 #ifdef HAVE_CUDA
      dim3 blockSize, gridSize;
-      blockSize. x = 512;
+      blockSize. x = 32;
      int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
      int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize );
+      // Cap the grid at maxCudaGridSize blocks; when blocksNumber exceeds that,
+      // elementsPerThread > 1 and each thread writes several elements instead.
+      gridSize. x = Min( blocksNumber, maxCudaGridSize );
-      setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );
+      cout << "blocksNumber = " << blocksNumber << " Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl;
+      // The execution configuration is <<< grid, block >>>: passing the two in
+      // the opposite order launches gridSize. x threads per block, which fails
+      // as soon as that exceeds the per-block thread limit.
+      setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );
      return checkCudaDevice;
 #else

--- a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
+++ b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
@@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                "allocationTest",
                                &tnlCudaMemoryOperationsTester :: allocationTest )
                               );
+      // Bind the "copyTest" entry to copyTest itself so the host <-> device
+      // copy round trip is actually executed by the suite.
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                                "copyTest",
+                                &tnlCudaMemoryOperationsTester :: copyTest )
+                               );
      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "smallMemorySetTest",
                                &tnlCudaMemoryOperationsTester :: smallMemorySetTest )
@@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      CPPUNIT_ASSERT( checkCudaDevice );
   };
+   // Round-trip test: fills a host buffer with 13, copies it host -> device and
+   // device -> host into a second host buffer, and checks that both host
+   // buffers hold the same data.
+   void copyTest()
+   {
+      const int size( 1 << 20 );
+      int *hostData1, *hostData2, *deviceData;
+      allocateMemoryHost( hostData1, size );
+      allocateMemoryHost( hostData2, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData1, 13, size );
+      copyMemoryHostToCuda( deviceData, hostData1, size );
+      copyMemoryCudaToHost( hostData2, deviceData, size );
+      // Compare first and release every buffer before asserting: a failing
+      // CPPUNIT_ASSERT throws, which would otherwise skip the frees and leak
+      // the host and device allocations into the following tests.
+      const bool roundTripMatches = compareMemoryHost( hostData1, hostData2, size );
+      freeMemoryHost( hostData1 );
+      freeMemoryHost( hostData2 );
+      freeMemoryCuda( deviceData );
+      CPPUNIT_ASSERT( roundTripMatches );
+   };
   void smallMemorySetTest()
   {
-      const int size( 100 );
+      const int size( 1024 );
      int *hostData, *deviceData;
      allocateMemoryHost( hostData, size );
      allocateMemoryCuda( deviceData, size );
@@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      CPPUNIT_ASSERT( checkCudaDevice );
      for( int i = 0; i < size; i ++ )
         CPPUNIT_ASSERT( hostData[ i ] == 13 );
+      freeMemoryHost( hostData );
+      freeMemoryCuda( deviceData );
   };
   void bigMemorySetTest()
   {
-      const int size( 2.7 * maxCudaGridSize * maxCudaBlockSize );
+      const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize );
      cout << "Size = " << size << endl;
      int *hostData, *deviceData;
      allocateMemoryHost( hostData, size );
@@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
      for( int i = 0; i < size; i ++ )
      {
         if( hostData[ i ] != 13 )
-            cout << " i = " << i << endl;
+            cout << " i = " << i << " " << hostData[ i ] << endl;
         CPPUNIT_ASSERT( hostData[ i ] == 13 );
      }
+      freeMemoryHost( hostData );
+      freeMemoryCuda( deviceData );
   };
 };