From b682fcfeefd103b5204c90fc7cfa58b8fd118fc8 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Thu, 21 Mar 2013 21:06:31 +0100
Subject: [PATCH] Debugging memory set value CUDA kernel.

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):

 * setMemoryCuda: the kernel is now launched with <<< gridSize, blockSize >>>.
   The CUDA execution configuration takes the grid dimension first and the
   block dimension second. The swapped order launched blockSize. x blocks of
   gridSize. x threads each, which only behaves as intended when both values
   happen to be equal.
 * Test suite registration: "copyTest" is now bound to
   tnlCudaMemoryOperationsTester :: copyTest. It was bound to allocationTest,
   so the new copyTest() body was never executed.
 * NOTE(review): the kernel takes both size and elementsPerThread as Index,
   while the caller passes an int elementsPerThread. The type of size is not
   visible in this hunk - confirm that Index deduces consistently.
 * NOTE(review): indentation of the hunks below was reconstructed; verify it
   against the tree or apply with a whitespace-tolerant option. The blob ids
   in the "index" lines still refer to the original post-image.

 src/implementation/core/memory-operations.h   | 23 ++++++++++----
 .../core/cuda/tnlCudaMemoryOperationsTester.h | 30 +++++++++++++++++--
 2 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h
index 00871261f2..dae87f9fa9 100644
--- a/src/implementation/core/memory-operations.h
+++ b/src/implementation/core/memory-operations.h
@@ -70,11 +70,21 @@ bool setMemoryHost( Element* data,
 }
 
 #ifdef HAVE_CUDA
-template< typename Element >
+template< typename Element, typename Index >
 __global__ void setVectorValueCudaKernel( Element* data,
-                                          const Element value )
+                                          const Index size,
+                                          const Element value,
+                                          const Index elementsPerThread )
 {
-   data[ blockIdx. x * blockDim. x + threadIdx. x ] = value;
+   Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x;
+   Index elementsProcessed( 0 );
+   while( elementsProcessed < elementsPerThread &&
+          elementIdx < size )
+   {
+      data[ elementIdx ] = value;
+      elementIdx += blockDim. x;
+      elementsProcessed ++;
+   }
 }
 #endif
 
@@ -85,11 +95,12 @@ bool setMemoryCuda( Element* data,
 {
 #ifdef HAVE_CUDA
    dim3 blockSize, gridSize;
-   blockSize. x = 512;
+   blockSize. x = 32;
    int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
    int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize );
-
-   setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );
+   gridSize. x = Min( blocksNumber, maxCudaGridSize );
+   cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl;
+   setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );
 
    return checkCudaDevice;
 #else
diff --git a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
index 2289281990..b65a8091a5 100644
--- a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
+++ b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
@@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                "allocationTest",
                                &tnlCudaMemoryOperationsTester :: allocationTest )
                               );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                               "copyTest",
+                               &tnlCudaMemoryOperationsTester :: copyTest )
+                             );
       suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "smallMemorySetTest",
                                &tnlCudaMemoryOperationsTester :: smallMemorySetTest )
@@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       CPPUNIT_ASSERT( checkCudaDevice );
    };
 
+   void copyTest()
+   {
+      const int size( 1 << 20 );
+      int *hostData1, *hostData2, *deviceData;
+      allocateMemoryHost( hostData1, size );
+      allocateMemoryHost( hostData2, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData1, 13, size );
+      copyMemoryHostToCuda( deviceData, hostData1, size );
+      copyMemoryCudaToHost( hostData2, deviceData, size );
+      CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) );
+      freeMemoryHost( hostData1 );
+      freeMemoryHost( hostData2 );
+      freeMemoryCuda( deviceData );
+   };
+
    void smallMemorySetTest()
    {
-      const int size( 100 );
+      const int size( 1024 );
       int *hostData, *deviceData;
       allocateMemoryHost( hostData, size );
       allocateMemoryCuda( deviceData, size );
@@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
          CPPUNIT_ASSERT( hostData[ i ] == 13 );
+      freeMemoryHost( hostData );
+      freeMemoryCuda( deviceData );
    };
 
    void bigMemorySetTest()
    {
-      const int size( 2.7 * maxCudaGridSize * maxCudaBlockSize );
+      const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize );
       cout << "Size = " << size << endl;
       int *hostData, *deviceData;
       allocateMemoryHost( hostData, size );
@@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       for( int i = 0; i < size; i ++ )
       {
          if( hostData[ i ] != 13 )
-            cout << " i = " << i << endl;
+            cout << " i = " << i << " " << hostData[ i ] << endl;
          CPPUNIT_ASSERT( hostData[ i ] == 13 );
       }
+      freeMemoryHost( hostData );
+      freeMemoryCuda( deviceData );
    };
 
 };
-- 
GitLab