diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h
index 00871261f2cc9c343878b5717cfda4bd2b4436d3..dae87f9fa932883f76ff1d9191befc580c2e6cb7 100644
--- a/src/implementation/core/memory-operations.h
+++ b/src/implementation/core/memory-operations.h
@@ -70,11 +70,24 @@ bool setMemoryHost( Element* data,
 }
 
 #ifdef HAVE_CUDA
-template< typename Element >
+// Writes value into the elements of data whose index is below size. Each block
+// starts at blockDim.x * blockIdx.x * elementsPerThread; a thread writes at most
+// elementsPerThread elements, advancing by blockDim.x after every write.
+template< typename Element, typename Index >
 __global__ void setVectorValueCudaKernel( Element* data,
-                                          const Element value )
+                                          const Index size,
+                                          const Element value,
+                                          const Index elementsPerThread )
 {
-   data[ blockIdx. x * blockDim. x + threadIdx. x ] = value;
+   Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x;
+   Index elementsProcessed( 0 );
+   while( elementsProcessed < elementsPerThread &&
+          elementIdx < size )
+   {
+      data[ elementIdx ] = value;
+      elementIdx += blockDim. x;
+      elementsProcessed ++;
+   }
 }
 
 #endif
@@ -85,11 +98,12 @@ bool setMemoryCuda( Element* data,
 {
 #ifdef HAVE_CUDA
    dim3 blockSize, gridSize;
-   blockSize. x = 512;
+   blockSize. x = 32;
    int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
    int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize );
-
-   setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );
+   gridSize. x = Min( blocksNumber, maxCudaGridSize );
+   // The execution configuration is <<< grid dimensions, block dimensions >>>.
+   setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread );
 
    return checkCudaDevice;
 #else
diff --git a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
index 22892819909416610f94639bbb9ebaa1a059a022..b65a8091a588a1367e3955d60bd5c0814e597888 100644
--- a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
+++ b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
@@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                "allocationTest",
                                &tnlCudaMemoryOperationsTester :: allocationTest )
                               );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                               "copyTest",
+                               &tnlCudaMemoryOperationsTester :: copyTest )
+                              );
       suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                "smallMemorySetTest",
                                &tnlCudaMemoryOperationsTester :: smallMemorySetTest )
@@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       CPPUNIT_ASSERT( checkCudaDevice );
    };
 
+   void copyTest()
+   {
+      const int size( 1 << 20 );
+      int *hostData1, *hostData2, *deviceData;
+      allocateMemoryHost( hostData1, size );
+      allocateMemoryHost( hostData2, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData1, 13, size );
+      copyMemoryHostToCuda( deviceData, hostData1, size );
+      copyMemoryCudaToHost( hostData2, deviceData, size );
+      CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) );
+      freeMemoryHost( hostData1 );
+      freeMemoryHost( hostData2 );
+      freeMemoryCuda( deviceData );
+   };
+
    void smallMemorySetTest()
    {
-      const int size( 100 );
+      const int size( 1024 );
       int *hostData, *deviceData;
       allocateMemoryHost( hostData, size );
       allocateMemoryCuda( deviceData, size );
@@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       CPPUNIT_ASSERT( checkCudaDevice );
       for( int i = 0; i < size; i ++ )
          CPPUNIT_ASSERT( hostData[ i ] == 13 );
+      freeMemoryHost( hostData );
+      freeMemoryCuda( deviceData );
    };
 
    void bigMemorySetTest()
    {
-      const int size( 2.7 * maxCudaGridSize * maxCudaBlockSize );
+      const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize );
       cout << "Size = " << size << endl;
       int *hostData, *deviceData;
       allocateMemoryHost( hostData, size );
@@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       for( int i = 0; i < size; i ++ )
       {
          if( hostData[ i ] != 13 )
-            cout << " i = " << i << endl;
+            cout << " i = " << i << " " << hostData[ i ] << endl;
          CPPUNIT_ASSERT( hostData[ i ] == 13 );
       }
+      freeMemoryHost( hostData );
+      freeMemoryCuda( deviceData );
    };
 
 };