Loading src/implementation/core/memory-operations.h +17 −6 Original line number Diff line number Diff line Loading @@ -70,11 +70,21 @@ bool setMemoryHost( Element* data, } #ifdef HAVE_CUDA template< typename Element > template< typename Element, typename Index > __global__ void setVectorValueCudaKernel( Element* data, const Element value ) const Index size, const Element value, const Index elementsPerThread ) { Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x; Index elementsProcessed( 0 ); while( elementsProcessed < elementsPerThread && elementIdx < size ) { data[ blockIdx. x * blockDim. x + threadIdx. x ] = value; data[ elementIdx ] = value; elementIdx += blockDim. x; elementsProcessed ++; } } #endif Loading @@ -85,11 +95,12 @@ bool setMemoryCuda( Element* data, { #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize. x = 512; blockSize. x = 32; int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize ); setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread ); gridSize. x = Min( blocksNumber, maxCudaGridSize ); cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl; setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value, elementsPerThread ); return checkCudaDevice; #else Loading tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h +27 −3 Original line number Diff line number Diff line Loading @@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase "allocationTest", &tnlCudaMemoryOperationsTester :: allocationTest ) ); suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( "copyTest", &tnlCudaMemoryOperationsTester :: allocationTest ) ); suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( "smallMemorySetTest", &tnlCudaMemoryOperationsTester :: smallMemorySetTest ) Loading @@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase CPPUNIT_ASSERT( checkCudaDevice ); }; void copyTest() { const int size( 1 << 20 ); int *hostData1, *hostData2, *deviceData; allocateMemoryHost( hostData1, size ); allocateMemoryHost( hostData2, size ); allocateMemoryCuda( deviceData, size ); setMemoryHost( hostData1, 13, size ); copyMemoryHostToCuda( deviceData, hostData1, size ); copyMemoryCudaToHost( hostData2, deviceData, size ); CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) ); freeMemoryHost( hostData1 ); freeMemoryHost( hostData2 ); freeMemoryCuda( deviceData ); }; void smallMemorySetTest() { const int size( 100 ); const int size( 1024 ); int *hostData, *deviceData; allocateMemoryHost( hostData, size ); allocateMemoryCuda( deviceData, size ); Loading @@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase CPPUNIT_ASSERT( checkCudaDevice ); for( int i = 0; i < size; i ++ ) CPPUNIT_ASSERT( hostData[ i ] == 13 ); freeMemoryHost( hostData ); freeMemoryCuda( deviceData ); }; void bigMemorySetTest() { const int size( 2.7 * maxCudaGridSize * maxCudaBlockSize ); const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize ); cout << "Size = " << size << endl; int *hostData, *deviceData; allocateMemoryHost( hostData, size ); Loading @@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase for( int i = 0; i < size; i ++ ) { if( hostData[ i ] != 13 ) cout << " i = " << i << endl; cout << " i = " << i << " " << hostData[ i ] << endl; CPPUNIT_ASSERT( hostData[ i ] == 13 ); } freeMemoryHost( hostData ); freeMemoryCuda( deviceData ); }; }; Loading Loading
src/implementation/core/memory-operations.h +17 −6 Original line number Diff line number Diff line Loading @@ -70,11 +70,21 @@ bool setMemoryHost( Element* data, } #ifdef HAVE_CUDA template< typename Element > template< typename Element, typename Index > __global__ void setVectorValueCudaKernel( Element* data, const Element value ) const Index size, const Element value, const Index elementsPerThread ) { Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x; Index elementsProcessed( 0 ); while( elementsProcessed < elementsPerThread && elementIdx < size ) { data[ blockIdx. x * blockDim. x + threadIdx. x ] = value; data[ elementIdx ] = value; elementIdx += blockDim. x; elementsProcessed ++; } } #endif Loading @@ -85,11 +95,12 @@ bool setMemoryCuda( Element* data, { #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize. x = 512; blockSize. x = 32; int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize ); setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread ); gridSize. x = Min( blocksNumber, maxCudaGridSize ); cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl; setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value, elementsPerThread ); return checkCudaDevice; #else Loading
tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h +27 −3 Original line number Diff line number Diff line Loading @@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase "allocationTest", &tnlCudaMemoryOperationsTester :: allocationTest ) ); suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( "copyTest", &tnlCudaMemoryOperationsTester :: allocationTest ) ); suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( "smallMemorySetTest", &tnlCudaMemoryOperationsTester :: smallMemorySetTest ) Loading @@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase CPPUNIT_ASSERT( checkCudaDevice ); }; void copyTest() { const int size( 1 << 20 ); int *hostData1, *hostData2, *deviceData; allocateMemoryHost( hostData1, size ); allocateMemoryHost( hostData2, size ); allocateMemoryCuda( deviceData, size ); setMemoryHost( hostData1, 13, size ); copyMemoryHostToCuda( deviceData, hostData1, size ); copyMemoryCudaToHost( hostData2, deviceData, size ); CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) ); freeMemoryHost( hostData1 ); freeMemoryHost( hostData2 ); freeMemoryCuda( deviceData ); }; void smallMemorySetTest() { const int size( 100 ); const int size( 1024 ); int *hostData, *deviceData; allocateMemoryHost( hostData, size ); allocateMemoryCuda( deviceData, size ); Loading @@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase CPPUNIT_ASSERT( checkCudaDevice ); for( int i = 0; i < size; i ++ ) CPPUNIT_ASSERT( hostData[ i ] == 13 ); freeMemoryHost( hostData ); freeMemoryCuda( deviceData ); }; void bigMemorySetTest() { const int size( 2.7 * maxCudaGridSize * maxCudaBlockSize ); const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize ); cout << "Size = " << size << endl; int *hostData, *deviceData; allocateMemoryHost( hostData, size ); Loading @@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase for( int i = 0; i < size; i ++ ) { if( hostData[ i ] != 13 ) cout << " i = " << i << endl; cout << " i = " << i << " " << hostData[ i ] << endl; CPPUNIT_ASSERT( hostData[ i ] == 13 ); } freeMemoryHost( hostData ); freeMemoryCuda( deviceData ); }; }; Loading