Skip to content
Snippets Groups Projects
Commit b682fcfe authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Debugging memory set value CUDA kernel.

parent 695575ce
No related branches found
No related tags found
No related merge requests found
...@@ -70,11 +70,21 @@ bool setMemoryHost( Element* data, ...@@ -70,11 +70,21 @@ bool setMemoryHost( Element* data,
} }
#ifdef HAVE_CUDA
/**
 * CUDA kernel writing `value` into the elements of `data` with index
 * lower than `size`.
 *
 * Each block handles a contiguous chunk of blockDim.x * elementsPerThread
 * elements starting at blockIdx.x * blockDim.x * elementsPerThread. Inside
 * the chunk, a thread starts at offset threadIdx.x and advances by
 * blockDim.x, writing at most `elementsPerThread` elements.
 * Only the x-dimension of the grid and of the block is used.
 *
 * @param data               array to be written by the kernel
 * @param size               number of elements of `data` to set
 * @param value              value stored into each element
 * @param elementsPerThread  maximal number of elements written by one thread
 */
template< typename Element, typename Index >
__global__ void setVectorValueCudaKernel( Element* data,
                                          const Index size,
                                          const Element value,
                                          const Index elementsPerThread )
{
   // The index is carried in 64 bits: the product
   // blockDim.x * blockIdx.x * elementsPerThread evaluated in 32-bit
   // arithmetic may wrap around for large arrays, and a wrapped (negative)
   // index would still pass the `elementIdx < size` test.
   typedef unsigned long long WideIndex;

   if( size <= 0 || elementsPerThread <= 0 )
      return;

   const WideIndex end = static_cast< WideIndex >( size );
   WideIndex elementIdx = static_cast< WideIndex >( blockDim. x ) * blockIdx. x *
                          static_cast< WideIndex >( elementsPerThread ) + threadIdx. x;

   for( Index elementsProcessed( 0 );
        elementsProcessed < elementsPerThread && elementIdx < end;
        elementsProcessed ++, elementIdx += blockDim. x )
      data[ elementIdx ] = value;
}
#endif
...@@ -85,11 +95,12 @@ bool setMemoryCuda( Element* data, ...@@ -85,11 +95,12 @@ bool setMemoryCuda( Element* data,
{ {
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
dim3 blockSize, gridSize; dim3 blockSize, gridSize;
blockSize. x = 512; blockSize. x = 32;
int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); int blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize ); int elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize );
gridSize. x = Min( blocksNumber, maxCudaGridSize );
setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value, elementsPerThread ); cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl;
setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value, elementsPerThread );
return checkCudaDevice; return checkCudaDevice;
#else #else
......
...@@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase ...@@ -43,6 +43,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
"allocationTest", "allocationTest",
&tnlCudaMemoryOperationsTester :: allocationTest ) &tnlCudaMemoryOperationsTester :: allocationTest )
); );
suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
"copyTest",
&tnlCudaMemoryOperationsTester :: allocationTest )
);
suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
"smallMemorySetTest", "smallMemorySetTest",
&tnlCudaMemoryOperationsTester :: smallMemorySetTest ) &tnlCudaMemoryOperationsTester :: smallMemorySetTest )
...@@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase ...@@ -65,9 +69,25 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
CPPUNIT_ASSERT( checkCudaDevice ); CPPUNIT_ASSERT( checkCudaDevice );
}; };
void copyTest()
{
const int size( 1 << 20 );
int *hostData1, *hostData2, *deviceData;
allocateMemoryHost( hostData1, size );
allocateMemoryHost( hostData2, size );
allocateMemoryCuda( deviceData, size );
setMemoryHost( hostData1, 13, size );
copyMemoryHostToCuda( deviceData, hostData1, size );
copyMemoryCudaToHost( hostData2, deviceData, size );
CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) );
freeMemoryHost( hostData1 );
freeMemoryHost( hostData2 );
freeMemoryCuda( deviceData );
};
void smallMemorySetTest() void smallMemorySetTest()
{ {
const int size( 100 ); const int size( 1024 );
int *hostData, *deviceData; int *hostData, *deviceData;
allocateMemoryHost( hostData, size ); allocateMemoryHost( hostData, size );
allocateMemoryCuda( deviceData, size ); allocateMemoryCuda( deviceData, size );
...@@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase ...@@ -78,11 +98,13 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
CPPUNIT_ASSERT( checkCudaDevice ); CPPUNIT_ASSERT( checkCudaDevice );
for( int i = 0; i < size; i ++ ) for( int i = 0; i < size; i ++ )
CPPUNIT_ASSERT( hostData[ i ] == 13 ); CPPUNIT_ASSERT( hostData[ i ] == 13 );
freeMemoryHost( hostData );
freeMemoryCuda( deviceData );
}; };
void bigMemorySetTest() void bigMemorySetTest()
{ {
const int size( 2.7 * maxCudaGridSize * maxCudaBlockSize ); const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize );
cout << "Size = " << size << endl; cout << "Size = " << size << endl;
int *hostData, *deviceData; int *hostData, *deviceData;
allocateMemoryHost( hostData, size ); allocateMemoryHost( hostData, size );
...@@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase ...@@ -95,9 +117,11 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
for( int i = 0; i < size; i ++ ) for( int i = 0; i < size; i ++ )
{ {
if( hostData[ i ] != 13 ) if( hostData[ i ] != 13 )
cout << " i = " << i << endl; cout << " i = " << i << " " << hostData[ i ] << endl;
CPPUNIT_ASSERT( hostData[ i ] == 13 ); CPPUNIT_ASSERT( hostData[ i ] == 13 );
} }
freeMemoryHost( hostData );
freeMemoryCuda( deviceData );
}; };
}; };
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment