Debugging set memory function for the CUDA device. (f9b0abaf) · Commits · TNL / tnl-dev

CMakeLists.txt

+2 −1

Original line number	Diff line number	Diff line
		@@ -49,7 +49,8 @@ if( NOT WITH_CUDA STREQUAL "no" )
		find_package( CUDA )
		if( CUDA_FOUND )
		set( BUILD_CUDA TRUE)
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA )
		#set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA )
		AddCompilerFlag( "-DHAVE_CUDA" )
		if( CUDA_ARCHITECTURE STREQUAL "1.0" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10)
		endif()

+1 −0

Original line number	Diff line number	Diff line
		@@ -9,6 +9,7 @@ VERBOSE=1

		CMAKE="cmake"
		CPUS=`grep -c processor /proc/cpuinfo`
		CPUS=1

		echo "Building $TARGET using $CPUS processors."

+12 −16

Original line number	Diff line number	Diff line
		@@ -81,17 +81,14 @@ bool setMemoryHost( Element* data,
		// Kernel that stores `value` into data[ i ] for element indices i < size.
		// NOTE(review): this listing is a diff view. Lines of the removed and of the
		// added revision are interleaved below (the split follows from the hunk header
		// counts 17/14), so the block does not compile as shown.
		template< typename Element, typename Index >
		__global__ void setVectorValueCudaKernel( Element* data,
		const Index size,
		// Removed revision: the parameter list ended with an extra elementsPerThread argument.
		const Element value,
		const Index elementsPerThread )
		// Added revision: the parameter list ends with value.
		const Element value )
		{
		// Removed revision: a block started at blockDim.x * blockIdx.x * elementsPerThread
		// and each thread wrote at most elementsPerThread elements.
		Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x;
		Index elementsProcessed( 0 );
		while( elementsProcessed < elementsPerThread &&
		elementIdx < size )
		// Added revision: a thread starts at blockDim.x * blockIdx.x + threadIdx.x and
		// keeps writing until its index reaches size.
		// NOTE(review): the product is formed from the built-in index variables before
		// it is converted to Index - confirm it cannot overflow for the largest size.
		Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x;
		// Number of threads in the whole launch ( threads per block * blocks ).
		const Index gridSize = blockDim. x * gridDim. x;
		while( elementIdx < size )
		{
		data[ elementIdx ] = value;
		// Removed revision: advance by the block width and count the processed element.
		elementIdx += blockDim. x;
		elementsProcessed ++;
		// Added revision: advance by the total number of launched threads.
		elementIdx += gridSize;
		}
		}
		#endif
		@@ -102,13 +99,12 @@ bool setMemoryCuda( Element* data,
		const Index size )
		{
		#ifdef HAVE_CUDA
		dim3 blockSize, gridSize;
		dim3 blockSize( 0 ), gridSize( 0 );
		blockSize. x = 256;
		Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
		Index elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize );
		gridSize. x = Min( blocksNumber, ( Index ) maxCudaGridSize );
		//cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl;
		setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value, elementsPerThread );
		setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value );

		return checkCudaDevice;
		#else

+28 −0

Original line number	Diff line number	Diff line
		# Descend into the cuda subdirectory.
		add_subdirectory( cuda )

		# Tester headers of this directory. In this listing the variable is referenced
		# only by the disabled targets further down.
		set( headers
		     tnlFileTester.h
		     tnlStringTester.h
		     tnlObjectTester.h
		     tnlRealTester.h
		     tnlTupleTester.h
		     tnlVectorTester.h
		     tnlMultiArrayTester.h
		     tnlArrayTester.h
		     tnlSharedArrayTester.h
		     tnlSharedVectorTester.h
		     tnlGridOldTester.h
		     tnlSharedMemoryTester.h
		     tnlCommunicatorTester.h )

		# Every CUDA test target in this branch is commented out, so the branch is
		# currently empty.
		if( BUILD_CUDA )
		# CUDA_ADD_EXECUTABLE( tnl-device-check-test${mpiExt}${debugExt} ${headers} device-check-test.cu )
		# TARGET_LINK_LIBRARIES( tnl-device-check-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
		# tnl${mpiExt}${debugExt}-0.1 )
		#
		# CUDA_ADD_EXECUTABLE( tnl-memory-operations-test${mpiExt}${debugExt} ${headers} memory-operations-test.cu )
		# TARGET_LINK_LIBRARIES( tnl-memory-operations-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
		# tnl${mpiExt}${debugExt}-0.1 )
		#
		# CUDA_ADD_EXECUTABLE( tnl-reduction-test${mpiExt}${debugExt} ${headers} reduction-test.cu )
		# TARGET_LINK_LIBRARIES( tnl-reduction-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
		# tnl${mpiExt}${debugExt}-0.1 )
		#
		endif()

		No newline at end of file

+2 −2

Original line number	Diff line number	Diff line
		@@ -51,10 +51,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
		"smallMemorySetTest",
		&tnlCudaMemoryOperationsTester :: smallMemorySetTest )
		);
		/*suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
		suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
		"bigMemorySetTest",
		&tnlCudaMemoryOperationsTester :: bigMemorySetTest )
		);*/
		);

		return suiteOfTests;
		};