Loading CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -49,7 +49,8 @@ if( NOT WITH_CUDA STREQUAL "no" ) find_package( CUDA ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA ) #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA ) AddCompilerFlag( "-DHAVE_CUDA" ) if( CUDA_ARCHITECTURE STREQUAL "1.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10) endif() Loading buildAll +1 −0 Original line number Diff line number Diff line Loading @@ -9,6 +9,7 @@ VERBOSE=1 CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS=1 echo "Building $TARGET using $CPUS processors." Loading src/implementation/core/memory-operations.h +12 −16 Original line number Diff line number Diff line Loading @@ -81,17 +81,14 @@ bool setMemoryHost( Element* data, template< typename Element, typename Index > __global__ void setVectorValueCudaKernel( Element* data, const Index size, const Element value, const Index elementsPerThread ) const Element value ) { Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x; Index elementsProcessed( 0 ); while( elementsProcessed < elementsPerThread && elementIdx < size ) Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x; const Index gridSize = blockDim. x * gridDim. x; while( elementIdx < size ) { data[ elementIdx ] = value; elementIdx += blockDim. x; elementsProcessed ++; elementIdx += gridSize; } } #endif Loading @@ -102,13 +99,12 @@ bool setMemoryCuda( Element* data, const Index size ) { #ifdef HAVE_CUDA dim3 blockSize, gridSize; dim3 blockSize( 0 ), gridSize( 0 ); blockSize. x = 256; Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); Index elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize ); gridSize. x = Min( blocksNumber, ( Index ) maxCudaGridSize ); //cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl; setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value, elementsPerThread ); setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value ); return checkCudaDevice; #else Loading tests/unit-tests/core/CMakeLists.txt +28 −0 Original line number Diff line number Diff line ADD_SUBDIRECTORY( cuda ) set( headers tnlFileTester.h tnlStringTester.h tnlObjectTester.h tnlRealTester.h tnlTupleTester.h tnlVectorTester.h tnlMultiArrayTester.h tnlArrayTester.h tnlSharedArrayTester.h tnlSharedVectorTester.h tnlGridOldTester.h tnlSharedMemoryTester.h tnlCommunicatorTester.h ) if( BUILD_CUDA ) # CUDA_ADD_EXECUTABLE( tnl-device-check-test${mpiExt}${debugExt} ${headers} device-check-test.cu ) # TARGET_LINK_LIBRARIES( tnl-device-check-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} # tnl${mpiExt}${debugExt}-0.1 ) # # CUDA_ADD_EXECUTABLE( tnl-memory-operations-test${mpiExt}${debugExt} ${headers} memory-operations-test.cu ) # TARGET_LINK_LIBRARIES( tnl-memory-operations-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} # tnl${mpiExt}${debugExt}-0.1 ) # # CUDA_ADD_EXECUTABLE( tnl-reduction-test${mpiExt}${debugExt} ${headers} reduction-test.cu ) # TARGET_LINK_LIBRARIES( tnl-reduction-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} # tnl${mpiExt}${debugExt}-0.1 ) # endif() No newline at end of file tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h +2 −2 Original line number Diff line number Diff line Loading @@ -51,10 +51,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase "smallMemorySetTest", &tnlCudaMemoryOperationsTester :: smallMemorySetTest ) ); /*suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( "bigMemorySetTest", &tnlCudaMemoryOperationsTester :: bigMemorySetTest ) );*/ ); return suiteOfTests; }; Loading Loading
CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -49,7 +49,8 @@ if( NOT WITH_CUDA STREQUAL "no" ) find_package( CUDA ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA ) #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA ) AddCompilerFlag( "-DHAVE_CUDA" ) if( CUDA_ARCHITECTURE STREQUAL "1.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10) endif() Loading
buildAll +1 −0 Original line number Diff line number Diff line Loading @@ -9,6 +9,7 @@ VERBOSE=1 CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS=1 echo "Building $TARGET using $CPUS processors." Loading
src/implementation/core/memory-operations.h +12 −16 Original line number Diff line number Diff line Loading @@ -81,17 +81,14 @@ bool setMemoryHost( Element* data, template< typename Element, typename Index > __global__ void setVectorValueCudaKernel( Element* data, const Index size, const Element value, const Index elementsPerThread ) const Element value ) { Index elementIdx = blockDim. x * blockIdx. x * elementsPerThread + threadIdx. x; Index elementsProcessed( 0 ); while( elementsProcessed < elementsPerThread && elementIdx < size ) Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x; const Index gridSize = blockDim. x * gridDim. x; while( elementIdx < size ) { data[ elementIdx ] = value; elementIdx += blockDim. x; elementsProcessed ++; elementIdx += gridSize; } } #endif Loading @@ -102,13 +99,12 @@ bool setMemoryCuda( Element* data, const Index size ) { #ifdef HAVE_CUDA dim3 blockSize, gridSize; dim3 blockSize( 0 ), gridSize( 0 ); blockSize. x = 256; Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); Index elementsPerThread = ceil( ( double ) blocksNumber / ( double ) maxCudaGridSize ); gridSize. x = Min( blocksNumber, ( Index ) maxCudaGridSize ); //cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl; setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value, elementsPerThread ); setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value ); return checkCudaDevice; #else Loading
tests/unit-tests/core/CMakeLists.txt +28 −0 Original line number Diff line number Diff line ADD_SUBDIRECTORY( cuda ) set( headers tnlFileTester.h tnlStringTester.h tnlObjectTester.h tnlRealTester.h tnlTupleTester.h tnlVectorTester.h tnlMultiArrayTester.h tnlArrayTester.h tnlSharedArrayTester.h tnlSharedVectorTester.h tnlGridOldTester.h tnlSharedMemoryTester.h tnlCommunicatorTester.h ) if( BUILD_CUDA ) # CUDA_ADD_EXECUTABLE( tnl-device-check-test${mpiExt}${debugExt} ${headers} device-check-test.cu ) # TARGET_LINK_LIBRARIES( tnl-device-check-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} # tnl${mpiExt}${debugExt}-0.1 ) # # CUDA_ADD_EXECUTABLE( tnl-memory-operations-test${mpiExt}${debugExt} ${headers} memory-operations-test.cu ) # TARGET_LINK_LIBRARIES( tnl-memory-operations-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} # tnl${mpiExt}${debugExt}-0.1 ) # # CUDA_ADD_EXECUTABLE( tnl-reduction-test${mpiExt}${debugExt} ${headers} reduction-test.cu ) # TARGET_LINK_LIBRARIES( tnl-reduction-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} # tnl${mpiExt}${debugExt}-0.1 ) # endif() No newline at end of file
tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h +2 −2 Original line number Diff line number Diff line Loading @@ -51,10 +51,10 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase "smallMemorySetTest", &tnlCudaMemoryOperationsTester :: smallMemorySetTest ) ); /*suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >( "bigMemorySetTest", &tnlCudaMemoryOperationsTester :: bigMemorySetTest ) );*/ ); return suiteOfTests; }; Loading