Loading src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +29 −29 Original line number Original line Diff line number Diff line Loading @@ -11,6 +11,7 @@ #pragma once #pragma once #include <iostream> #include <iostream> #include <memory> #include <TNL/tnlConfig.h> #include <TNL/tnlConfig.h> #include <TNL/Math.h> #include <TNL/Math.h> Loading Loading @@ -112,7 +113,8 @@ setMemory( Element* data, Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value ); setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value ); return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); #endif #endif Loading Loading @@ -155,7 +157,8 @@ copyMemory( DestinationElement* destination, source, source, size * sizeof( DestinationElement ), size * sizeof( DestinationElement ), cudaMemcpyDeviceToDevice ); cudaMemcpyDeviceToDevice ); return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } else else { { Loading @@ -164,7 +167,8 @@ copyMemory( DestinationElement* destination, Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); Loading Loading @@ -248,22 +252,23 @@ copyMemory( DestinationElement* destination, size * sizeof( DestinationElement ), size * sizeof( DestinationElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) cudaMemcpyDeviceToHost ) != cudaSuccess ) std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } else else { { SourceElement* buffer = new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ]; std::unique_ptr< SourceElement[] > buffer{ new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index i( 0 ); Index i( 0 ); while( i < size ) while( i < size ) { { if( cudaMemcpy( buffer, if( cudaMemcpy( (void*) buffer.get(), &source[ i ], (void*) &source[ i ], min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) cudaMemcpyDeviceToHost ) != cudaSuccess ) { { delete[] buffer; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } Index j( 0 ); Index j( 0 ); while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) Loading @@ -273,7 +278,6 @@ copyMemory( DestinationElement* destination, } } i += j; i += j; } } delete[] buffer; } } return true; return true; #else #else Loading @@ -298,28 +302,24 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); TNL_ASSERT_GE( size, 0, "Array size must be non-negative." ); TNL_ASSERT_GE( size, 0, "Array size must be non-negative." ); #ifdef HAVE_CUDA #ifdef HAVE_CUDA Element2* host_buffer = new Element2[ Devices::Cuda::getGPUTransferBufferSize() ]; std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index compared( 0 ); Index compared( 0 ); while( compared < size ) while( compared < size ) { { Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() ); Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() ); if( cudaMemcpy( ( void* ) host_buffer, if( cudaMemcpy( (void*) host_buffer.get(), ( void* ) & ( source[ compared ] ), (void*) &source[ compared ], transfer * sizeof( Element2 ), transfer * sizeof( Element2 ), cudaMemcpyDeviceToHost ) != cudaSuccess ) cudaMemcpyDeviceToHost ) != cudaSuccess ) { { delete[] host_buffer; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer, transfer ) ) if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer.get(), transfer ) ) { delete[] host_buffer; return false; return false; } compared += transfer; compared += transfer; } } delete[] host_buffer; return true; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); Loading Loading @@ -349,11 +349,12 @@ copyMemory( DestinationElement* destination, size * sizeof( DestinationElement ), size * sizeof( DestinationElement ), cudaMemcpyHostToDevice ) != cudaSuccess ) cudaMemcpyHostToDevice ) != cudaSuccess ) std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } else else { { DestinationElement* buffer = new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ]; std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index i( 0 ); Index i( 0 ); while( i < size ) while( i < size ) { { Loading @@ -363,18 +364,17 @@ copyMemory( DestinationElement* destination, buffer[ j ] = source[ i + j ]; buffer[ j ] = source[ i + j ]; j++; j++; } } if( cudaMemcpy( &destination[ i ], if( cudaMemcpy( (void*) &destination[ i ], buffer, (void*) buffer.get(), j * sizeof( DestinationElement ), j * sizeof( DestinationElement ), cudaMemcpyHostToDevice ) != cudaSuccess ) cudaMemcpyHostToDevice ) != cudaSuccess ) { { delete[] buffer; std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } i += j; i += j; } } delete[] buffer; return true; return true; } } #else #else Loading src/TNL/Containers/Algorithms/Multireduction_impl.h +2 −1 Original line number Original line Diff line number Diff line Loading @@ -146,7 +146,8 @@ reduce( Operation& operation, std::cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; std::cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif #endif return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); #endif #endif Loading src/TNL/Containers/Algorithms/Reduction_impl.h +3 −2 Original line number Original line Diff line number Diff line Loading @@ -163,7 +163,8 @@ reduce( Operation& operation, #endif #endif } } return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); #endif #endif Loading src/TNL/Devices/Cuda.cu +4 −5 Original line number Original line Diff line number Diff line Loading @@ -103,10 +103,9 @@ void Cuda::printThreadsSetup( const dim3& blockSize, } } bool Cuda::checkDevice( const char* file_name, int line, cudaError error ) void Cuda::checkDevice( const char* file_name, int line, cudaError error ) { { if( error == cudaSuccess ) if( error != cudaSuccess ) return true; throw Exceptions::CudaRuntimeError( error, file_name, line ); throw Exceptions::CudaRuntimeError( error, file_name, line ); } } Loading src/TNL/Devices/Cuda.h +2 −2 Original line number Original line Diff line number Diff line Loading @@ -153,9 +153,9 @@ class Cuda * of calling cudaGetLastError() inside the method. * of calling cudaGetLastError() inside the method. * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. */ */ static bool checkDevice( const char* file_name, int line, cudaError error ); static void checkDevice( const char* file_name, int line, cudaError error ); #else #else static bool checkDevice() { return false; }; static void checkDevice() {} #endif #endif static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); Loading Loading
src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +29 −29 Original line number Original line Diff line number Diff line Loading @@ -11,6 +11,7 @@ #pragma once #pragma once #include <iostream> #include <iostream> #include <memory> #include <TNL/tnlConfig.h> #include <TNL/tnlConfig.h> #include <TNL/Math.h> #include <TNL/Math.h> Loading Loading @@ -112,7 +113,8 @@ setMemory( Element* data, Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value ); setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value ); return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); #endif #endif Loading Loading @@ -155,7 +157,8 @@ copyMemory( DestinationElement* destination, source, source, size * sizeof( DestinationElement ), size * sizeof( DestinationElement ), cudaMemcpyDeviceToDevice ); cudaMemcpyDeviceToDevice ); return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } else else { { Loading @@ -164,7 +167,8 @@ copyMemory( DestinationElement* destination, Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); Loading Loading @@ -248,22 +252,23 @@ copyMemory( DestinationElement* destination, size * sizeof( DestinationElement ), size * sizeof( DestinationElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) cudaMemcpyDeviceToHost ) != cudaSuccess ) std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } else else { { SourceElement* buffer = new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ]; std::unique_ptr< SourceElement[] > buffer{ new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index i( 0 ); Index i( 0 ); while( i < size ) while( i < size ) { { if( cudaMemcpy( buffer, if( cudaMemcpy( (void*) buffer.get(), &source[ i ], (void*) &source[ i ], min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) cudaMemcpyDeviceToHost ) != cudaSuccess ) { { delete[] buffer; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } Index j( 0 ); Index j( 0 ); while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) Loading @@ -273,7 +278,6 @@ copyMemory( DestinationElement* destination, } } i += j; i += j; } } delete[] buffer; } } return true; return true; #else #else Loading @@ -298,28 +302,24 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); TNL_ASSERT_GE( size, 0, "Array size must be non-negative." ); TNL_ASSERT_GE( size, 0, "Array size must be non-negative." ); #ifdef HAVE_CUDA #ifdef HAVE_CUDA Element2* host_buffer = new Element2[ Devices::Cuda::getGPUTransferBufferSize() ]; std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index compared( 0 ); Index compared( 0 ); while( compared < size ) while( compared < size ) { { Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() ); Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() ); if( cudaMemcpy( ( void* ) host_buffer, if( cudaMemcpy( (void*) host_buffer.get(), ( void* ) & ( source[ compared ] ), (void*) &source[ compared ], transfer * sizeof( Element2 ), transfer * sizeof( Element2 ), cudaMemcpyDeviceToHost ) != cudaSuccess ) cudaMemcpyDeviceToHost ) != cudaSuccess ) { { delete[] host_buffer; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer, transfer ) ) if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer.get(), transfer ) ) { delete[] host_buffer; return false; return false; } compared += transfer; compared += transfer; } } delete[] host_buffer; return true; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); Loading Loading @@ -349,11 +349,12 @@ copyMemory( DestinationElement* destination, size * sizeof( DestinationElement ), size * sizeof( DestinationElement ), cudaMemcpyHostToDevice ) != cudaSuccess ) cudaMemcpyHostToDevice ) != cudaSuccess ) std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } else else { { DestinationElement* buffer = new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ]; std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index i( 0 ); Index i( 0 ); while( i < size ) while( i < size ) { { Loading @@ -363,18 +364,17 @@ copyMemory( DestinationElement* destination, buffer[ j ] = source[ i + j ]; buffer[ j ] = source[ i + j ]; j++; j++; } } if( cudaMemcpy( &destination[ i ], if( cudaMemcpy( (void*) &destination[ i ], buffer, (void*) buffer.get(), j * sizeof( DestinationElement ), j * sizeof( DestinationElement ), cudaMemcpyHostToDevice ) != cudaSuccess ) cudaMemcpyHostToDevice ) != cudaSuccess ) { { delete[] buffer; std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; } } i += j; i += j; } } delete[] buffer; return true; return true; } } #else #else Loading
src/TNL/Containers/Algorithms/Multireduction_impl.h +2 −1 Original line number Original line Diff line number Diff line Loading @@ -146,7 +146,8 @@ reduce( Operation& operation, std::cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; std::cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif #endif return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); #endif #endif Loading
src/TNL/Containers/Algorithms/Reduction_impl.h +3 −2 Original line number Original line Diff line number Diff line Loading @@ -163,7 +163,8 @@ reduce( Operation& operation, #endif #endif } } return TNL_CHECK_CUDA_DEVICE; TNL_CHECK_CUDA_DEVICE; return true; #else #else throw Exceptions::CudaSupportMissing(); throw Exceptions::CudaSupportMissing(); #endif #endif Loading
src/TNL/Devices/Cuda.cu +4 −5 Original line number Original line Diff line number Diff line Loading @@ -103,10 +103,9 @@ void Cuda::printThreadsSetup( const dim3& blockSize, } } bool Cuda::checkDevice( const char* file_name, int line, cudaError error ) void Cuda::checkDevice( const char* file_name, int line, cudaError error ) { { if( error == cudaSuccess ) if( error != cudaSuccess ) return true; throw Exceptions::CudaRuntimeError( error, file_name, line ); throw Exceptions::CudaRuntimeError( error, file_name, line ); } } Loading
src/TNL/Devices/Cuda.h +2 −2 Original line number Original line Diff line number Diff line Loading @@ -153,9 +153,9 @@ class Cuda * of calling cudaGetLastError() inside the method. * of calling cudaGetLastError() inside the method. * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. */ */ static bool checkDevice( const char* file_name, int line, cudaError error ); static void checkDevice( const char* file_name, int line, cudaError error ); #else #else static bool checkDevice() { return false; }; static void checkDevice() {} #endif #endif static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); Loading