diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
index 15a72a2650dbbf1042456f8f9c4cce63b4a3dafb..a261ce2ff97334efcdf3b46954b362e7d41798bd 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h
@@ -8,9 +8,10 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#pragma once 
+#pragma once
 
 #include <iostream>
+#include <memory>
 
 #include <TNL/tnlConfig.h>
 #include <TNL/Math.h>
@@ -21,7 +22,7 @@
 #include <TNL/Containers/Algorithms/ReductionOperations.h>
 
 namespace TNL {
-namespace Containers { 
+namespace Containers {
 namespace Algorithms {
 
 template< typename Element, typename Index >
@@ -112,7 +113,8 @@ setMemory( Element* data,
    Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
    gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
    setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );
-   return TNL_CHECK_CUDA_DEVICE;
+   TNL_CHECK_CUDA_DEVICE;
+   return true;
 #else
    throw Exceptions::CudaSupportMissing();
 #endif
@@ -155,7 +157,8 @@ copyMemory( DestinationElement* destination,
                  source,
                  size * sizeof( DestinationElement ),
                  cudaMemcpyDeviceToDevice );
-      return TNL_CHECK_CUDA_DEVICE;
+      TNL_CHECK_CUDA_DEVICE;
+      return true;
    }
    else
    {
@@ -164,7 +167,8 @@ copyMemory( DestinationElement* destination,
       Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
       gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
       copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
-      return TNL_CHECK_CUDA_DEVICE;
+      TNL_CHECK_CUDA_DEVICE;
+      return true;
    }
 #else
    throw Exceptions::CudaSupportMissing();
@@ -248,22 +252,23 @@ copyMemory( DestinationElement* destination,
                       size * sizeof( DestinationElement ),
                       cudaMemcpyDeviceToHost ) != cudaSuccess )
          std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
-      return TNL_CHECK_CUDA_DEVICE;
+      TNL_CHECK_CUDA_DEVICE;
+      return true;
    }
    else
    {
-      SourceElement* buffer = new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ];
+      std::unique_ptr< SourceElement[] > buffer{ new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ] };
       Index i( 0 );
       while( i < size )
       {
-         if( cudaMemcpy( buffer,
-                         &source[ i ],
+         if( cudaMemcpy( (void*) buffer.get(),
+                         (void*) &source[ i ],
                          min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
          {
-            delete[] buffer;
            std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
-            return TNL_CHECK_CUDA_DEVICE;
+            TNL_CHECK_CUDA_DEVICE;
+            return true;
         }
         Index j( 0 );
         while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
@@ -273,7 +278,6 @@ copyMemory( DestinationElement* destination,
         }
         i += j;
      }
-      delete[] buffer;
   }
   return true;
 #else
@@ -298,28 +302,24 @@ compareMemory( const Element1* destination,
    TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
    TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
 #ifdef HAVE_CUDA
-   Element2* host_buffer = new Element2[ Devices::Cuda::getGPUTransferBufferSize() ];
+   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] };
    Index compared( 0 );
    while( compared < size )
    {
       Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() );
-      if( cudaMemcpy( ( void* ) host_buffer,
-                      ( void* ) & ( source[ compared ] ),
+      if( cudaMemcpy( (void*) host_buffer.get(),
+                      (void*) &source[ compared ],
                       transfer * sizeof( Element2 ),
                       cudaMemcpyDeviceToHost ) != cudaSuccess )
       {
-         delete[] host_buffer;
         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
-         return TNL_CHECK_CUDA_DEVICE;
+         TNL_CHECK_CUDA_DEVICE;
+         return true;
      }
-      if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer, transfer ) )
-      {
-         delete[] host_buffer;
+      if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer.get(), transfer ) )
         return false;
-      }
      compared += transfer;
   }
-   delete[] host_buffer;
   return true;
 #else
   throw Exceptions::CudaSupportMissing();
@@ -349,11 +349,12 @@ copyMemory( DestinationElement* destination,
                       size * sizeof( DestinationElement ),
                       cudaMemcpyHostToDevice ) != cudaSuccess )
          std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
-      return TNL_CHECK_CUDA_DEVICE;
+      TNL_CHECK_CUDA_DEVICE;
+      return true;
    }
    else
    {
-      DestinationElement* buffer = new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ];
+      std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] };
       Index i( 0 );
       while( i < size )
       {
@@ -363,18 +364,17 @@ copyMemory( DestinationElement* destination,
            buffer[ j ] = source[ i + j ];
            j++;
         }
-         if( cudaMemcpy( &destination[ i ],
-                         buffer,
+         if( cudaMemcpy( (void*) &destination[ i ],
+                         (void*) buffer.get(),
                          j * sizeof( DestinationElement ),
                          cudaMemcpyHostToDevice ) != cudaSuccess )
         {
-            delete[] buffer;
            std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
-            return TNL_CHECK_CUDA_DEVICE;
+            TNL_CHECK_CUDA_DEVICE;
+            return true;
         }
         i += j;
      }
-      delete[] buffer;
      return true;
   }
 #else
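Note: besides the TNL_CHECK_CUDA_DEVICE change, this file replaces every manually managed transfer buffer with std::unique_ptr, which is why the delete[] calls disappear from all early-return paths. A minimal sketch of the pattern in isolation, under illustrative names (copyDeviceToHost and bufferSize are not TNL API):

    // Sketch of the RAII staging-buffer pattern used in copyMemory above.
    #include <algorithm>
    #include <cstddef>
    #include <memory>
    #include <cuda_runtime.h>

    template< typename T >
    bool copyDeviceToHost( T* destination, const T* source,
                           std::size_t size, std::size_t bufferSize )
    {
       // unique_ptr frees the staging buffer on every exit path,
       // so the error branches need no explicit delete[].
       std::unique_ptr< T[] > buffer{ new T[ bufferSize ] };
       std::size_t i = 0;
       while( i < size ) {
          const std::size_t transfer = std::min( size - i, bufferSize );
          if( cudaMemcpy( buffer.get(), source + i, transfer * sizeof( T ),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
             return false;
          std::copy( buffer.get(), buffer.get() + transfer, destination + i );
          i += transfer;
       }
       return true;
    }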
diff --git a/src/TNL/Containers/Algorithms/Multireduction_impl.h b/src/TNL/Containers/Algorithms/Multireduction_impl.h
index 92c1f9b19c4a10a1aa967f97930df0550c058894..a7066405f6e897ba0fde15e5b3122cb0ddb49c3e 100644
--- a/src/TNL/Containers/Algorithms/Multireduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Multireduction_impl.h
@@ -146,7 +146,8 @@ reduce( Operation& operation,
       std::cout << "   Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl;
 #endif
 
-   return TNL_CHECK_CUDA_DEVICE;
+   TNL_CHECK_CUDA_DEVICE;
+   return true;
 #else
    throw Exceptions::CudaSupportMissing();
 #endif
diff --git a/src/TNL/Containers/Algorithms/Reduction_impl.h b/src/TNL/Containers/Algorithms/Reduction_impl.h
index ea3a20076e8086ac4988545d3077132485eb2d41..348e53fccacaf304366a5e16379280eb133f276e 100644
--- a/src/TNL/Containers/Algorithms/Reduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Reduction_impl.h
@@ -162,8 +162,9 @@ reduce( Operation& operation,
       std::cout << "   Transferring the result to CPU took " << timer.getRealTime() << " sec. " << std::endl;
 #endif
    }
-
-   return TNL_CHECK_CUDA_DEVICE;
+
+   TNL_CHECK_CUDA_DEVICE;
+   return true;
 #else
    throw Exceptions::CudaSupportMissing();
 #endif
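Note: the recurring rewrite of `return TNL_CHECK_CUDA_DEVICE;` into `TNL_CHECK_CUDA_DEVICE; return true;` follows from checkDevice (see the Devices/Cuda changes below) now returning void: the macro no longer yields a boolean, it either passes silently or throws. A sketch of such a throwing check, under illustrative names (checkCuda and CHECK_CUDA; the real code is Devices::Cuda::checkDevice plus the TNL_CHECK_CUDA_DEVICE macro):

    #include <stdexcept>
    #include <string>
    #include <cuda_runtime.h>

    // Illustrative stand-in for Devices::Cuda::checkDevice.
    inline void checkCuda( const char* file, int line, cudaError_t error )
    {
       if( error != cudaSuccess )
          throw std::runtime_error( std::string( file ) + ":" + std::to_string( line )
                                    + ": " + cudaGetErrorString( error ) );
    }

    // cudaGetLastError() also picks up asynchronous errors from earlier kernel launches.
    #define CHECK_CUDA checkCuda( __FILE__, __LINE__, cudaGetLastError() )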
" << std::endl; #endif } - - return TNL_CHECK_CUDA_DEVICE; + + TNL_CHECK_CUDA_DEVICE; + return true; #else throw Exceptions::CudaSupportMissing(); #endif diff --git a/src/TNL/Devices/Cuda.cu b/src/TNL/Devices/Cuda.cu index 2605e6dca83290eb59db54618b7bf91ed1e59150..c1e5248330d0b46d23258a200238756c36aa314d 100644 --- a/src/TNL/Devices/Cuda.cu +++ b/src/TNL/Devices/Cuda.cu @@ -103,11 +103,10 @@ void Cuda::printThreadsSetup( const dim3& blockSize, } -bool Cuda::checkDevice( const char* file_name, int line, cudaError error ) -{ - if( error == cudaSuccess ) - return true; - throw Exceptions::CudaRuntimeError( error, file_name, line ); +void Cuda::checkDevice( const char* file_name, int line, cudaError error ) +{ + if( error != cudaSuccess ) + throw Exceptions::CudaRuntimeError( error, file_name, line ); } std::ostream& operator << ( std::ostream& str, const dim3& d ) diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index c73e327e9ac84ab10752a66e783a46da1b288c72..123d3a96c6d940be44208785fb5c776586a0d52d 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -153,9 +153,9 @@ class Cuda * of calling cudaGetLastError() inside the method. * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. */ - static bool checkDevice( const char* file_name, int line, cudaError error ); + static void checkDevice( const char* file_name, int line, cudaError error ); #else - static bool checkDevice() { return false; }; + static void checkDevice() {} #endif static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/File_impl.h b/src/TNL/File_impl.h index 0b7d18ad3bb04d4ebca507a021e60825c7c04f19..a27250242144156fdf156b3d889cda3420b069e0 100644 --- a/src/TNL/File_impl.h +++ b/src/TNL/File_impl.h @@ -11,6 +11,7 @@ #pragma once #include <type_traits> +#include <memory> #include <TNL/File.h> #include <TNL/Exceptions/CudaSupportMissing.h> @@ -89,35 +90,27 @@ bool File::read_impl( Type* buffer, this->readElements = 0; const std::size_t host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / sizeof( Type ), elements ); using BaseType = typename std::remove_cv< Type >::type; - BaseType* host_buffer = new BaseType[ host_buffer_size ]; + std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; while( readElements < elements ) { std::size_t transfer = std::min( elements - readElements, host_buffer_size ); - std::size_t transfered = std::fread( host_buffer, sizeof( Type ), transfer, file ); + std::size_t transfered = std::fread( host_buffer.get(), sizeof( Type ), transfer, file ); if( transfered != transfer ) { std::cerr << "I am not able to read the data from the file " << fileName << "." << std::endl; std::cerr << transfered << " bytes were transfered. " << std::endl; std::perror( "Fread ended with the error code" ); - delete[] host_buffer; return false; } - cudaMemcpy( ( void* ) & ( buffer[ readElements ] ), - host_buffer, + cudaMemcpy( (void*) &buffer[ readElements ], + (void*) host_buffer.get(), transfer * sizeof( Type ), cudaMemcpyHostToDevice ); - if( ! TNL_CHECK_CUDA_DEVICE ) - { - std::cerr << "Transfer of data from the CUDA device to the file " << this->fileName - << " failed." 
diff --git a/src/TNL/Functions/TestFunction_impl.h b/src/TNL/Functions/TestFunction_impl.h
index 5a7e76485339306c4e506202d3574d59571e6b45..3e7da8c33ddc00b236f75cfac25646741eb0e78d 100644
--- a/src/TNL/Functions/TestFunction_impl.h
+++ b/src/TNL/Functions/TestFunction_impl.h
@@ -137,8 +137,7 @@ setupFunction( const Config::ParameterContainer& parameters,
    {
       this->function = Devices::Cuda::passToDevice( *auxFunction );
       delete auxFunction;
-      if( ! TNL_CHECK_CUDA_DEVICE )
-         return false;
+      TNL_CHECK_CUDA_DEVICE;
    }
    return true;
 }
@@ -167,8 +166,7 @@ setupOperator( const Config::ParameterContainer& parameters,
    {
       this->operator_ = Devices::Cuda::passToDevice( *auxOperator );
       delete auxOperator;
-      if( ! TNL_CHECK_CUDA_DEVICE )
-         return false;
+      TNL_CHECK_CUDA_DEVICE;
    }
    return true;
 }
diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h
index 194e68967ccc3368983ce32aeca22f3af1f4e2be..26ff692e4d0e36d9d6783fc91d4fb620ad832d5a 100644
--- a/src/TNL/Pointers/DevicePointer.h
+++ b/src/TNL/Pointers/DevicePointer.h
@@ -393,9 +393,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer
          TNL_ASSERT( this->pointer, );
          TNL_ASSERT( this->cuda_pointer, );
          cudaMemcpy( (void*) this->cuda_pointer, (void*) this->pointer, sizeof( ObjectType ), cudaMemcpyHostToDevice );
-         if( ! TNL_CHECK_CUDA_DEVICE ) {
-            return false;
-         }
+         TNL_CHECK_CUDA_DEVICE;
          this->set_last_sync_state();
          return true;
       }
diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h
index 810d85e99125bea191cd112e88771b8ef2488322..42e46b257f9eb309f458f28ff3e46e591b03091b 100644
--- a/src/TNL/Pointers/SharedPointerCuda.h
+++ b/src/TNL/Pointers/SharedPointerCuda.h
@@ -544,9 +544,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer
 #endif
          TNL_ASSERT( this->cuda_pointer, );
         cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice );
-         if( ! TNL_CHECK_CUDA_DEVICE ) {
-            return false;
-         }
+         TNL_CHECK_CUDA_DEVICE;
         this->set_last_sync_state();
         return true;
      }
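Note: the smart-pointer synchronize() methods keep their bool return type for interface compatibility, but after this change true is the only value they can return; any CUDA failure leaves through the exception instead. A condensed sketch of the resulting pattern (MirroredObject is an illustrative stand-in, not a TNL class):

    #include <stdexcept>
    #include <cuda_runtime.h>

    template< typename Object >
    struct MirroredObject
    {
       Object* host_pointer;
       Object* cuda_pointer;

       bool synchronize()
       {
          // Failure no longer returns false; it throws instead.
          if( cudaMemcpy( cuda_pointer, host_pointer, sizeof( Object ),
                          cudaMemcpyHostToDevice ) != cudaSuccess )
             throw std::runtime_error( "host-to-device synchronization failed" );
          return true;   // kept only so existing callers still compile
       }
    };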
diff --git a/src/TNL/Pointers/SmartPointersRegister.cpp b/src/TNL/Pointers/SmartPointersRegister.cpp
index cd57dfe3439b0846f65f0bf8bfaf573cfcbd6e91..01641661c1ae008e6517232fc0bb56572f09ff5a 100644
--- a/src/TNL/Pointers/SmartPointersRegister.cpp
+++ b/src/TNL/Pointers/SmartPointersRegister.cpp
@@ -44,7 +44,8 @@ bool SmartPointersRegister::synchronizeDevice( int deviceId )
       const auto & set = pointersOnDevices.at( deviceId );
       for( auto&& it : set )
          ( *it ).synchronize();
-      return TNL_CHECK_CUDA_DEVICE;
+      TNL_CHECK_CUDA_DEVICE;
+      return true;
    }
    catch( const std::out_of_range& ) {
       return false;
diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h
index 93a667c3553e65fc335c9a87e244d6e37dac536c..279f4535629ea144234040ef55570133a4dbeba8 100644
--- a/src/TNL/Pointers/UniquePointer.h
+++ b/src/TNL/Pointers/UniquePointer.h
@@ -238,8 +238,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer
          if( this->modified() )
          {
             cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice );
-            if( ! TNL_CHECK_CUDA_DEVICE )
-               return false;
+            TNL_CHECK_CUDA_DEVICE;
             this->set_last_sync_state();
             return true;
          }
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h
index 109e947649bec236bcb7b7e64bcc84eacd44aef7..aff044601cfcc326fa43134d6da5d903cdabf5f5 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.h
+++ b/src/UnitTests/Containers/ArrayOperationsTest.h
@@ -209,11 +209,11 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda )
 
    ValueType* data;
    ArrayOperations< Devices::Cuda >::allocateMemory( data, size );
-   ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
+   ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
    ASSERT_NE( data, nullptr );
 
    ArrayOperations< Devices::Cuda >::freeMemory( data );
-   ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
+   ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 }
 
 TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda )
@@ -223,7 +223,7 @@ TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda )
 
    ValueType* data;
    ArrayOperations< Devices::Cuda >::allocateMemory( data, size );
-   ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
+   ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 
    for( int i = 0; i < size; i++ )
       ArrayOperations< Devices::Cuda >::setMemoryElement( &data[ i ], (ValueType) i );
@@ -237,7 +237,7 @@ TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda )
    }
 
    ArrayOperations< Devices::Cuda >::freeMemory( data );
-   ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
+   ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
 }
 
 TYPED_TEST( ArrayOperationsTest, setMemory_cuda )
@@ -250,9 +250,9 @@ TYPED_TEST( ArrayOperationsTest, setMemory_cuda )
    ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
    ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 0, size );
    ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 13, size );
-   ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
+   ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
    ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ValueType, ValueType >( hostData, deviceData, size );
-   ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE );
+   ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE );
    for( int i = 0; i < size; i++ )
       EXPECT_EQ( hostData[ i ], 13 );
    ArrayOperations< Devices::Host >::freeMemory( hostData );
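Note: on the test side, ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE ) cannot compile once the macro is a void expression, so the assertions become ASSERT_NO_THROW. A standalone sketch of the idiom (checkCuda stands in for the TNL macro; this test is not part of the TNL suite):

    #include <stdexcept>
    #include <gtest/gtest.h>
    #include <cuda_runtime.h>

    inline void checkCuda( cudaError_t error )
    {
       if( error != cudaSuccess )
          throw std::runtime_error( cudaGetErrorString( error ) );
    }

    TEST( CudaCheckSketch, allocateAndFree )
    {
       float* data = nullptr;
       ASSERT_NO_THROW( checkCuda( cudaMalloc( (void**) &data, 100 * sizeof( float ) ) ) );
       ASSERT_NE( data, nullptr );
       ASSERT_NO_THROW( checkCuda( cudaFree( data ) ) );
    }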