Loading src/TNL/Algorithms/AtomicOperations.h +41 −18 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ #pragma once #include <cuda.h> #include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading Loading @@ -41,9 +42,15 @@ struct AtomicOperations< Devices::Cuda > static void add( Value& v, const Value& a ) { #ifdef HAVE_CUDA #if __CUDA_ARCH__ < 600 if( std::is_same< Value, double >::value ) atomicAdd( &v, a ); #endif // HAVE_CUDA } #ifdef HAVE_CUDA __device__ static void add( double& v, const double& a ) { #if __CUDA_ARCH__ < 600 unsigned long long int* v_as_ull = ( unsigned long long int* ) &v; unsigned long long int old = *v_as_ull, assumed; Loading @@ -52,19 +59,35 @@ struct AtomicOperations< Devices::Cuda > assumed = old; old = atomicCAS( v_as_ull, assumed, __double_as_longlong( s + __longlong_as_double( assumed ) ) ) ; __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ; // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) } while( assumed != old ); return; } #endif #else // __CUDA_ARCH__ < 600 atomicAdd( &v, a ); #endif #endif //__CUDA_ARCH__ < 600 #else // HAVE_CUDA static void add( double& v, const double& a ){} #endif // HAVE_CUDA } }; __cuda_callable__ static void add( long int& v, const long int& a ) { #ifdef HAVE_CUDA TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." ); #endif // HAVE_CUDA } __cuda_callable__ static void add( short int& v, const short int& a ) { #ifdef HAVE_CUDA TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." ); #endif // HAVE_CUDA } }; } //namespace Algorithms } //namespace TNL src/TNL/Matrices/SparseMatrix.h +5 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; static constexpr bool isBinary() { return MatrixType::isBinary(); }; static_assert( ! isSymmetric() || ! std::is_same< Device, Devices::Cuda >::value || ( ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ) ); using RealType = Real; template< typename Device_, typename Index_, typename IndexAllocator_ > using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; Loading src/TNL/Matrices/SparseMatrixView.hpp +0 −12 Original line number Diff line number Diff line Loading @@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector, if( isBinary() ) Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] ); else { //std::cerr << outVectorView << std::endl; Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] ); //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ]; //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] << // " --> " << outVectorView[ column ] << std::endl; } } if( isBinary() ) return inVectorView[ column ]; Loading @@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector, }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { if( isSymmetric() ) { //std::cerr << outVectorView << std::endl; //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ]; outVectorView[ row ] += matrixMultiplicator * value; //std::cerr << " ---> " << outVectorView[ row ] << std::endl; } else { if( outVectorMultiplicator == 0.0 ) Loading src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h +10 −10 Original line number Diff line number Diff line Loading @@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > #ifdef HAVE_CUDA ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, #ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU. ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > #endif // HAVE_CUDA >; Loading Loading
src/TNL/Algorithms/AtomicOperations.h +41 −18 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ #pragma once #include <cuda.h> #include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> Loading Loading @@ -41,9 +42,15 @@ struct AtomicOperations< Devices::Cuda > static void add( Value& v, const Value& a ) { #ifdef HAVE_CUDA #if __CUDA_ARCH__ < 600 if( std::is_same< Value, double >::value ) atomicAdd( &v, a ); #endif // HAVE_CUDA } #ifdef HAVE_CUDA __device__ static void add( double& v, const double& a ) { #if __CUDA_ARCH__ < 600 unsigned long long int* v_as_ull = ( unsigned long long int* ) &v; unsigned long long int old = *v_as_ull, assumed; Loading @@ -52,19 +59,35 @@ struct AtomicOperations< Devices::Cuda > assumed = old; old = atomicCAS( v_as_ull, assumed, __double_as_longlong( s + __longlong_as_double( assumed ) ) ) ; __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ; // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) } while( assumed != old ); return; } #endif #else // __CUDA_ARCH__ < 600 atomicAdd( &v, a ); #endif #endif //__CUDA_ARCH__ < 600 #else // HAVE_CUDA static void add( double& v, const double& a ){} #endif // HAVE_CUDA } }; __cuda_callable__ static void add( long int& v, const long int& a ) { #ifdef HAVE_CUDA TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." ); #endif // HAVE_CUDA } __cuda_callable__ static void add( short int& v, const short int& a ) { #ifdef HAVE_CUDA TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." ); #endif // HAVE_CUDA } }; } //namespace Algorithms } //namespace TNL
src/TNL/Matrices/SparseMatrix.h +5 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; static constexpr bool isBinary() { return MatrixType::isBinary(); }; static_assert( ! isSymmetric() || ! std::is_same< Device, Devices::Cuda >::value || ( ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ) ); using RealType = Real; template< typename Device_, typename Index_, typename IndexAllocator_ > using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; Loading
src/TNL/Matrices/SparseMatrixView.hpp +0 −12 Original line number Diff line number Diff line Loading @@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector, if( isBinary() ) Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] ); else { //std::cerr << outVectorView << std::endl; Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] ); //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ]; //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] << // " --> " << outVectorView[ column ] << std::endl; } } if( isBinary() ) return inVectorView[ column ]; Loading @@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector, }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { if( isSymmetric() ) { //std::cerr << outVectorView << std::endl; //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ]; outVectorView[ row ] += matrixMultiplicator * value; //std::cerr << " ---> " << outVectorView[ row ] << std::endl; } else { if( outVectorMultiplicator == 0.0 ) Loading
src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h +10 −10 Original line number Diff line number Diff line Loading @@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > #ifdef HAVE_CUDA ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, #ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU. ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > #endif // HAVE_CUDA >; Loading