diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h
index b00260846843b8aae006586d48221ef88069861b..4be725d482f4ebb255c42c1d984b58a2ed4975e1 100644
--- a/src/TNL/Algorithms/AtomicOperations.h
+++ b/src/TNL/Algorithms/AtomicOperations.h
@@ -12,6 +12,9 @@
 #pragma once
 
+#ifdef HAVE_CUDA
+#include <cuda.h>
+#endif
 #include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
 
@@ -41,30 +42,51 @@ struct AtomicOperations< Devices::Cuda >
    static void add( Value& v, const Value& a )
    {
 #ifdef HAVE_CUDA
+      atomicAdd( &v, a );
+#endif // HAVE_CUDA
+   }
+
+#ifdef HAVE_CUDA
+   __device__
+#endif // HAVE_CUDA
+   static void add( double& v, const double& a )
+   {
+#ifdef HAVE_CUDA
 #if __CUDA_ARCH__ < 600
-      if( std::is_same< Value, double >::value )
-      {
-         unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
-         unsigned long long int old = *v_as_ull, assumed;
+      unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
+      unsigned long long int old = *v_as_ull, assumed;
 
-         do
-         {
-            assumed = old;
-            old = atomicCAS( v_as_ull,
-                             assumed,
-                             __double_as_longlong( s + __longlong_as_double( assumed ) ) ) ;
+      do
+      {
+         assumed = old;
+         old = atomicCAS( v_as_ull,
+                          assumed,
+                          __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ;
 
-            // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-         }
-         while( assumed != old );
-         return;
+         // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
       }
-#endif
+      while( assumed != old );
+#else // __CUDA_ARCH__ < 600
       atomicAdd( &v, a );
-#endif
+#endif //__CUDA_ARCH__ < 600
+#endif // HAVE_CUDA
    }
 
+   __cuda_callable__
+   static void add( long int& v, const long int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
+
+   __cuda_callable__
+   static void add( short int& v, const short int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
 };
-
 } //namespace Algorithms
-} //namespace TNL
\ No newline at end of file
+} //namespace TNL
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index c7f953a8cdc2b58a67801a15e098b7524effc159..d48e7d6eabac1ac063fd3729e826c2b011b83617 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
       static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
+      static_assert( ! isSymmetric() ||
+                     ! std::is_same< Device, Devices::Cuda >::value ||
+                     std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value,
+                     "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
+
       using RealType = Real;
       template< typename Device_, typename Index_, typename IndexAllocator_ >
       using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index e07e00fa63de998ff018e0dd095d4d75dbca32a7..98285e064dc227410e9632ff75bbd0df9f00903e 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector,
             if( isBinary() )
                Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] );
             else
-            {
-               //std::cerr << outVectorView << std::endl;
                Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] );
-               //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ];
-
-               //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] <<
-               //   " --> " << outVectorView[ column ] << std::endl;
-            }
          }
          if( isBinary() )
            return inVectorView[ column ];
@@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector,
       };
       auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
          if( isSymmetric() )
-         {
-            //std::cerr << outVectorView << std::endl;
-            //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ];
             outVectorView[ row ] += matrixMultiplicator * value;
-            //std::cerr << " ---> " << outVectorView[ row ] << std::endl;
-         }
         else
         {
            if( outVectorMultiplicator == 0.0 )
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
index 45dd5e5b9522a378ee247ba212bc6fcf138a7915..f6f7ec95a3a93aa6917d88a0f014ddbc44ca92ef 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
@@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
-#ifdef HAVE_CUDA
-    ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU.
+    ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+    TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+    //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
 #endif // HAVE_CUDA
 >;
 