Fixed symmetric sparse matrix to run with CUDA. (a4885bf9) · Commits · TNL / tnl-dev

src/TNL/Algorithms/AtomicOperations.h

+41 −18

Original line number	Diff line number	Diff line
		@@ -12,6 +12,7 @@

		#pragma once

		#include <cuda.h>
		#include <TNL/Devices/Sequential.h>
		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		@@ -41,9 +42,15 @@ struct AtomicOperations< Devices::Cuda >
		static void add( Value& v, const Value& a )
		{
		#ifdef HAVE_CUDA
		#if __CUDA_ARCH__ < 600
		if( std::is_same< Value, double >::value )
		atomicAdd( &v, a );
		#endif // HAVE_CUDA
		}

		#ifdef HAVE_CUDA
		__device__
		static void add( double& v, const double& a )
		{
		#if __CUDA_ARCH__ < 600
		unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
		unsigned long long int old = *v_as_ull, assumed;

		@@ -52,19 +59,35 @@ struct AtomicOperations< Devices::Cuda >
		assumed = old;
		old = atomicCAS( v_as_ull,
		assumed,
		__double_as_longlong( s + __longlong_as_double( assumed ) ) ) ;
		__double_as_longlong( a + __longlong_as_double( assumed ) ) ) ;

		// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
		}
		while( assumed != old );
		return;
		}
		#endif
		#else // __CUDA_ARCH__ < 600
		atomicAdd( &v, a );
		#endif
		#endif //__CUDA_ARCH__ < 600

		#else // HAVE_CUDA
		static void add( double& v, const double& a ){}
		#endif // HAVE_CUDA
		}

		};
		__cuda_callable__
		static void add( long int& v, const long int& a )
		{
		#ifdef HAVE_CUDA
		TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
		#endif // HAVE_CUDA
		}

		__cuda_callable__
		static void add( short int& v, const short int& a )
		{
		#ifdef HAVE_CUDA
		TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
		#endif // HAVE_CUDA
		}
		};
		} //namespace Algorithms
		} //namespace TNL

src/TNL/Matrices/SparseMatrix.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
		static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
		static constexpr bool isBinary() { return MatrixType::isBinary(); };

		static_assert( ! isSymmetric() \|\|
		! std::is_same< Device, Devices::Cuda >::value \|\|
		( ( std::is_same< Real, float >::value \|\| std::is_same< Real, double >::value \|\| std::is_same< Real, int >::value \|\| std::is_same< Real, long long int >::value ),
		"Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ) );

		using RealType = Real;
		template< typename Device_, typename Index_, typename IndexAllocator_ >
		using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;

src/TNL/Matrices/SparseMatrixView.hpp

+0 −12

Original line number	Diff line number	Diff line
		@@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector,
		if( isBinary() )
		Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] );
		else
		{
		//std::cerr << outVectorView << std::endl;
		Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] );
		//outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ];

		//std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] <<
		// " --> " << outVectorView[ column ] << std::endl;
		}
		}
		if( isBinary() )
		return inVectorView[ column ];
		@@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector,
		};
		auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
		if( isSymmetric() )
		{
		//std::cerr << outVectorView << std::endl;
		//std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ];
		outVectorView[ row ] += matrixMultiplicator * value;
		//std::cerr << " ---> " << outVectorView[ row ] << std::endl;
		}
		else
		{
		if( outVectorMultiplicator == 0.0 )

src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h

+10 −10

Original line number	Diff line number	Diff line
		@@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types
		TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
		#ifdef HAVE_CUDA
		,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU.
		,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		//TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		//TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		//TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		//TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
		//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		//TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		//TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
		//TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
		#endif // HAVE_CUDA
		>;