Commit a4885bf9 authored by Tomáš Oberhuber

Fixed symmetric sparse matrix to run with CUDA.

parent a72c076d
Merge request: !48 Segments
@@ -12,6 +12,7 @@
 #pragma once
+#include <cuda.h>
 #include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
@@ -41,30 +42,52 @@ struct AtomicOperations< Devices::Cuda >
    static void add( Value& v, const Value& a )
    {
 #ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-      if( std::is_same< Value, double >::value )
-      {
-         unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
-         unsigned long long int old = *v_as_ull, assumed;
-         do
-         {
-            assumed = old;
-            old = atomicCAS( v_as_ull,
-                             assumed,
-                             __double_as_longlong( s + __longlong_as_double( assumed ) ) );
-            // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-         }
-         while( assumed != old );
-         return;
-      }
-#endif
       atomicAdd( &v, a );
-#endif
+#endif // HAVE_CUDA
    }
+
+#ifdef HAVE_CUDA
+   __device__
+   static void add( double& v, const double& a )
+   {
+#if __CUDA_ARCH__ < 600
+      unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
+      unsigned long long int old = *v_as_ull, assumed;
+      do
+      {
+         assumed = old;
+         old = atomicCAS( v_as_ull,
+                          assumed,
+                          __double_as_longlong( a + __longlong_as_double( assumed ) ) );
+         // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+      }
+      while( assumed != old );
+#else // __CUDA_ARCH__ < 600
+      atomicAdd( &v, a );
+#endif // __CUDA_ARCH__ < 600
+   }
+#else // HAVE_CUDA
+   static void add( double& v, const double& a ){}
+#endif // HAVE_CUDA
+
+   __cuda_callable__
+   static void add( long int& v, const long int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
+
+   __cuda_callable__
+   static void add( short int& v, const short int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
 };
 } //namespace Algorithms
 } //namespace TNL
\ No newline at end of file
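
The CAS loop above is the standard workaround for the missing double-precision atomicAdd on GPUs with compute capability below 6.0: the target is reinterpreted as a 64-bit integer and updated with atomicCAS until no other thread has intervened. Below is a minimal standalone sketch of the same pattern; the function and kernel names are illustrative, not part of TNL.

#include <cuda.h>

// Sketch of the CAS-based emulation of atomicAdd for double, as used above
// for __CUDA_ARCH__ < 600. Reinterprets the target as a 64-bit integer and
// retries until no other thread has modified it in between.
__device__ double atomicAddDouble( double* address, double value )
{
   unsigned long long int* address_as_ull = ( unsigned long long int* ) address;
   unsigned long long int old = *address_as_ull, assumed;
   do
   {
      assumed = old;
      old = atomicCAS( address_as_ull,
                       assumed,
                       __double_as_longlong( value + __longlong_as_double( assumed ) ) );
   }
   // Integer comparison avoids an endless loop when the stored value is NaN,
   // since a floating-point comparison of NaN with itself is never true.
   while( assumed != old );
   return __longlong_as_double( old );
}

// Illustrative use: many threads accumulating into a single double.
__global__ void accumulate( double* result, const double* data, int n )
{
   const int i = blockIdx.x * blockDim.x + threadIdx.x;
   if( i < n )
      atomicAddDouble( result, data[ i ] );
}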
@@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
    static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
    static constexpr bool isBinary() { return MatrixType::isBinary(); };

+   static_assert( ! isSymmetric() ||
+                  ! std::is_same< Device, Devices::Cuda >::value ||
+                  ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
+                  "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
+
    using RealType = Real;

    template< typename Device_, typename Index_, typename IndexAllocator_ >
    using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
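
The new static_assert rejects symmetric CUDA matrices whose Real type has no atomic addition on the GPU at compile time, instead of failing inside a kernel. A hypothetical illustration of what it accepts and rejects, using the same template argument shape as the tests below:

// Accepted: double is covered by atomicAdd (natively on sm_60+, via CAS otherwise).
using GoodMatrix = TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int,
                                                TNL::Matrices::SymmetricMatrix,
                                                TNL::Containers::Segments::CSR >;

// Rejected at compile time: no atomic addition for short int on CUDA.
// using BadMatrix = TNL::Matrices::SparseMatrix< short int, TNL::Devices::Cuda, int,
//                                                TNL::Matrices::SymmetricMatrix,
//                                                TNL::Containers::Segments::CSR >;

// Unaffected: symmetric matrices on the host and non-symmetric matrices on CUDA.
using HostMatrix = TNL::Matrices::SparseMatrix< short int, TNL::Devices::Host, int,
                                                TNL::Matrices::SymmetricMatrix,
                                                TNL::Containers::Segments::CSR >;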
@@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector,
          if( isBinary() )
             Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] );
          else
-         {
-            //std::cerr << outVectorView << std::endl;
             Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] );
-            //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ];
-            //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] <<
-            //   " --> " << outVectorView[ column ] << std::endl;
-         }
       }
       if( isBinary() )
          return inVectorView[ column ];
@@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector,
    };
    auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       if( isSymmetric() )
-      {
-         //std::cerr << outVectorView << std::endl;
-         //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ];
          outVectorView[ row ] += matrixMultiplicator * value;
-         //std::cerr << " ---> " << outVectorView[ row ] << std::endl;
-      }
       else
       {
          if( outVectorMultiplicator == 0.0 )
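
For context on the vectorProduct changes: with symmetric storage only one triangle of the matrix is kept, so each stored element a(row,column) contributes a(row,column)*in[column] to out[row] and, mirrored, a(row,column)*in[row] to out[column]. The mirrored contribution scatters into rows owned by other threads, which is why it goes through AtomicOperations::add above. A simplified CSR sketch of the same access pattern; the names are illustrative, and it assumes the lower triangle is stored and compute capability 6.0+ for atomicAdd on double:

// Simplified symmetric CSR matrix-vector product. Each thread handles one
// stored row and mirrors every off-diagonal element to the transposed
// position, which requires atomic updates.
__global__ void symmetricSpMV( const int* rowPointers,
                               const int* columnIndexes,
                               const double* values,
                               const double* inVector,
                               double* outVector,
                               int rows )
{
   const int row = blockIdx.x * blockDim.x + threadIdx.x;
   if( row >= rows )
      return;
   double sum = 0.0;
   for( int idx = rowPointers[ row ]; idx < rowPointers[ row + 1 ]; idx++ )
   {
      const int column = columnIndexes[ idx ];
      sum += values[ idx ] * inVector[ column ];   // contribution of a(row,column) to out[row]
      if( column != row )                          // mirrored contribution to out[column]:
         atomicAdd( &outVector[ column ], values[ idx ] * inVector[ row ] );
   }
   // out[row] is also a scatter target of other threads' mirrored updates.
   atomicAdd( &outVector[ row ], sum );
}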
@@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types
     TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
-#ifdef HAVE_CUDA
-    ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU.
+    ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+    //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
 #endif // HAVE_CUDA
 >;
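
The disabled CUDA instantiations track which Real types have an atomic addition on the GPU: CUDA's native atomicAdd covers int, unsigned int, unsigned long long int, and float, with double added on compute capability 6.0+ (and emulated via CAS below that). There is no overload for long int or short int; long int is a distinct type from long long int even on platforms where both are 64 bits wide. A small self-contained sketch, with a hypothetical predicate name, expressing the same rule as the static_assert in SparseMatrix:

#include <type_traits>

// Hypothetical predicate mirroring the static_assert: the Real types usable
// for symmetric matrices on CUDA are those with an atomic addition.
template< typename Real >
constexpr bool hasCudaAtomicAdd =
   std::is_same< Real, int >::value ||
   std::is_same< Real, long long int >::value ||
   std::is_same< Real, float >::value ||
   std::is_same< Real, double >::value;

static_assert(  hasCudaAtomicAdd< double >,   "enabled in the test list above" );
static_assert( !hasCudaAtomicAdd< long int >, "commented out in the test list above" );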