Commit a4885bf9 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber

Fixed symmetric sparse matrix to run with CUDA.

parent a72c076d
+41 −18
@@ -12,6 +12,7 @@
 
 #pragma once
 
+#include <cuda.h>
 #include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
@@ -41,9 +42,15 @@ struct AtomicOperations< Devices::Cuda >
    static void add( Value& v, const Value& a )
    {
 #ifdef HAVE_CUDA
-#if __CUDA_ARCH__ < 600
-      if( std::is_same< Value, double >::value )
-      {
-         unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
-         unsigned long long int old = *v_as_ull, assumed;
+      atomicAdd( &v, a );
+#endif // HAVE_CUDA
+   }
 
+#ifdef HAVE_CUDA
+   __device__
+   static void add( double& v, const double& a )
+   {
+#if __CUDA_ARCH__ < 600
+      unsigned long long int* v_as_ull = ( unsigned long long int* ) &v;
+      unsigned long long int old = *v_as_ull, assumed;
 
@@ -52,19 +59,35 @@ struct AtomicOperations< Devices::Cuda >
-         do
-         {
-            assumed = old;
-            old = atomicCAS( v_as_ull,
-                             assumed,
-                             __double_as_longlong( s + __longlong_as_double( assumed ) ) ) ;
-         }
-         while( assumed != old );
-         return;
-      }
-#endif
-      atomicAdd( &v, a );
+      do
+      {
+         assumed = old;
+         old = atomicCAS( v_as_ull,
+                          assumed,
+                          __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ;
+
+      // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+      }
+      while( assumed != old );
+#else // __CUDA_ARCH__ < 600
+      atomicAdd( &v, a );
+#endif // __CUDA_ARCH__ < 600
+   }
+#else // HAVE_CUDA
+   static void add( double& v, const double& a ){}
 #endif // HAVE_CUDA
    }
 
-};
+   __cuda_callable__
+   static void add( long int& v, const long int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
+
+   __cuda_callable__
+   static void add( short int& v, const short int& a )
+   {
+#ifdef HAVE_CUDA
+      TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
+#endif // HAVE_CUDA
+   }
+};
 } //namespace Algorithms
 } //namespace TNL
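
For reference, the CAS loop added above is the standard workaround for the missing double-precision atomicAdd on GPUs with compute capability below 6.0. A minimal standalone sketch of the same technique (atomicAddDouble and sumKernel are illustrative names, not TNL API):

// Standalone sketch of the pre-sm_60 double atomicAdd fallback.
__device__ double atomicAddDouble( double* address, double value )
{
#if defined( __CUDA_ARCH__ ) && __CUDA_ARCH__ < 600
   unsigned long long int* address_as_ull = ( unsigned long long int* ) address;
   unsigned long long int old = *address_as_ull, assumed;
   do
   {
      assumed = old;
      // Reinterpret the bits, add in double precision, and try to swap the result in.
      old = atomicCAS( address_as_ull,
                       assumed,
                       __double_as_longlong( value + __longlong_as_double( assumed ) ) );
   }
   // Integer comparison avoids an endless loop on NaN (since NaN != NaN).
   while( assumed != old );
   return __longlong_as_double( old );
#else
   // Compute capability 6.0 and newer provide a native double atomicAdd.
   return atomicAdd( address, value );
#endif
}

// Toy usage: each thread of the grid accumulates 1.0 into a single result.
__global__ void sumKernel( double* result )
{
   atomicAddDouble( result, 1.0 );
}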
+5 −0
@@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
       static constexpr bool isBinary() { return MatrixType::isBinary(); };
 
+      static_assert( ! isSymmetric() ||
+                     ! std::is_same< Device, Devices::Cuda >::value ||
+                     ( std::is_same< Real, float >::value || std::is_same< Real, double >::value ||
+                       std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ),
+                     "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
 
       using RealType = Real;
       template< typename Device_, typename Index_, typename IndexAllocator_ >
       using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >;
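
The new static_assert turns an unsupported combination into a compile-time error instead of a silent run-time failure: a symmetric matrix on Devices::Cuda is accepted only when Real is a type for which AtomicOperations< Devices::Cuda >::add has a working overload. A reduced sketch of the same guard pattern (the struct and device tags below are illustrative stand-ins, not the TNL types):

#include <type_traits>

struct Host {};
struct Cuda {};

template< typename Real, typename Device, bool Symmetric >
struct MatrixSketch
{
   // Symmetric CUDA matrices need an atomic add for Real, so reject
   // unsupported Real types already at compile time.
   static_assert( ! Symmetric ||
                  ! std::is_same< Device, Cuda >::value ||
                  ( std::is_same< Real, float >::value ||
                    std::is_same< Real, double >::value ||
                    std::is_same< Real, int >::value ||
                    std::is_same< Real, long long int >::value ),
                  "Given Real type is not supported by atomic operations on GPU." );
};

int main()
{
   MatrixSketch< double, Cuda, true > supported;   // compiles: double has an atomic add
   // MatrixSketch< short, Cuda, true > rejected;  // would trigger the static_assert
   (void) supported;
   return 0;
}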
+0 −12
@@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector,
          if( isBinary() )
             Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] );
          else
-         {
-            //std::cerr << outVectorView << std::endl;
             Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] );
-            //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ];
-
-            //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] <<
-            //   " --> " << outVectorView[ column ] << std::endl;
-         }
       }
       if( isBinary() )
          return inVectorView[ column ];
@@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector,
    };
    auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
       if( isSymmetric() )
-      {
-         //std::cerr << outVectorView << std::endl;
-         //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ];
          outVectorView[ row ] += matrixMultiplicator * value;
-         //std::cerr << " ---> " << outVectorView[ row ] << std::endl;
-      }
       else
       {
          if( outVectorMultiplicator == 0.0 )
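
For context, the two lambdas cleaned up here implement symmetric SpMV over the stored lower triangle: the fetch lambda scatters the mirrored contribution of each element into outVector[ column ] (the scatter that needs AtomicOperations::add when rows run in parallel on the GPU), and the keeper lambda writes the per-row reduction into outVector[ row ]. A sequential sketch of the same scheme over CSR (all names are illustrative, the matrixMultiplicator / outVectorMultiplicator handling is elided, and y is assumed pre-initialized):

#include <cstddef>
#include <vector>

// y += A * x for a symmetric matrix whose lower triangle (j <= i) is stored in CSR.
void symmetricSpMV( std::size_t rows,
                    const std::vector< std::size_t >& rowOffsets,
                    const std::vector< std::size_t >& columns,
                    const std::vector< double >& values,
                    const std::vector< double >& x,
                    std::vector< double >& y )
{
   for( std::size_t i = 0; i < rows; i++ )
   {
      double sum = 0.0;
      for( std::size_t k = rowOffsets[ i ]; k < rowOffsets[ i + 1 ]; k++ )
      {
         const std::size_t j = columns[ k ];
         sum += values[ k ] * x[ j ];         // stored part: y[i] += a(i,j) * x[j]
         if( j < i )                          // mirrored part: y[j] += a(i,j) * x[i];
            y[ j ] += values[ k ] * x[ i ];   // this is the atomic scatter on CUDA
      }
      y[ i ] += sum;                          // the "keeper" step: flush the row sum
   }
}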
+10 −10
@@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types
    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
-#ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU.
+   ,//TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
     TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
+    //TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >
 #endif // HAVE_CUDA
 >;
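
With the static_assert in place, the CUDA entries left enabled above are exactly the combinations whose Real type has an atomic add on the GPU. For example, this alias (mirroring an enabled entry from the test list; includes follow the headers touched by this commit) instantiates cleanly:

using SupportedSymmetricMatrix =
   TNL::Matrices::SparseMatrix< double,                            // Real with a CUDA atomic add
                                TNL::Devices::Cuda,
                                int,                               // Index
                                TNL::Matrices::SymmetricMatrix,
                                TNL::Containers::Segments::CSR >;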