Commit 9953d3ab authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Merge branch 'IK/matrices-csr' into 'develop'

Ik/matrices csr

See merge request !55
parents 323b4db2 c9b316aa
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -27,6 +27,11 @@ namespace Benchmarks {
template< typename Real, typename Device, typename Index >
using SlicedEllpack = Matrices::Legacy::SlicedEllpack< Real, Device, Index >;

// Legacy formats
// Three-parameter alias of Matrices::Legacy::CSR with the fourth (kernel)
// template argument fixed to CSRScalar. benchmarkSpMV is invoked with
// three-parameter matrix templates such as SlicedEllpack, and this alias
// gives the four-parameter CSR the same shape.
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Scalar = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRScalar >;


template< typename Matrix >
int setHostTestMatrix( Matrix& matrix,
                       const int elementsPerRow )
@@ -173,7 +178,7 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
                        const int & elementsPerRow )
{
   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
   benchmarkSpMV< Real, Matrices::Legacy::CSR >( benchmark, size, elementsPerRow );
   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, size, elementsPerRow );
   benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, size, elementsPerRow );
   benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
   benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, size, elementsPerRow );
+21 −1
Original line number Diff line number Diff line
@@ -61,6 +61,22 @@ using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index
template< typename Real, typename Device, typename Index >
using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >;

// Legacy formats
// Each alias below binds the fourth (kernel) template argument of
// Matrices::Legacy::CSR to one enumerator, leaving a three-parameter
// template ( Real, Device, Index ) that can be handed to benchmarkSpMV.

// Legacy CSR with the CSRScalar kernel.
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Scalar = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRScalar >;

// Legacy CSR with the CSRVector kernel.
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Vector = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRVector >;

// Legacy CSR with the CSRLight kernel.
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Light = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRLight >;

// Legacy CSR with the CSRAdaptive kernel.
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Adaptive = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRAdaptive >;

// Legacy CSR with the CSRStream kernel.
template< typename Real, typename Device, typename Index >
using SparseMatrixLegacy_CSR_Stream = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRStream >;

// Get the name (with extension) of input matrix file
std::string getMatrixFileName( const String& InputFileName )
{
@@ -259,7 +275,11 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
#endif

   benchmarkSpMV< Real, Matrices::Legacy::CSR            >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar    >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Vector    >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light     >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Adaptive  >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Stream    >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, SparseMatrix_CSR                 >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, Matrices::Legacy::Ellpack        >( benchmark, hostOutVector, inputFileName, verboseMR );
   benchmarkSpMV< Real, SparseMatrix_Ellpack             >( benchmark, hostOutVector, inputFileName, verboseMR );
+6 −5
Original line number Diff line number Diff line
@@ -16,14 +16,15 @@ using SE_cuda = TNL::Matrices::Legacy::SlicedEllpack< double, TNL::Devices::Cuda

// Registers the sparse matrix types and the copySparseMatrix overloads
// in the module `m`. Matrix classes are registered through export_Matrix
// under the given name; each copySparseMatrix instantiation is added with
// m.def under the same "copySparseMatrix" name.
void export_SparseMatrices( py::module & m )
{
    export_Matrix< CSR_host >( m, "CSR" );
    // TODO: This stopped working after the template parameter KernelType was added to Legacy::CSR
    //export_Matrix< CSR_host >( m, "CSR" );
    export_Matrix< E_host   >( m, "Ellpack" );
    export_Matrix< SE_host  >( m, "SlicedEllpack" );

    // One copySparseMatrix instantiation per pair of matrix types.
    m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, E_host >);
    m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< E_host, CSR_host >);
    m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, SE_host >);
    m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< SE_host, CSR_host >);
    // Commented-out copies of the CSR_host instantiations above.
    //m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, E_host >);
    //m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< E_host, CSR_host >);
    //m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, SE_host >);
    //m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< SE_host, CSR_host >);
    m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< E_host, SE_host >);
    m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< SE_host, E_host >);
}
+41 −20
Original line number Diff line number Diff line
@@ -31,7 +31,9 @@ class CusparseCSR;
template< typename Device >
class CSRDeviceDependentCode;

template< typename Real, typename Device = Devices::Host, typename Index = int >
// Selects the SpMV kernel of the legacy CSR format. Used as the KernelType
// non-type template parameter of the CSR class, where CSRScalar is the default.
enum CSRKernel { CSRScalar, CSRVector, CSRHybrid, CSRLight, CSRAdaptive, CSRStream };

template< typename Real, typename Device = Devices::Host, typename Index = int, CSRKernel KernelType = CSRScalar >
class CSR : public Sparse< Real, Device, Index >
{
private:
@@ -40,7 +42,7 @@ private:
   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;

   // friend class will be needed for templated assignment operators
   template< typename Real2, typename Device2, typename Index2 >
   template< typename Real2, typename Device2, typename Index2, CSRKernel KernelType2 >
   friend class CSR;

public:
@@ -60,7 +62,10 @@ public:
             typename _Index = Index >
   using Self = CSR< _Real, _Device, _Index >;

   enum SPMVCudaKernel { scalar, vector, hybrid };
   // Returns the SpMV kernel chosen at compile time through the KernelType
   // template parameter of this CSR instantiation.
   //
   // Declared static: the result depends only on the template parameter, and
   // since C++14 a non-static constexpr member function is not implicitly
   // const, so the previous form could not be called on a const matrix.
   // Existing calls of the form matrix.getSpMVKernelType() keep working.
   static constexpr CSRKernel getSpMVKernelType() { return KernelType; }
   //enum SPMVCudaKernel { scalar, vector, hybrid };

   using Sparse< Real, Device, Index >::getAllocatedElementsCount;

   CSR();

@@ -85,8 +90,8 @@ public:
   __cuda_callable__
   IndexType getNonZeroRowLengthFast( const IndexType row ) const;

   template< typename Real2, typename Device2, typename Index2 >
   void setLike( const CSR< Real2, Device2, Index2 >& matrix );
   template< typename Real2, typename Device2, typename Index2, CSRKernel KernelType2 >
   void setLike( const CSR< Real2, Device2, Index2, KernelType2 >& matrix );

   void reset();

@@ -165,13 +170,13 @@ public:
                       OutVector& outVector ) const;
   // TODO: add const RealType& multiplicator = 1.0 )

   template< typename Real2, typename Index2 >
   void addMatrix( const CSR< Real2, Device, Index2 >& matrix,
   template< typename Real2, typename Index2, CSRKernel KernelType2 >
   void addMatrix( const CSR< Real2, Device, Index2, KernelType2 >& matrix,
                   const RealType& matrixMultiplicator = 1.0,
                   const RealType& thisMatrixMultiplicator = 1.0 );

   template< typename Real2, typename Index2 >
   void getTransposition( const CSR< Real2, Device, Index2 >& matrix,
   template< typename Real2, typename Index2, CSRKernel KernelType2 >
   void getTransposition( const CSR< Real2, Device, Index2, KernelType2 >& matrix,
                          const RealType& matrixMultiplicator = 1.0 );

   template< typename Vector1, typename Vector2 >
@@ -184,9 +189,9 @@ public:
   CSR& operator=( const CSR& matrix );

   // cross-device copy assignment
   template< typename Real2, typename Device2, typename Index2,
   template< typename Real2, typename Device2, typename Index2, CSRKernel KernelType2,
             typename = typename Enabler< Device2 >::type >
   CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix );
   CSR& operator=( const CSR< Real2, Device2, Index2, KernelType2 >& matrix );

   void save( File& file ) const;

@@ -198,10 +203,10 @@ public:

   void print( std::ostream& str ) const;

   void setCudaKernelType( const SPMVCudaKernel kernel );
   //void setCudaKernelType( const SPMVCudaKernel kernel );

   __cuda_callable__
   SPMVCudaKernel getCudaKernelType() const;
   //__cuda_callable__
   //SPMVCudaKernel getCudaKernelType() const;

   void setCudaWarpSize( const int warpSize );

@@ -220,17 +225,33 @@ public:
   __device__
   void spmvCudaVectorized( const InVector& inVector,
                            OutVector& outVector,
                            const IndexType warpStart,
                            const IndexType warpEnd,
                            const IndexType inWarpIdx ) const;
                            const IndexType gridIdx ) const;

   template< typename InVector,
             typename OutVector,
             int warpSize >
   __device__
   void vectorProductCuda( const InVector& inVector,
                           OutVector& outVector,
                           int gridIdx, int *blocks, size_t size ) const;
   
   template< typename InVector,
             typename OutVector,
             int warpSize > 
   __device__
   void spmvCudaLightSpmv( const InVector& inVector,
                            OutVector& outVector,
                            int gridIdx) const;

   template< typename InVector,
             typename OutVector,
             int warpSize > 
   __device__
   void spmvCSRAdaptive( const InVector& inVector,
                           OutVector& outVector,
                           int gridIdx,
                           int *blocks,
                           size_t blocks_size) const;
#endif

   // The following getters allow us to interface TNL with external C-like
@@ -263,7 +284,7 @@ protected:

   Containers::Vector< Index, Device, Index > rowPointers;

   SPMVCudaKernel spmvCudaKernel;
   //SPMVCudaKernel spmvCudaKernel;

   int cudaWarpSize, hybridModeSplit;

+459 −167

File changed.

Preview size limit exceeded, changes collapsed.

Loading