    /***************************************************************************
                              spmv.h  -  description
                                 -------------------
        begin                : Dec 30, 2018
        copyright            : (C) 2015 by Tomas Oberhuber et al.
        email                : tomas.oberhuber@fjfi.cvut.cz
     ***************************************************************************/

    /* See Copyright Notice in tnl/Copyright */

    // Implemented by: Lukas Cejka
    //      Original implemented by J. Klinkovsky in Benchmarks/BLAS
    //      This is an edited copy of Benchmarks/BLAS/spmv.h by: Lukas Cejka
    
    #include "SpmvBenchmarkResult.h"
    
    #include <TNL/Matrices/Legacy/CSR.h>
    #include <TNL/Matrices/Legacy/Ellpack.h>
    #include <TNL/Matrices/Legacy/SlicedEllpack.h>
    #include <TNL/Matrices/Legacy/ChunkedEllpack.h>
    #include <TNL/Matrices/Legacy/AdEllpack.h>
    #include <TNL/Matrices/Legacy/BiEllpack.h>
    
    #include <TNL/Matrices/MatrixInfo.h>
    
    
    #include <TNL/Matrices/SparseMatrix.h>
    
    #include <TNL/Containers/Segments/CSR.h>
    #include <TNL/Containers/Segments/Ellpack.h>
    #include <TNL/Containers/Segments/SlicedEllpack.h>
    
       namespace Benchmarks {
          namespace SpMVLegacy {
    
    // Three-parameter alias of the legacy SlicedEllpack format, so that it has
    // the same number of template parameters as the other benchmarked formats.
    template< typename Real,
              typename Device,
              typename Index >
    using SlicedEllpackAlias = Matrices::Legacy::SlicedEllpack< Real, Device, Index >;
    
    // Segments-based sparse matrix aliases.
    //
    // SparseMatrix_CSR: Matrices::SparseMatrix tagged as Matrices::GeneralMatrix
    // and organized by Containers::Segments::CSR.
    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrix_CSR = Matrices::SparseMatrix< Real,
                                                     Device,
                                                     Index,
                                                     Matrices::GeneralMatrix,
                                                     Containers::Segments::CSR >;
    
    // Exposes Containers::Segments::Ellpack through a three-parameter alias
    // (Device, Index, IndexAllocator); this is the form handed to
    // Matrices::SparseMatrix in SparseMatrix_Ellpack.
    template< typename Device,
              typename Index,
              typename IndexAllocator >
    using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >;

    // SparseMatrix_Ellpack: Matrices::SparseMatrix tagged as
    // Matrices::GeneralMatrix and organized by EllpackSegments.
    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real,
                                                         Device,
                                                         Index,
                                                         Matrices::GeneralMatrix,
                                                         EllpackSegments >;
    
    // Exposes Containers::Segments::SlicedEllpack through a three-parameter
    // alias (Device, Index, IndexAllocator); this is the form handed to
    // Matrices::SparseMatrix in SparseMatrix_SlicedEllpack.
    template< typename Device,
              typename Index,
              typename IndexAllocator >
    using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >;

    // SparseMatrix_SlicedEllpack: Matrices::SparseMatrix tagged as
    // Matrices::GeneralMatrix and organized by SlicedEllpackSegments.
    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real,
                                                               Device,
                                                               Index,
                                                               Matrices::GeneralMatrix,
                                                               SlicedEllpackSegments >;
    
    // Legacy formats.
    //
    // One three-parameter alias per value of the fourth template argument of
    // Matrices::Legacy::CSR (CSRScalar, CSRVector, CSRLight, CSRAdaptive,
    // CSRStream), so each variant can be passed as the `Matrix` template-template
    // argument of benchmarkSpMV.
    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrixLegacy_CSR_Scalar =
       Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRScalar >;

    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrixLegacy_CSR_Vector =
       Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRVector >;

    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrixLegacy_CSR_Light =
       Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRLight >;

    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrixLegacy_CSR_Adaptive =
       Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRAdaptive >;

    template< typename Real,
              typename Device,
              typename Index >
    using SparseMatrixLegacy_CSR_Stream =
       Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRStream >;
    
    
    // Get the name (with extension) of input matrix file
    std::string getMatrixFileName( const String& InputFileName )
    
        const size_t last_slash_idx = fileName.find_last_of( "/\\" );
        if( std::string::npos != last_slash_idx )
            fileName.erase( 0, last_slash_idx + 1 );
    
    // Get only the name of the format from getType().
    //
    // The string returned by getType( matrix ) is cut at the first '<' (dropping
    // the template argument list) and then everything up to and including the
    // first "::" is skipped (position of the first ':' plus 2).
    //
    // NOTE: if the type string contains no ':', find() yields npos and
    // npos + 2 wraps around to 1, so only the first character is dropped.
    template< typename Matrix >
    std::string getMatrixFormat( const Matrix& matrix )
    {
        std::string mtrxFullType = getType( matrix );

        std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) );

        std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 );

        return format;
    }
    
    // Short format tag (at most three characters) derived from getType( matrix ).
    //
    // The type string is cut at the first '<', the text up to and including the
    // first "::" is skipped twice in a row, and the first three characters of
    // what remains are returned.
    template< typename Matrix >
    std::string getFormatShort( const Matrix& matrix )
    {
        const std::string fullType = getType( matrix );

        // Drop the template argument list.
        const std::string qualifiedName = fullType.substr( 0, fullType.find( "<" ) );

        // Skip past two scope qualifiers, one after the other.
        const std::string afterFirstScope = qualifiedName.substr( qualifiedName.find( ':' ) + 2 );
        const std::string afterSecondScope = afterFirstScope.substr( afterFirstScope.find( ':' ) + 2 );

        return afterSecondScope.substr( 0, 3 );
    }
    
    
    // Print information about the matrix.
    //
    // Writes the format name (as produced by getMatrixFormat), the number of
    // rows, the number of columns and the number of nonzero elements of `matrix`
    // to `str`, one item per line.
    template< typename Matrix >
    void printMatrixInfo( const Matrix& matrix,
                          std::ostream& str )
    {
        str << "\n Format: " << getMatrixFormat( matrix ) << std::endl;
        str << " Rows: " << matrix.getRows() << std::endl;
        str << " Cols: " << matrix.getColumns() << std::endl;
        str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
    }
    
    template< typename Real,
              template< typename, typename, typename > class Matrix,
    
              template< typename, typename, typename, typename > class Vector = Containers::Vector >
    
    Tomáš Oberhuber's avatar
    Tomáš Oberhuber committed
    benchmarkSpMV( Benchmark& benchmark,
    
                   const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector,
    
    Tomáš Oberhuber's avatar
    Tomáš Oberhuber committed
                   const String& inputFileName,
    
       using HostMatrix = Matrix< Real, Devices::Host, int >;
       using CudaMatrix = Matrix< Real, Devices::Cuda, int >;
       using HostVector = Containers::Vector< Real, Devices::Host, int >;
       using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
    
       HostMatrix hostMatrix;
    
       CudaMatrix cudaMatrix;
    
       MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR );
    
       benchmark.setMetadataColumns( Benchmark::MetadataColumns({
    
             { "matrix name", convertToString( inputFileName ) },
    
             { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
             { "rows", convertToString( hostMatrix.getRows() ) },
             { "columns", convertToString( hostMatrix.getColumns() ) },
    
             { "matrix format", MatrixInfo< HostMatrix >::getFormat() }
    
       const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
       const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
       benchmark.setOperation( datasetSize );
    
       /***
        * Benchmark SpMV on host
        */
       HostVector hostInVector( hostMatrix.getColumns() ), hostOutVector( hostMatrix.getRows() );
    
       auto resetHostVectors = [&]() {
    
          hostInVector = 1.0;
          hostOutVector = 0.0;
    
       auto spmvHost = [&]() {
    
          hostMatrix.vectorProduct( hostInVector, hostOutVector );
    
       SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector );
       benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults );
    
       /***
        * Benchmark SpMV on CUDA
        */
    
       cudaMatrix = hostMatrix;
       CudaVector cudaInVector( hostMatrix.getColumns() ), cudaOutVector( hostMatrix.getRows() );
    
       auto resetCudaVectors = [&]() {
          cudaInVector = 1.0;
          cudaOutVector = 0.0;
       };
    
       auto spmvCuda = [&]() {
          cudaMatrix.vectorProduct( cudaInVector, cudaOutVector );
       };
       SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector );
       benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults );
    
    }
    
    template< typename Real = double,
              typename Index = int >
    
    Tomáš Oberhuber's avatar
    Tomáš Oberhuber committed
    benchmarkSpmvSynthetic( Benchmark& benchmark,
    
                            const String& inputFileName,
                            bool verboseMR )
    
       using CSRHostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >;
       using CSRCudaMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >;
       using HostVector = Containers::Vector< Real, Devices::Host, int >;
       using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;
    
       CSRHostMatrix csrHostMatrix;
       CSRCudaMatrix csrCudaMatrix;
    
       ////
       // Set-up benchmark datasize
       //
       MatrixReader< CSRHostMatrix >::readMtxFile( inputFileName, csrHostMatrix, verboseMR );
       const int elements = csrHostMatrix.getNumberOfNonzeroMatrixElements();
       const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
       benchmark.setOperation( datasetSize );
    
       ////
       // Perform benchmark on host with CSR as a reference CPU format
       //
       benchmark.setMetadataColumns( Benchmark::MetadataColumns({
    
             { "matrix name", convertToString( inputFileName ) },
    
             { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
             { "rows", convertToString( csrHostMatrix.getRows() ) },
             { "columns", convertToString( csrHostMatrix.getColumns() ) },
             { "matrix format", String( "CSR" ) }
          } ));
    
       HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() );
    
       auto resetHostVectors = [&]() {
          hostInVector = 1.0;
          hostOutVector == 0.0;
       };
    
       auto spmvCSRHost = [&]() {
           csrHostMatrix.vectorProduct( hostInVector, hostOutVector );
       };
    
       benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost );
    
       ////
       // Perform benchmark on CUDA device with cuSparse as a reference GPU format
       //
    
    #ifdef HAVE_CUDA
    
       benchmark.setMetadataColumns( Benchmark::MetadataColumns({
    
             { "matrix name", convertToString( inputFileName ) },
    
             { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) },
             { "rows", convertToString( csrHostMatrix.getRows() ) },
             { "columns", convertToString( csrHostMatrix.getColumns() ) },
             { "matrix format", String( "cuSparse" ) }
          } ));
    
    
       cusparseHandle_t cusparseHandle;
       cusparseCreate( &cusparseHandle );
    
    
       csrCudaMatrix = csrHostMatrix;
    
    
       // Delete the CSRhostMatrix, so it doesn't take up unnecessary space
    
       csrHostMatrix.reset();
    
       TNL::CusparseCSR< Real > cusparseMatrix;
       cusparseMatrix.init( csrCudaMatrix, &cusparseHandle );
    
       CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() );
    
       auto resetCusparseVectors = [&]() {
          cusparseInVector = 1.0;
          cusparseOutVector == 0.0;
       };
    
       auto spmvCusparse = [&]() {
           cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector );
       };
    
       benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
    
       benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar    >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Vector    >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light     >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Adaptive  >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Stream    >( benchmark, hostOutVector, inputFileName, verboseMR );
    
       benchmarkSpMV< Real, SparseMatrix_CSR                 >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, Matrices::Legacy::Ellpack        >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, SparseMatrix_Ellpack             >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, SlicedEllpackAlias               >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, SparseMatrix_SlicedEllpack       >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
       benchmarkSpMV< Real, Matrices::Legacy::BiEllpack      >( benchmark, hostOutVector, inputFileName, verboseMR );
       /* AdEllpack is broken
       benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
        */
    
    } // namespace SpMVLegacy
    
    } // namespace Benchmarks
    } // namespace TNL