    /***************************************************************************
                              spmv.h  -  description
                                 -------------------
        begin                : Dec 30, 2015
        copyright            : (C) 2015 by Tomas Oberhuber et al.
        email                : tomas.oberhuber@fjfi.cvut.cz
     ***************************************************************************/
    
    /* See Copyright Notice in tnl/Copyright */
    
    // Implemented by: Jakub Klinkovsky
    
    #pragma once
    
    #include <iostream>
    #include <new>
    #include <sstream>
    #include <string>

    #include "../Benchmarks.h"

    #include <TNL/Pointers/DevicePointer.h>
    #include <TNL/Matrices/CSR.h>
    #include <TNL/Matrices/Ellpack.h>
    #include <TNL/Matrices/SlicedEllpack.h>
    #include <TNL/Matrices/ChunkedEllpack.h>


    #include <TNL/Matrices/MatrixReader.h>
    using namespace TNL::Matrices;

    #include <cusparse.h>
    #include "cusparseCSRMatrix.h"
    using namespace TNL;
    
    
    namespace TNL {
    namespace Benchmarks {
    
    // Three-parameter alias for Matrices::SlicedEllpack. The benchmark functions
    // below take the matrix format as a
    // `template< typename, typename, typename > class` argument, so every format
    // has to be nameable with exactly the parameters (Real, Device, Index).
    // Any further template parameters of Matrices::SlicedEllpack are presumably
    // left at their defaults -- TODO confirm against SlicedEllpack.h.
    template< typename Real, typename Device, typename Index >
    using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;
    
    
    /**
     * Extracts the bare format name from the type string reported by a matrix.
     *
     * The string returned by matrix.getType() is cut at the first '<' (which
     * drops the template argument list) and everything up to and including the
     * last "::" scope separator is removed. For example
     * "Matrices::CSR< double, Devices::Host, int >" yields "CSR".
     *
     * \param matrix  object whose getType() result is convertible to std::string
     * \return the format name; if the type string carries no "::" qualifier, the
     *         text in front of '<' is returned unchanged
     */
    template< typename Matrix >
    std::string getMatrixFormat( const Matrix& matrix )
    {
        const std::string fullType = matrix.getType();
        const std::string qualifiedName = fullType.substr( 0, fullType.find( '<' ) );

        // An unconditional "position + 2" would wrap npos around to 1 for an
        // unqualified name and silently drop its first character.
        const std::string::size_type scopePos = qualifiedName.rfind( "::" );
        if( scopePos == std::string::npos )
            return qualifiedName;

        return qualifiedName.substr( scopePos + 2 );
    }
    
    /**
     * Writes a short summary of a matrix to an output stream: the format name
     * (as produced by getMatrixFormat), the number of rows, the number of
     * columns and the number of nonzero elements, one item per line.
     *
     * \param matrix  matrix to describe
     * \param str     stream receiving the summary; it is flushed after each line
     */
    template< typename Matrix >
    void printMatrixInfo( const Matrix& matrix,
                          std::ostream& str )
    {
        str << "\n Format: " << getMatrixFormat( matrix ) << std::endl
            << " Rows: " << matrix.getRows() << std::endl
            << " Cols: " << matrix.getColumns() << std::endl
            << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
    }
    
    template< typename Real,
              template< typename, typename, typename > class Matrix,
              template< typename, typename, typename > class Vector = Containers::Vector >
    bool
    benchmarkSpMV( Benchmark & benchmark,
    
        typedef Matrix< Real, Devices::Host, int > HostMatrix;
        typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix;
        typedef Containers::Vector< Real, Devices::Host, int > HostVector;
        typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
        
        HostMatrix hostMatrix;
        DeviceMatrix deviceMatrix;
        HostVector hostVector, hostVector2;
        CudaVector deviceVector, deviceVector2;
        
    
             // Start a buffer to capture the output of MatrixReader
             std::stringstream buffer;
    
             std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
    
             if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
             {
    
                // Capture the original output of MatrixReader, so it isn't printed by console.
                std::string errorMsgBuffer = buffer.str();
                // Reset the buffer
                std::cerr.rdbuf( old );
    
                // WHY DID I CAPTURE THE ERROR MESSAGE ONLY TO RUN MatrixReader again? Use the above capture to print into log and console?
                
    
                std::string matrixFormat = getMatrixFormat( hostMatrix );
                
    
                //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
                std::stringstream buffer;
    
                std::streambuf * old = std::cerr.rdbuf( buffer.rdbuf() );
    
    
                MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix );
    
                errorMsgBuffer = buffer.str();
                
                // Reset the buffer
                std::cerr.rdbuf( old );
                
                std::string stringErrorMsg = "Benchmark failed: Unable to read the matrix.\n"
                                             "matrix format: " + matrixFormat +
                                             "\nFailed to read the matrix file " + 
    
                                             ( std::string )inputFileName + ".\n" + 
    
                                             errorMsgBuffer;
                
                //https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string
                if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' )
                    stringErrorMsg.erase( stringErrorMsg.length() - 1 );
                
                // https://stackoverflow.com/questions/7352099/stdstring-to-char
    
                char* errorMsg = &stringErrorMsg[ 0u ];
    
                // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as 
                //        a prefix in the log file. 
                //        (Try adding more benchmarks in benchmarkSpmvSynthetic(...) 
                //         and you'll see)
                benchmark.addErrorMessage( errorMsg, 1 );
                
                std::cout << std::endl;
    
             std::string matrixFormat = getMatrixFormat( hostMatrix );
             
    
             //https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
             std::stringstream buffer;
             std::streambuf * old = std::cerr.rdbuf(buffer.rdbuf());
    
             MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix );
    
             std::string errorMsgBuffer = buffer.str();
             
             // Reset the buffer
             std::cerr.rdbuf( old );
              
             std::string stringErrorMsg = "Benchmark failed: Not enough memory.\n"
    
                                          "matrix format: " + matrixFormat + 
    
                                          "\nFailed to allocate memory to read the matrix file " +
                                          ( std::string )inputFileName + ".\n" + 
                                          errorMsgBuffer;
             
             //https://stackoverflow.com/questions/1488775/c-remove-new-line-from-multiline-string
             if ( ! stringErrorMsg.empty() && stringErrorMsg[ stringErrorMsg.length() - 1 ] == '\n' )
                    stringErrorMsg.erase( stringErrorMsg.length() - 1 );
    
             // https://stackoverflow.com/questions/7352099/stdstring-to-char
    
             char *errorMsg = &stringErrorMsg[ 0u ];
    
             // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as 
             //        a prefix in the log file. 
             //        (Try adding more benchmarks in benchmarkSpmvSynthetic(...) 
             //         and you'll see)
             benchmark.addErrorMessage( errorMsg, 1 );
             
             std::cout << std::endl;
             
    
        // printMatrixInfo is redundant, because all the information is in the Benchmark's MetadataColumns
    
    //    printMatrixInfo( hostMatrix, std::cout );
    
    #ifdef HAVE_CUDA
        // FIXME: This doesn't work for ChunkedEllpack, because
    
        //        its cross-device assignment is not implemented yet
    
        deviceMatrix = hostMatrix;
    #endif
    
        benchmark.setMetadataColumns( Benchmark::MetadataColumns({
    
              { "matrix format", convertToString( getMatrixFormat( hostMatrix ) ) },
              { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
    
              { "rows", convertToString( hostMatrix.getRows() ) },
              { "columns", convertToString( hostMatrix.getColumns() ) }
           } ));
    
        hostVector.setSize( hostMatrix.getColumns() );
        hostVector2.setSize( hostMatrix.getRows() );
    
    #ifdef HAVE_CUDA
        deviceVector.setSize( hostMatrix.getColumns() );
        deviceVector2.setSize( hostMatrix.getRows() );
    #endif
    
        // reset function
        auto reset = [&]() {
           hostVector.setValue( 1.0 );
           hostVector2.setValue( 0.0 );
     #ifdef HAVE_CUDA
           deviceVector.setValue( 1.0 );
           deviceVector2.setValue( 0.0 );
     #endif
        };
    
        const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
    
        const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
    
        // compute functions
        auto spmvHost = [&]() {
           hostMatrix.vectorProduct( hostVector, hostVector2 );
        };
        auto spmvCuda = [&]() {
           deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
        };
    
        benchmark.setOperation( datasetSize );
        benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
    
        
        // Initialize the host vector to be compared. (The values in hostVector2 will be reset when spmvCuda starts)
        HostVector resultHostVector2;
        resultHostVector2.setSize( hostVector2.getSize() );
        resultHostVector2.setValue( 0.0 );
        // Copy the values
        for( int i = 0; i < hostVector2.getSize(); i++ )
            resultHostVector2.setElement( i, hostVector2.getElement( i ) );
        
     #ifdef HAVE_CUDA
        benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
     #endif
    
        // Setup the device vector to be compared
        HostVector resultDeviceVector2;
        resultDeviceVector2.setSize( hostVector2.getSize() );
        resultDeviceVector2.setValue( 0.0 );
        
    //    resultDeviceVector2 += deviceVector2; // Throws a segfault.
        
        // Copy the values
        for( int i = 0; i < deviceVector2.getSize(); i++ )
            resultDeviceVector2.setElement( i, deviceVector2.getElement( i ) );
        
        Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 );
        Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 );
        
        std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax );
        std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm );
        
        char *absMax = &resultDifferenceAbsMax[ 0u ];
        char *lpNorm = &resultDifferenceLpNorm[ 0u ];
        
        // FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG
    //    benchmark.addErrorMessage( absMax, 1 );
    //    benchmark.addErrorMessage( lpNorm, 1 );
        
        std::cout << std::endl;
        return true;
    }
    
    /**
     * Benchmarks sparse matrix-vector multiplication of the TNL CSR format on
     * the GPU against the cuSPARSE CSR implementation on the GPU.
     *
     * The matrix is loaded from inputFileName with
     * MatrixReader< CSR_HostMatrix >::readMtxFile. Everything the reader prints
     * to std::cerr during the load is captured; on failure it is appended to the
     * error message stored in the benchmark log instead of being printed.
     * The GPU part is compiled only when HAVE_CUDA is defined.
     *
     * \tparam Real    floating-point type of the matrix and vector elements
     * \tparam Vector  currently unused by the body
     * \param benchmark      benchmark object receiving metadata, timings and
     *                       error messages
     * \param inputFileName  path of the matrix file to read
     * \return false if the matrix could not be read (reader reported failure or
     *         threw std::bad_alloc) or the cuSPARSE handle could not be
     *         created, true otherwise
     */
    template< typename Real,
              template< typename, typename, typename > class Vector = Containers::Vector >
    bool
    benchmarkCusparseSpMV( Benchmark & benchmark,
                   const String & inputFileName )
    {
        using CSR_HostMatrix = Matrices::CSR< Real, Devices::Host, int >;
        using CSR_DeviceMatrix = Matrices::CSR< Real, Devices::Cuda, int >;
        using HostVector = Containers::Vector< Real, Devices::Host, int >;
        using CudaVector = Containers::Vector< Real, Devices::Cuda, int >;

        CSR_HostMatrix CSRhostMatrix;
        CSR_DeviceMatrix CSRdeviceMatrix;
        CudaVector deviceVector, deviceVector2;

        // Redirects std::cerr for its lifetime and restores the previous buffer
        // on every exit path, including exceptions. Without this, a throwing
        // reader would leave std::cerr pointing at a destroyed stringstream.
        // https://stackoverflow.com/questions/5419356/redirect-stdout-stderr-to-a-string
        struct CerrRedirect
        {
            explicit CerrRedirect( std::streambuf* target )
            : previous( std::cerr.rdbuf( target ) ) {}
            ~CerrRedirect() { std::cerr.rdbuf( previous ); }
            CerrRedirect( const CerrRedirect& ) = delete;
            CerrRedirect& operator=( const CerrRedirect& ) = delete;

            std::streambuf* previous;
        };

        bool readSucceeded = false;
        bool outOfMemory = false;
        std::string readerOutput;
        {
            // readerLog is declared first so that it outlives the redirect.
            std::stringstream readerLog;
            CerrRedirect redirect( readerLog.rdbuf() );
            try
            {
                readSucceeded = MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix );
            }
            catch( const std::bad_alloc& )
            {
                outOfMemory = true;
            }
            // The output captured during the one and only read attempt is
            // reused for the log; the file is not read a second time.
            readerOutput = readerLog.str();
        }

        if( ! readSucceeded )
        {
            std::string stringErrorMsg = outOfMemory
                ? "Benchmark failed: Not enough memory.\n"
                : "Benchmark failed: Unable to read the matrix.\n";
            stringErrorMsg += "matrix format: " + getMatrixFormat( CSRhostMatrix );
            stringErrorMsg += outOfMemory
                ? "\nFailed to allocate memory to read the matrix file "
                : "\nFailed to read the matrix file ";
            stringErrorMsg += static_cast< std::string >( inputFileName ) + ".\n" + readerOutput;

            // Drop one trailing newline so the log entry does not end with an
            // empty line. The message is never empty at this point.
            if( stringErrorMsg.back() == '\n' )
                stringErrorMsg.pop_back();

            // FIXME: Every other benchmark, the errorMsg doesn't have a "!" as
            //        a prefix in the log file.
            //        (Try adding more benchmarks in benchmarkSpmvSynthetic(...)
            //         and you'll see)
            benchmark.addErrorMessage( &stringErrorMsg[ 0 ], 1 );

            std::cout << std::endl;

            return false;
        }

        benchmark.setMetadataColumns( Benchmark::MetadataColumns({
              { "matrix format", convertToString( getMatrixFormat( CSRhostMatrix ) ) },
              { "non-zeros", convertToString( CSRhostMatrix.getNumberOfNonzeroMatrixElements() ) },
              { "rows", convertToString( CSRhostMatrix.getRows() ) },
              { "columns", convertToString( CSRhostMatrix.getColumns() ) }
           } ));

    #ifdef HAVE_CUDA
        // Owns the cuSPARSE handle and destroys it on every exit path.
        struct CusparseHandleOwner
        {
            CusparseHandleOwner()
            : created( cusparseCreate( &handle ) == CUSPARSE_STATUS_SUCCESS ) {}
            ~CusparseHandleOwner() { if( created ) cusparseDestroy( handle ); }
            CusparseHandleOwner( const CusparseHandleOwner& ) = delete;
            CusparseHandleOwner& operator=( const CusparseHandleOwner& ) = delete;

            cusparseHandle_t handle;
            bool created;
        };

        // Declared before cusparseCSR so that the handle is destroyed after the
        // wrapper, in case the wrapper keeps the pointer passed to init().
        CusparseHandleOwner cusparse;
        if( ! cusparse.created )
        {
            std::string handleErrorMsg = "Benchmark failed: Unable to create the cuSPARSE handle.";
            benchmark.addErrorMessage( &handleErrorMsg[ 0 ], 1 );
            std::cout << std::endl;
            return false;
        }

        CSRdeviceMatrix = CSRhostMatrix;

        TNL::CusparseCSR< Real > cusparseCSR;
        cusparseCSR.init( CSRdeviceMatrix, &cusparse.handle );

        // Input vector has one entry per matrix column, output vector one entry
        // per matrix row.
        deviceVector.setSize( CSRhostMatrix.getColumns() );
        deviceVector2.setSize( CSRhostMatrix.getRows() );

        // reset function: passed to benchmark.time() together with the compute
        // function; it also overwrites the previous result
        auto reset = [&]() {
           deviceVector.setValue( 1.0 );
           deviceVector2.setValue( 0.0 );
        };

        const int elements = CSRhostMatrix.getNumberOfNonzeroMatrixElements();

        const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;

        // compute functions
        auto spmvCuda = [&]() {
           CSRdeviceMatrix.vectorProduct( deviceVector, deviceVector2 );
        };
        auto spmvCusparse = [&]() {
            cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
        };

        benchmark.setOperation( datasetSize );

        benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );

        // Keep a host copy of the TNL CSR result. (The values in deviceVector2
        // will be reset when the cuSPARSE benchmark starts.)
        HostVector tnlCsrResult;
        tnlCsrResult.setSize( deviceVector2.getSize() );
        tnlCsrResult.setValue( 0.0 );
        for( int i = 0; i < deviceVector2.getSize(); i++ )
            tnlCsrResult.setElement( i, deviceVector2.getElement( i ) );

        benchmark.time< Devices::Cuda >( reset, "GPU-Cusparse", spmvCusparse );

        // Host copy of the cuSPARSE result for comparison.
        HostVector cusparseResult;
        cusparseResult.setSize( tnlCsrResult.getSize() );
        cusparseResult.setValue( 0.0 );
        for( int i = 0; i < deviceVector2.getSize(); i++ )
            cusparseResult.setElement( i, deviceVector2.getElement( i ) );

        const Real differenceAbsMax = tnlCsrResult.differenceAbsMax( cusparseResult );
        const Real differenceLpNorm = tnlCsrResult.differenceLpNorm( cusparseResult, 1 );

        std::string resultDifferenceAbsMax = "differenceAbsMax = " + std::to_string( differenceAbsMax );
        std::string resultDifferenceLpNorm = "differenceLpNorm = " + std::to_string( differenceLpNorm );

        // FIXME: THIS ISN'T AN ELEGANT SOLUTION, IT MAKES THE LOG FILE VERY LONG
    //    benchmark.addErrorMessage( &resultDifferenceAbsMax[ 0 ], 1 );
    //    benchmark.addErrorMessage( &resultDifferenceLpNorm[ 0 ], 1 );
    #endif

        std::cout << std::endl;
        return true;
    }
    
    template< typename Real = double,
              typename Index = int >
    bool
    benchmarkSpmvSynthetic( Benchmark & benchmark,
    
    {
       bool result = true;
       // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
    
       result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
    
       
       // This doesn't have a titles (matrix format, rows, cols, etc.) in the output, because the header is the same as before (CSR).
       result |= benchmarkCusparseSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
       
    
       result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
       result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
    //   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
    
       return result;
    }
    
    } // namespace Benchmarks
    } // namespace TNL