Tomáš Oberhuber
--- a/src/Benchmarks/SpMV/spmv.h

+ 40

− 40
+++ b/src/Benchmarks/SpMV/spmv.h

+ 40

− 40
 @@ -40,11 +40,11 @@ using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >;
 std::string getMatrixFileName( const String& InputFileName )
 {
    std::string fileName = InputFileName;
-    
+
    const size_t last_slash_idx = fileName.find_last_of( "/\\" );
    if( std::string::npos != last_slash_idx )
        fileName.erase( 0, last_slash_idx + 1 );
-    
+
    return fileName;
 }

 @@ -55,7 +55,7 @@ std::string getMatrixFormat( const Matrix& matrix )
    std::string mtrxFullType = getType( matrix );
    std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) );
    std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 );
-    
+
    return format;
 }

 @@ -63,7 +63,7 @@ std::string getMatrixFormat( const Matrix& matrix )
 template< typename Matrix >
 void printMatrixInfo( const Matrix& matrix,
                      std::ostream& str )
-{    
+{
    str << "\n Format: " << getMatrixFormat( matrix ) << std::endl;
    str << " Rows: " << matrix.getRows() << std::endl;
    str << " Cols: " << matrix.getColumns() << std::endl;
 @@ -81,67 +81,67 @@ benchmarkSpMV( Benchmark& benchmark,
    // Setup CSR for cuSPARSE. It will be compared to the format given as a template parameter to this function
    typedef Matrices::CSR< Real, Devices::Host, int > CSR_HostMatrix;
    typedef Matrices::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix;
-    
+
    CSR_HostMatrix CSRhostMatrix;
    CSR_DeviceMatrix CSRdeviceMatrix;
-    
+
    // Read the matrix for CSR, to set up cuSPARSE
    try
-      {         
+      {
         if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) )
-         { 
+         {
             throw std::bad_alloc();
             return false;
         }
      }
-      catch( std::bad_alloc e )
+      catch( std::bad_alloc& e )
      {
          e.what();
          return false;
      }
-    
+
 #ifdef HAVE_CUDA
    // cuSPARSE handle setup
    cusparseHandle_t cusparseHandle;
    cusparseCreate( &cusparseHandle );
-    
+
    // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device
    CSRdeviceMatrix = CSRhostMatrix;
-    
+
    // Delete the CSRhostMatrix, so it doesn't take up unnecessary space
    CSRhostMatrix.reset();
-    
+
    // Initialize the cusparseCSR matrix.
    TNL::CusparseCSR< Real > cusparseCSR;
    cusparseCSR.init( CSRdeviceMatrix, &cusparseHandle );
 #endif
-    
+
    // Setup the format which is given as a template parameter to this function
    typedef Matrix< Real, Devices::Host, int > HostMatrix;
    typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix;
    typedef Containers::Vector< Real, Devices::Host, int > HostVector;
    typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;
-    
+
    HostMatrix hostMatrix;
    DeviceMatrix deviceMatrix;
    HostVector hostVector, hostVector2;
    CudaVector deviceVector, deviceVector2;
-    
+
    // Load the format
    try
-      {         
+      {
         if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) )
         {
             throw std::bad_alloc();
             return false;
         }
      }
-      catch( std::bad_alloc e )
+      catch( std::bad_alloc& e )
      {
          e.what();
          return false;
      }
-    
+

    // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS),
    //  because we need the matrix loaded first to get the rows and columns
 @@ -191,17 +191,17 @@ benchmarkSpMV( Benchmark& benchmark,

    benchmark.setOperation( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
-    
+
    // Initialize the host vector to be compared.
    //  (The values in hostVector2 will be reset when spmvCuda starts)
    HostVector resultHostVector2;
    resultHostVector2.setSize( hostVector2.getSize() );
    resultHostVector2.setValue( 0.0 );
-    
+
    // Copy the values
    resultHostVector2 = hostVector2;
-    
-    // Setup cuSPARSE MetaData, since it has the same header as CSR, 
+
+    // Setup cuSPARSE MetaData, since it has the same header as CSR,
    //  and therefore will not get its own headers (rows, cols, speedup etc.) in log.
    //      * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
 @@ -211,7 +211,7 @@ benchmarkSpMV( Benchmark& benchmark,
          { "columns", convertToString( hostMatrix.getColumns() ) },
          { "matrix format", convertToString( "CSR-cuSPARSE" ) }
       } ));
-    
+
 #ifdef HAVE_CUDA
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );

 @@ -220,51 +220,51 @@ benchmarkSpMV( Benchmark& benchmark,
    HostVector resultDeviceVector2;
    resultDeviceVector2.setSize( deviceVector2.getSize() );
    resultDeviceVector2.setValue( 0.0 );
-    
+
    resultDeviceVector2 = deviceVector2;
-    
+
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse );
-    
+
    HostVector resultcuSPARSEDeviceVector2;
    resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() );
    resultcuSPARSEDeviceVector2.setValue( 0.0 );
-    
+
    resultcuSPARSEDeviceVector2 = deviceVector2;
-    
+
    // Difference between GPU (current format) and GPU-cuSPARSE results
    //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
    Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) );
    //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 );
    Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 );
-    
+
    std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax );
    std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm );
-    
+
    char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ];
    char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ];
-    
-    
+
+
    // Difference between CPU and GPU results for the current format
    //Real differenceAbsMax = resultHostVector2.differenceAbsMax( resultDeviceVector2 );
    Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) );
    //Real differenceLpNorm = resultHostVector2.differenceLpNorm( resultDeviceVector2, 1 );
    Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 );
-    
+
    std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax );
    std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm );
-    
+
    char *CPUxGPU_absMax = &CPUxGPU_resultDifferenceAbsMax[ 0u ];
    char *CPUxGPU_lpNorm = &CPUxGPU_resultDifferenceLpNorm[ 0u ];
-    
+
    // Print result differences of CPU and GPU of current format
    std::cout << CPUxGPU_absMax << std::endl;
    std::cout << CPUxGPU_lpNorm << std::endl;
-    
+
    // Print result differences of GPU of current format and GPU with cuSPARSE.
    std::cout << GPUcuSparse_absMax << std::endl;
    std::cout << GPUcuSparse_lpNorm << std::endl;
 #endif
-    
+
    std::cout << std::endl;
    return true;
 }
 @@ -277,11 +277,11 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
                        bool verboseMR )
 {
   bool result = true;
-   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );   
+   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
   result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
-   
+
   // AdEllpack is broken
 //   result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
   result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );