Commit 2ebb1334 authored by Lukas Cejka, committed by Tomáš Oberhuber

Implemented a rough version of the SpMV benchmark for mtx files.

parent aa627012
Merge request !45: Matrices revision
@@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index >
using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;
template< typename Matrix >
int setHostTestMatrix( Matrix& matrix,
const int elementsPerRow )
void printMatrixInfo( const String& inputFileName,
const Matrix& matrix,
std::ostream& str )
{
const int size = matrix.getRows();
int elements( 0 );
for( int row = 0; row < size; row++ ) {
int col = row - elementsPerRow / 2;
for( int element = 0; element < elementsPerRow; element++ ) {
if( col + element >= 0 &&
col + element < size )
{
matrix.setElement( row, col + element, element + 1 );
elements++;
}
}
}
return elements;
}
#ifdef HAVE_CUDA
template< typename Matrix >
__global__ void setCudaTestMatrixKernel( Matrix* matrix,
const int elementsPerRow,
const int gridIdx )
{
const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( rowIdx >= matrix->getRows() )
return;
int col = rowIdx - elementsPerRow / 2;
for( int element = 0; element < elementsPerRow; element++ ) {
if( col + element >= 0 &&
col + element < matrix->getColumns() )
matrix->setElementFast( rowIdx, col + element, element + 1 );
}
}
#endif
template< typename Matrix >
void setCudaTestMatrix( Matrix& matrix,
const int elementsPerRow )
{
#ifdef HAVE_CUDA
typedef typename Matrix::IndexType IndexType;
typedef typename Matrix::RealType RealType;
Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
if( gridIdx == cudaGrids - 1 )
cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
setCudaTestMatrixKernel< Matrix >
<<< cudaGridSize, cudaBlockSize >>>
( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
TNL_CHECK_CUDA_DEVICE;
}
#endif
// Get only the name of the format from getType().
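// E.g. a hypothetical full type string "Matrices::CSR< double, Devices::Host, int >"
// would be reduced to just "CSR" by the two substr() calls below.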
std::string mtrxFullType = matrix.getType();
std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<"));
std::string type = mtrxType.substr(mtrxType.find(':') + 2);
str << "\n Format: " << type << std::endl;
str << " Rows: " << matrix.getRows() << std::endl;
str << " Cols: " << matrix.getColumns() << std::endl;
str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
}
// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
template< typename Real,
template< typename, typename, typename > class Matrix,
@@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark,
HostVector hostVector, hostVector2;
CudaVector deviceVector, deviceVector2;
if( ! MatrixReader< HostMatrix >::readMtxFile(inputFileName, hostMatrix ) )
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
else
{
#ifdef HAVE_CUDA
if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) )
try
{
if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
{
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
#endif
hostVector.setSize( hostMatrix.getColumns() );
hostVector2.setSize( hostMatrix.getRows() );
#ifdef HAVE_CUDA
deviceVector.setSize( deviceMatrix.getColumns() );
deviceVector2.setSize( deviceMatrix.getRows() );
#endif
// reset function
auto reset = [&]() {
hostVector.setValue( 1.0 );
hostVector2.setValue( 0.0 );
#ifdef HAVE_CUDA
deviceVector.setValue( 1.0 );
deviceVector2.setValue( 0.0 );
#endif
};
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
// compute functions
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostVector, hostVector2 );
};
auto spmvCuda = [&]() {
deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
};
benchmark.setOperation( datasetSize );
benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
#endif
return true;
}
return false;
}
}
catch( const std::bad_alloc& )
{
std::cerr << "Not enough memory to read the matrix." << std::endl;
return false;
}
printMatrixInfo( inputFileName, hostMatrix, std::cout );
#ifdef HAVE_CUDA
// FIXME: This doesn't work for ChunkedEllpack, because
// its cross-device assignment is not implemented yet.
deviceMatrix = hostMatrix;
#endif
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "rows", convertToString( hostMatrix.getRows() ) },
{ "columns", convertToString( hostMatrix.getColumns() ) }
} ));
hostVector.setSize( hostMatrix.getColumns() );
hostVector2.setSize( hostMatrix.getRows() );
#ifdef HAVE_CUDA
deviceVector.setSize( hostMatrix.getColumns() );
deviceVector2.setSize( hostMatrix.getRows() );
#endif
// reset function: reinitialize the input/output vectors before each timed run
auto reset = [&]() {
hostVector.setValue( 1.0 );
hostVector2.setValue( 0.0 );
#ifdef HAVE_CUDA
deviceVector.setValue( 1.0 );
deviceVector2.setValue( 0.0 );
#endif
};
const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();
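// Dataset size estimate (an assumption about what this benchmark counts): for each
// nonzero element, one matrix value and one input-vector element (Real each) plus
// one column index (int) are transferred; the total is converted to GB below.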
const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
// compute functions
auto spmvHost = [&]() {
hostMatrix.vectorProduct( hostVector, hostVector2 );
};
auto spmvCuda = [&]() {
deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
};
benchmark.setOperation( datasetSize );
benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
#endif
return true;
}
template< typename Real = double,
@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
bool result = true;
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow );
// result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow );
result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
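// NOTE: ChunkedEllpack is left commented out; its cross-device assignment is not
// implemented yet (see the FIXME in benchmarkSpMV above).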
return result;
}
@@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark,
Benchmark::MetadataMap metadata,
const String & inputFileName )
{
// TODO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h)
typedef Matrices::CSR< Real, Devices::Host, int > CSRType;
CSRType csrMatrix;
if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) )
std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
else
{
const std::size_t rows = csrMatrix.getRows();
const std::size_t cols = csrMatrix.getColumns();
const String precision = getType< Real >();
metadata["precision"] = precision;
// Sparse matrix-vector multiplication
benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
metadata );
benchmark.setMetadataColumns( Benchmark::MetadataColumns({
{ "rows", convertToString( rows ) },
{ "columns", convertToString( cols ) }
} ));
benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
}
const String precision = getType< Real >();
metadata["precision"] = precision;
// Sparse matrix-vector multiplication
benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
metadata );
benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
}
void
@@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config )
{
config.addDelimiter( "Benchmark settings:" );
config.addRequiredEntry< String >( "input-file", "Input file name." );
config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log");
config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
config.addEntryEnum( "append" );
config.addEntryEnum( "overwrite" );
config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" );
config.addEntryEnum( "float" );
config.addEntryEnum( "double" );
config.addEntryEnum( "all" );
@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
const String & logFileName = parameters.getParameter< String >( "log-file" );
const String & outputMode = parameters.getParameter< String >( "output-mode" );
const String & precision = parameters.getParameter< String >( "precision" );
// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
// which have a default value. The workaround below works for int values, but it is not possible
// to pass 64-bit integer values
// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
const int loops = parameters.getParameter< int >( "loops" );
const int verbose = parameters.getParameter< int >( "verbose" );
@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
return EXIT_FAILURE;
}
std::cout << "== BENCHMARK FINISHED ==" << std::endl;
std::cout << "\n== BENCHMARK FINISHED ==" << std::endl;
return EXIT_SUCCESS;
}