Commit 5f62fb7f authored by Lukas Cejka's avatar Lukas Cejka
Browse files

Implemented rough version of SpMV Benchmark for mtx files.

parent 7deae114
Loading
Loading
Loading
Loading
+75 −105
Original line number Diff line number Diff line
@@ -31,65 +31,20 @@ template< typename Real, typename Device, typename Index >
using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;

template< typename Matrix >
int setHostTestMatrix( Matrix& matrix,
                       const int elementsPerRow )
void printMatrixInfo( const String& inputFileName,
                      const Matrix& matrix,
                      std::ostream& str )
{
   const int size = matrix.getRows();
   int elements( 0 );
   for( int row = 0; row < size; row++ ) {
      int col = row - elementsPerRow / 2;
      for( int element = 0; element < elementsPerRow; element++ ) {
         if( col + element >= 0 &&
            col + element < size )
         {
            matrix.setElement( row, col + element, element + 1 );
            elements++;
         }
      }
   }
   return elements;
}

#ifdef HAVE_CUDA
template< typename Matrix >
__global__ void setCudaTestMatrixKernel( Matrix* matrix,
                                         const int elementsPerRow,
                                         const int gridIdx )
{
   const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   if( rowIdx >= matrix->getRows() )
      return;
   int col = rowIdx - elementsPerRow / 2;
   for( int element = 0; element < elementsPerRow; element++ ) {
      if( col + element >= 0 &&
         col + element < matrix->getColumns() )
         matrix->setElementFast( rowIdx, col + element, element + 1 );
    // Get only the name of the format from getType().
    std::string mtrxFullType = matrix.getType();
    std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<"));
    std::string type = mtrxType.substr(mtrxType.find(':') + 2);
    
    str << "\n Format: " << type << std::endl;
    str << " Rows: " << matrix.getRows() << std::endl;
    str << " Cols: " << matrix.getColumns() << std::endl;
    str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
}
}
#endif

/**
 * Fills a matrix with the synthetic test pattern by launching
 * setCudaTestMatrixKernel with one thread per matrix row.
 *
 * The rows are covered by blocks of 256 threads. Because the number of
 * blocks may exceed Devices::Cuda::getMaxGridSize(), the work is split into
 * several consecutive kernel launches ("grids"); the index of the current
 * launch is passed to the kernel as gridIdx.
 *
 * Without HAVE_CUDA this function does nothing.
 *
 * \param matrix          Matrix to be filled; it is passed to the kernel
 *                        through Pointers::DevicePointer.
 * \param elementsPerRow  Forwarded unchanged to setCudaTestMatrixKernel.
 */
template< typename Matrix >
void setCudaTestMatrix( Matrix& matrix,
                        const int elementsPerRow )
{
#ifdef HAVE_CUDA
   typedef typename Matrix::IndexType IndexType;
   Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
   const IndexType maxGridSize = Devices::Cuda::getMaxGridSize();
   dim3 cudaBlockSize( 256 ), cudaGridSize( maxGridSize );
   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
   const IndexType cudaGrids = roundUpDivision( cudaBlocks, maxGridSize );
   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
      // The last launch covers only the blocks that remain. Computing the
      // remainder by subtraction (rather than cudaBlocks % maxGridSize) keeps
      // the grid size in the range 1..maxGridSize even when cudaBlocks is an
      // exact multiple of maxGridSize; the modulo would give 0 there, which
      // is not a valid launch configuration.
      if( gridIdx == cudaGrids - 1 )
         cudaGridSize.x = cudaBlocks - gridIdx * maxGridSize;
      setCudaTestMatrixKernel< Matrix >
         <<< cudaGridSize, cudaBlockSize >>>
         ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
      // Report launch/execution errors of the kernel above.
      TNL_CHECK_CUDA_DEVICE;
   }
#endif
}


// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
template< typename Real,
@@ -109,21 +64,37 @@ benchmarkSpMV( Benchmark & benchmark,
    HostVector hostVector, hostVector2;
    CudaVector deviceVector, deviceVector2;
    
    try
      {
         if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
         {
            std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
    else
            return false;
         }
      }
      catch( std::bad_alloc )
      {
         std::cerr << "Not enough memory to read the matrix." << std::endl;
         return false;
      }
    printMatrixInfo( inputFileName, hostMatrix, std::cout );
#ifdef HAVE_CUDA
        if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) )
            std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
    // FIXME: This doesn't work for ChunkedEllpack, because
    //        its cross-device assignment is not implemented yet.
    deviceMatrix = hostMatrix;
#endif

    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "rows", convertToString( hostMatrix.getRows() ) },
          { "columns", convertToString( hostMatrix.getColumns() ) }
       } ));

    hostVector.setSize( hostMatrix.getColumns() );
    hostVector2.setSize( hostMatrix.getRows() );

#ifdef HAVE_CUDA
        deviceVector.setSize( deviceMatrix.getColumns() );
        deviceVector2.setSize( deviceMatrix.getRows() );
    deviceVector.setSize( hostMatrix.getColumns() );
    deviceVector2.setSize( hostMatrix.getRows() );
#endif

    // reset function
@@ -155,7 +126,6 @@ benchmarkSpMV( Benchmark & benchmark,
 #endif
    return true;
}
}

template< typename Real = double,
          typename Index = int >
@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
   bool result = true;
   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
//   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow );
//   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
//   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow );
   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
//   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
   return result;
}

+10 −31
Original line number Diff line number Diff line
@@ -43,41 +43,25 @@ runSpMVBenchmarks( Benchmark & benchmark,
                   Benchmark::MetadataMap metadata,
                   const String & inputFileName )
{
   // DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h)
    
    typedef Matrices::CSR< Real, Devices::Host, int > CSRType;
    CSRType csrMatrix;
    
    if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) )
        std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
    else
    {
        const std::size_t rows = csrMatrix.getRows();
        const std::size_t cols = csrMatrix.getColumns();
    const String precision = getType< Real >();
    metadata["precision"] = precision;

    // Sparse matrix-vector multiplication
    benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
                            metadata );
        benchmark.setMetadataColumns( Benchmark::MetadataColumns({
              { "rows", convertToString( rows ) },
              { "columns", convertToString( cols ) }
           } ));
    benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
}
}

void
setupConfig( Config::ConfigDescription & config )
{
   config.addDelimiter( "Benchmark settings:" );
   config.addRequiredEntry< String >( "input-file", "Input file name." );
   config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
   config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log");
   config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
   config.addEntryEnum( "append" );
   config.addEntryEnum( "overwrite" );
   config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
   config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" );
   config.addEntryEnum( "float" );
   config.addEntryEnum( "double" );
   config.addEntryEnum( "all" );
@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
   const String & logFileName = parameters.getParameter< String >( "log-file" );
   const String & outputMode = parameters.getParameter< String >( "output-mode" );
   const String & precision = parameters.getParameter< String >( "precision" );
   // FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
   // which have a default value. The workaround below works for int values, but it is not possible
   // to pass 64-bit integer values
//   const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
//   const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
   const int loops = parameters.getParameter< int >( "loops" );
   const int verbose = parameters.getParameter< int >( "verbose" );

@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
      return EXIT_FAILURE;
   }

   std::cout << "== BENCHMARK FINISHED ==" << std::endl;
   std::cout << "\n== BENCHMARK FINISHED ==" << std::endl;
   return EXIT_SUCCESS;
}