Commit 0c6f26e5 authored by Tomáš Oberhuber
Browse files

Programming SPMV benchmark.

parent 8b80f8f3
Loading
Loading
Loading
Loading
+105 −50
Original line number Diff line number Diff line
@@ -130,7 +130,7 @@ double computeThroughput( const long int nonzeroElements,

template< typename Matrix,
          typename Vector >
void benchmarkHostMatrix( const Matrix& matrix,
void benchmarkMatrix( const Matrix& matrix,
                      const Vector& x,
                      Vector& b,
                      const long int nonzeroElements,
@@ -160,6 +160,12 @@ void benchmarkHostMatrix( const Matrix& matrix,
   logFile << "  " << throughput << endl;
}

/**
 * Records a failed test in the benchmark log.
 *
 * Writes two "N/A" lines to the given log stream; each line is terminated
 * with endl, so the stream is flushed after every line.
 *
 * NOTE(review): presumably one placeholder per value that a successful
 * benchmark run would have logged - confirm against the log format.
 *
 * @param logFile log stream that receives the placeholder lines
 */
void writeTestFailed( fstream& logFile )
{
   const int placeholderLines = 2;
   for( int line = 0; line < placeholderLines; line++ )
      logFile << "N/A" << endl;
}

template< typename Real >
bool setupBenchmark( const tnlParameterContainer& parameters )
{
@@ -213,7 +219,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
      rowLengthsCuda.setSize( csrMatrix.getRows() );
      rowLengthsCuda = rowLengthsHost;
#endif
      benchmarkHostMatrix( csrMatrix,
      benchmarkMatrix( csrMatrix,
                       hostX,
                       hostB,
                       nonzeroElements,
@@ -226,7 +232,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
      typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType;
      EllpackMatrixType ellpackMatrix;
      ellpackMatrix.copyFrom( csrMatrix, rowLengthsHost );
      benchmarkHostMatrix( ellpackMatrix,
      benchmarkMatrix( ellpackMatrix,
                       hostX,
                       hostB,
                       nonzeroElements,
@@ -241,10 +247,12 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
      if( ! cudaEllpackMatrix.copyFrom( ellpackMatrix, rowLengthsCuda ) )
      {
         cerr << "I am not able to transfer the matrix on GPU." << endl;
         return false;
         writeTestFailed( logFile );
      }
      else
      {
         cout << " done." << endl;
      benchmarkHostMatrix( cudaEllpackMatrix,
         benchmarkMatrix( cudaEllpackMatrix,
                          cudaX,
                          cudaB,
                          nonzeroElements,
@@ -252,6 +260,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
                          stopTime,
                          verbose,
                          logFile );
      }
      cudaEllpackMatrix.reset();
#endif
      ellpackMatrix.reset();
@@ -259,7 +268,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
      typedef tnlSlicedEllpackMatrix< Real, tnlHost, int > SlicedEllpackMatrixType;
      SlicedEllpackMatrixType slicedEllpackMatrix;
      slicedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost );
      benchmarkHostMatrix( slicedEllpackMatrix,
      benchmarkMatrix( slicedEllpackMatrix,
                       hostX,
                       hostB,
                       nonzeroElements,
@@ -267,12 +276,35 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
                       stopTime,
                       verbose,
                       logFile );
#ifdef HAVE_CUDA
      typedef tnlSlicedEllpackMatrix< Real, tnlCuda, int > SlicedEllpackMatrixCudaType;
      SlicedEllpackMatrixCudaType cudaSlicedEllpackMatrix;
      cout << "Copying matrix to GPU... ";
      if( ! cudaSlicedEllpackMatrix.copyFrom( slicedEllpackMatrix, rowLengthsCuda ) )
      {
         cerr << "I am not able to transfer the matrix on GPU." << endl;
         writeTestFailed( logFile );
      }
      else
      {
         cout << " done." << endl;
         benchmarkMatrix( cudaSlicedEllpackMatrix,
                          cudaX,
                          cudaB,
                          nonzeroElements,
                          "SlicedEllpack Cuda",
                          stopTime,
                          verbose,
                          logFile );
      }
      cudaSlicedEllpackMatrix.reset();
#endif
      slicedEllpackMatrix.reset();

      typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType;
      ChunkedEllpackMatrixType chunkedEllpackMatrix;
      chunkedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost );
      benchmarkHostMatrix( chunkedEllpackMatrix,
      benchmarkMatrix( chunkedEllpackMatrix,
                       hostX,
                       hostB,
                       nonzeroElements,
@@ -280,6 +312,29 @@ bool setupBenchmark( const tnlParameterContainer& parameters )
                       stopTime,
                       verbose,
                       logFile );
#ifdef HAVE_CUDA
      typedef tnlChunkedEllpackMatrix< Real, tnlCuda, int > ChunkedEllpackMatrixCudaType;
      ChunkedEllpackMatrixCudaType cudaChunkedEllpackMatrix;
      cout << "Copying matrix to GPU... ";
      if( ! cudaChunkedEllpackMatrix.copyFrom( chunkedEllpackMatrix, rowLengthsCuda ) )
      {
         cerr << "I am not able to transfer the matrix on GPU." << endl;
         writeTestFailed( logFile );
      }
      else
      {
         cout << " done." << endl;
         benchmarkMatrix( cudaChunkedEllpackMatrix,
                          cudaX,
                          cudaB,
                          nonzeroElements,
                          "ChunkedEllpack Cuda",
                          stopTime,
                          verbose,
                          logFile );
      }
      cudaChunkedEllpackMatrix.reset();
#endif
      chunkedEllpackMatrix.reset();
   }
}