Commit b2334f97 authored by Tomáš Oberhuber
Browse files

Refactoring the matrix benchmarks.

parent 999d1ba3
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -47,6 +47,19 @@ Index tnlSparseMatrix< Real, Device, Index >::getNumberOfMatrixElements() const
   return this->values.getSize();
}

/****
 * Walks over all stored entries and counts those whose column index differs
 * from the number of columns and whose value is not zero.
 * NOTE(review): a column index equal to `columns` presumably marks a padding
 * slot - confirm against the storage format.
 */
template< typename Real,
          typename Device,
          typename Index >
Index tnlSparseMatrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
{
   const IndexType storedElements = this->values.getSize();
   IndexType counter( 0 );
   for( IndexType slot = 0; slot < storedElements; slot++ )
   {
      // Slots whose column index equals the column count are never counted.
      if( this->columnIndexes.getElement( slot ) == this->columns )
         continue;
      if( this->values.getElement( slot ) != 0.0 )
         counter++;
   }
   return counter;
}

template< typename Real,
          typename Device,
          typename Index >
+2 −0
Original line number Diff line number Diff line
@@ -39,6 +39,8 @@ class tnlSparseMatrix : public tnlMatrix< Real, Device, Index >

   IndexType getNumberOfMatrixElements() const;

   IndexType getNumberOfNonzeroMatrixElements() const;

   void reset();

   bool save( tnlFile& file ) const;
+11 −11
Original line number Diff line number Diff line
@@ -9,18 +9,18 @@ SET( tnlSpmvBenchmark_headers sparse-matrix-benchmark.h
                              tnlSpmvBenchmarkRgCSRMatrix.h )
     

#IF( BUILD_CUDA )
#    CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu )
#    SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" )        
#ELSE()
#    ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp )
#    SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" )
#ENDIF()
#TARGET_LINK_LIBRARIES( tnl-sparse-matrix-benchmark${debugExt} tnl${debugExt}-${tnlVersion}
#                                                              ${CUSPARSE_LIBRARY} )
IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu )
    SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" )        
ELSE()
    ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp )
    SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" )
ENDIF()
TARGET_LINK_LIBRARIES( tnl-sparse-matrix-benchmark${debugExt} tnl${debugExt}-${tnlVersion}
                                                              ${CUSPARSE_LIBRARY} )

#INSTALL( TARGETS tnl-sparse-matrix-benchmark${debugExt}
#         RUNTIME DESTINATION bin )
INSTALL( TARGETS tnl-sparse-matrix-benchmark${debugExt}
         RUNTIME DESTINATION bin )


#IF( BUILD_CUDA )
+26 −30
Original line number Diff line number Diff line
@@ -28,12 +28,13 @@
#include <matrices/tnlSlicedEllpackMatrix.h>
#include <matrices/tnlChunkedEllpackMatrix.h>
#include <matrices/tnlCSRMatrix.h>
#include <matrices/tnlMatrixReader.h>
#include <core/mfuncs.h>
#include "tnlSpmvBenchmarkCSRMatrix.h"
#include "tnlSpmvBenchmarkCusparseCSRMatrix.h"
#include "tnlSpmvBenchmark.h"
/*#include "tnlSpmvBenchmarkCusparseCSRMatrix.h"
#include "tnlSpmvBenchmarkHybridMatrix.h"
#include "tnlSpmvBenchmarkRgCSRMatrix.h"
#include "tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h"
#include "tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h"*/

#include "tnlConfig.h"
const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-sparse-matrix-benchmark.cfg.desc";
@@ -41,6 +42,7 @@ const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-sparse-matrix-benchmark.cfg.

using namespace std;

/*
double bestCudaRgCSRGflops( 0 );

template< typename Real >
@@ -100,6 +102,7 @@ void benchmarkRgCSRFormat( const tnlCSRMatrix< Real, tnlHost, int >& csrMatrix,
      cudaRgCsrMatrixBenchmark. tearDown();
   }
}
*/

template< typename RealType >
bool benchmarkMatrix( const tnlParameterContainer& parameters )
@@ -111,6 +114,13 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters )
   CsrMatrix csrMatrix;

   const tnlString& inputFileName = parameters.GetParameter< tnlString >( "input-file" );
   const tnlString& inputMtxFileName = parameters.GetParameter< tnlString >( "input-mtx-file" );
   const tnlString& logFileName = parameters.GetParameter< tnlString >( "log-file" );
   const tnlString& pdfFileName = parameters.GetParameter< tnlString >( "pdf-file" );
   bool verbose = parameters.GetParameter< bool >( "verbose" );
   const int maxIterations = parameters.GetParameter< tnlString >( "max-iterations" );


   fstream inputFile;
   inputFile.open( inputFileName.getString(), ios::in );
   if( ! inputFile )
@@ -124,18 +134,18 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters )
   /****
    * Check the number of the non-zero elements
    */
   const long int nonzeroElements = csrMatrix. checkNonzeroElements();
   if( nonzeroElements != csrMatrix. getNonzeroElements() )
      cerr << "WARNING: The matrix reports " << csrMatrix. getNonzeroElements() << " but actually there are " << nonzeroElements << " non-zero elements." << endl;
   const long int nonzeroElements = csrMatrix. getNumberOfNonzeroMatrixElements();
   if( verbose )
      cout << "Matrix size: " << csrMatrix. getSize()
      cout << "Matrix rows: " << csrMatrix.getRows() 
           << " Matrix columns: " << csrMatrix.getColumns()
           << " Non-zero elements: " << nonzeroElements << endl;

   const long int size = csrMatrix. getSize();
   tnlVector< Real, tnlHost > refX( "ref-x", size ), refB( "ref-b", size);
   tnlVector< Real, tnlCuda > cudaX( "cudaX", size );
   const long int rows = csrMatrix.getRows();
   const long int columns = csrMatrix.getColumns();
   tnlVector< RealType, tnlHost > refX( "ref-x", columns ), refB( "ref-b", rows );
   tnlVector< RealType, tnlCuda > cudaX( "cudaX", columns );
   refX. setValue( 0.0 );
   for( int i = 0; i < size; i ++ )
   for( int i = 0; i < columns; i ++ )
      refX[ i ] = 1.0; //( Real ) i * 1.0 / ( Real ) size;
   cudaX = refX;
   csrMatrix. vectorProduct( refX, refB );
@@ -143,7 +153,7 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters )
   /****
    * CSR format benchmark
    */
   tnlSpmvBenchmarkCSRMatrix< Real, int > csrMatrixBenchmark;
   tnlSpmvBenchmark< tnlCSRMatrix< RealType, tnlHost, int > > csrMatrixBenchmark;

   /****
    * Use the first instance of tnlSpmvBenchmark which we have
@@ -153,19 +163,6 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters )
      csrMatrixBenchmark. writeProgressTableHeader();

   csrMatrixBenchmark. setup( csrMatrix );
   if( formatTest )
   {
      if( verbose )
            cout << "Reading the FULL matrix ... " << endl;
      tnlFullMatrix< Real, tnlHost, int > fullMatrix( "full-matrix" );
      fstream mtxFile;
      mtxFile. open( inputMtxFile. getString(), ios :: in );
      if( ! fullMatrix. read( mtxFile, verbose ) )
         cerr << "Unable to get the FULL matrix." << endl;
      else
         csrMatrixBenchmark. testMatrix( fullMatrix, verbose );
      mtxFile. close();
   }
   csrMatrixBenchmark. setMaxIterations( maxIterations );
   csrMatrixBenchmark. runBenchmark( refX, refB, verbose );
   csrMatrixBenchmark. tearDown();
@@ -187,17 +184,18 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters )
       */
      long int allElements = csrMatrix. getSize() * csrMatrix. getSize();
      logFile << "          <tr>" << endl;
      logFile << "             <td> <a href=\"" << pdfFile << "\">" << inputFile << "</a> </td>" << endl;
      logFile << "             <td> <a href=\"" << pdfFileName << "\">" << inputFile << "</a> </td>" << endl;
      logFile << "             <td> " << csrMatrix. getSize() << "</td>" << endl;
      logFile << "             <td> " << nonzeroElements << "</td>" << endl;
      logFile << "             <td> " << ( double ) nonzeroElements / allElements * 100.0 << "</td>" << endl;
      csrMatrixBenchmark. writeToLogTable( logFile,
                                           csrMatrixBenchmark. getGflops(),
                                           inputMtxFile,
                                           inputMtxFileName,
                                           csrMatrix,
                                           false );
   }

#ifdef UNDEF
   /****
    * Cusparse CSR format benchmark
    */
@@ -396,6 +394,7 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters )
      cudaRgCsrMatrixBenchmark. tearDown();
   }

#endif


   if( logFileName )
@@ -409,9 +408,6 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters )

int main( int argc, char* argv[] )
{
   dbgFunctionName( "", "main" );
   dbgInit( "" );

   tnlParameterContainer parameters;
   tnlConfigDescription conf_desc;

+15 −409
Original line number Diff line number Diff line
/***************************************************************************
                          tnlSpmvBenchmark.h  -  description
                             -------------------
    begin                : May 15, 2011
    copyright            : (C) 2011 by Tomas Oberhuber
    begin                : Dec 29, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

@@ -18,425 +18,31 @@
#ifndef TNLSPMVBENCHMARK_H_
#define TNLSPMVBENCHMARK_H_

#include "tnlSpmvBenchmarkBase.h"
#include <matrices/tnlCSRMatrix.h>
#include <core/tnlTimerRT.h>
#include <core/mfuncs.h>


/****
 * Tolerance used when comparing SpMV results, chosen by the floating-point
 * type of the argument. The argument value itself is ignored - it serves only
 * for overload selection.
 *
 * The functions are defined in a header, therefore they must be inline;
 * otherwise every translation unit including this header would emit its own
 * definition and the link would fail.
 */
inline double tnlSpmvBenchmarkPrecision( const double& ) { return 1.0e-12; }
inline float tnlSpmvBenchmarkPrecision( const float& ) { return 1.0e-4; }


template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
template< typename Matrix >
class tnlSpmvBenchmark
{
   public:

   tnlSpmvBenchmark();

   virtual bool setup( const tnlCSRMatrix< Real, tnlHost, Index >& matrix ) = 0;

   virtual void tearDown() = 0;

   virtual void writeProgress() const = 0;

   /****
    * This is virtual only for the purpose of testing external formats like
    * the Hybrid format from the CUSP library. This format is not wrapped
    * in tnlMatrix.
    */
   virtual void runBenchmark( const tnlVector< Real, Device, Index >& x,
                              const tnlVector< Real, tnlHost, Index >& refB,
                              bool verbose );

   bool getBenchmarkWasSuccesful() const;

   double getGflops() const;

   double getTime() const;

   void setMaxIterations( const int maxIterations );

   int getIterations() const;

   Index getArtificialZeros() const;

   Real getMaxError() const;

   void writeProgressTableHeader();

   virtual void writeToLogTable( ostream& logFile,
                                 const double& csrGflops,
                                 const tnlString& inputMtxFile,
                                 const tnlCSRMatrix< Real, tnlHost, Index >& csrMatrix,
                                 bool writeMatrixInfo  ) const = 0;

   /*!***
    * This method tests whether the matrix is stored properly, usually against a full or CSR matrix.
    * It is a useful test for more complicated formats. Matrices stored on a CUDA device are
    * tested by SpMV with the complete basis made of vectors e_0, \ldots, e_{N-1}.
    */
   virtual bool testMatrix( const tnlMatrix< Real, tnlHost, Index >& testMatrix,
                            int verbose ) const;

   protected:

   /****
    * This is helper method for generating HTML table with benchmark results
    */
   tnlString getBgColorBySpeedUp( const double& speedUp ) const;

   /****
    * Helper method for writing matrix statistics and information to HTML
    */
   bool printMatrixInHtml( const tnlString& fileName,
                           tnlMatrix< Real >& matrix ) const;


   bool benchmarkWasSuccesful;

   bool setupOk;

   double gflops;

   double time;

   /****
    * Max number of SpMV repetitions.
    */
   int maxIterations;

   /****
    * Real number of repetitions.
    */
   int iterations;

   Index artificialZeros;

   Real maxError;

   Index firstErrorOccurence;

   Matrix< Real, Device, Index > matrix;

   /****
    * Parameters for the progress table columns
    */

   int formatColumnWidth;

   int timeColumnWidth;

   int iterationsColumnWidth;

   int gflopsColumnWidth;

   int benchmarkStatusColumnWidth;

   int infoColumnWidth;

};

/****
 * Default constructor. Marks the benchmark as neither set up nor successful,
 * zeroes all measured quantities, names the wrapped matrix and sets the
 * widths of the progress table columns.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
tnlSpmvBenchmark< Real, Device, Index, Matrix > :: tnlSpmvBenchmark()
   : benchmarkWasSuccesful( false ),
     setupOk( false ),
     gflops( 0.0 ),
     time( 0.0 ),
     maxIterations( 0 ),
     // iterations is an int counter, so it gets an integer literal
     iterations( 0 ),
     artificialZeros( 0 ),
     maxError( 0.0 ),
     firstErrorOccurence( 0 ),
     matrix( "spmvBenchmark::matrix" ),
     formatColumnWidth( 40 ),
     timeColumnWidth( 12 ),
     iterationsColumnWidth( 15 ),
     gflopsColumnWidth( 12 ),
     benchmarkStatusColumnWidth( 12 ),
     infoColumnWidth( 20 )
{
}

/****
 * Returns the success flag of the benchmark (the flag is written by runBenchmark).
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
bool tnlSpmvBenchmark< Real, Device, Index, Matrix >::getBenchmarkWasSuccesful() const
{
   return benchmarkWasSuccesful;
}

template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
double tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getGflops() const
template< typename Real, typename Device, typename Index >
class tnlSpmvBenchmark< tnlCSRMatrix< Real, Device, Index > > : public tnlSpmvBenchmarkBase< tnlCSRMatrix< Real, Device, Index > >
{
   return this -> gflops;
}

/****
 * Returns the time stored by the last call of runBenchmark.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
double tnlSpmvBenchmark< Real, Device, Index, Matrix >::getTime() const
{
   return time;
}

/****
 * Sets the upper bound on the number of SpMV repetitions performed by runBenchmark.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
void tnlSpmvBenchmark< Real, Device, Index, Matrix >::setMaxIterations( const int maxIterations )
{
   // The parameter shadows the member, hence the explicit this->.
   this->maxIterations = maxIterations;
}

/****
 * Returns the number of SpMV repetitions counted by the last runBenchmark call.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
int tnlSpmvBenchmark< Real, Device, Index, Matrix >::getIterations() const
{
   return iterations;
}


/****
 * Returns the number of artificial zeros recorded by runBenchmark.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
Index tnlSpmvBenchmark< Real, Device, Index, Matrix >::getArtificialZeros() const
{
   return artificialZeros;
}

/****
 * Returns the maximal error recorded by runBenchmark.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
Real tnlSpmvBenchmark< Real, Device, Index, Matrix >::getMaxError() const
{
   return maxError;
}

/****
 * Runs the SpMV benchmark: multiplies the stored matrix by x maxIterations
 * times, measures the elapsed time, compares the result with the reference
 * vector refB and evaluates the achieved GFLOPS.
 *
 * x       - the input vector of the product (on the benchmarked device),
 * refB    - the reference result stored on the host,
 * verbose - if true, writeProgress() is called to report the result.
 *
 * Nothing is done if the setup has not succeeded or if a vector cannot be
 * allocated; benchmarkWasSuccesful then stays false.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
void tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark( const tnlVector< Real, Device, Index >& x,
                                                                      const tnlVector< Real, tnlHost, Index >& refB,
                                                                      bool verbose )
{
   benchmarkWasSuccesful = false;
   if( ! setupOk )
      return;
#ifndef HAVE_CUDA
   // Without CUDA support a CUDA benchmark cannot run - only report it.
   if( Device :: getDevice() == tnlCudaDevice )
   {
      if( verbose )
         writeProgress();
      return;
   }
#endif

   tnlVector< Real, Device, Index > b( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" );
   if( ! b. setSize( refB. getSize() ) )
      return;

   iterations = 0;

   tnlTimerRT rt_timer;
   rt_timer. Reset();
   //maxIterations = 1;

   for( int i = 0; i < maxIterations; i ++ )
   {
      matrix. vectorProduct( x, b );
      iterations ++;
   }

   this -> time = rt_timer. GetTime();

   /****
    * Copy the result to the host and compare it with the reference vector.
    */
   firstErrorOccurence = 0;
   tnlVector< Real, tnlHost, Index > resB( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" );
   if( ! resB. setSize( b. getSize() ) )
   {
      cerr << "I am not able to allocate copy of vector b on the host." << endl;
      return;
   }
   resB = b;
   benchmarkWasSuccesful = true;
   // NOTE(review): maxError is not reset here, so it accumulates over repeated runs - confirm this is intended.
   for( Index j = 0; j < refB. getSize(); j ++ )
   {
      //f << refB[ j ] << " - " << host_b[ j ] << " = "  << refB[ j ] - host_b[ j ] <<  endl;
      Real error( 0.0 );
      if( refB[ j ] != 0.0 )
         error = ( Real ) fabs( refB[ j ] - resB[ j ] ) /  ( Real ) fabs( refB[ j ] );
      else
         // The relative error is undefined for a zero reference value, so the
         // absolute error is used, i.e. the magnitude of the computed entry.
         error = ( Real ) fabs( resB[ j ] );
      if( error > maxError )
         firstErrorOccurence = j;
      this -> maxError = Max( this -> maxError, error );

      /*if( error > tnlSpmvBenchmarkPrecision( error ) )
         benchmarkWasSuccesful = false;*/

   }
   //cout << "First error was on " << firstErrorOccurence << endl;

   // One multiplication and one addition per non-zero element and iteration.
   double flops = 2.0 * iterations * matrix. getNonzeroElements();
   this -> gflops = flops / time * 1.0e-9;
   artificialZeros = matrix. getArtificialZeroElements();

   if( verbose )
      writeProgress();
}

/****
 * Prints the caption row of the progress table to the standard output,
 * followed by a separator line as wide as all the columns together.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
void tnlSpmvBenchmark< Real, Device, Index, Matrix >::writeProgressTableHeader()
{
   const int tableWidth = formatColumnWidth +
                          timeColumnWidth +
                          iterationsColumnWidth +
                          gflopsColumnWidth +
                          benchmarkStatusColumnWidth +
                          infoColumnWidth;

   /****
    * Column captions; the last 5 characters of the format column are reserved for the block size.
    */
   cout << left << setw( formatColumnWidth - 5 ) << "MATRIX FORMAT";
   cout << left << setw( 5 ) << "BLOCK";
   cout << right << setw( timeColumnWidth ) << "TIME";
   cout << right << setw( iterationsColumnWidth ) << "ITERATIONS";
   cout << right << setw( gflopsColumnWidth ) << "GFLOPS";
   cout << right << setw( benchmarkStatusColumnWidth ) << "CHECK";
   cout << left << setw( infoColumnWidth ) << " INFO" << endl;

   /****
    * Separator line; the fill character is restored to a space afterwards.
    */
   cout << setfill( '-' ) << setw( tableWidth ) << "--" << endl;
   cout << setfill( ' ');
}

/****
 * Maps the speed-up to an HTML colour code used as the cell background in
 * the result table. The first threshold (going from the highest one) not
 * exceeding the speed-up decides; a speed-up below 1.0 gives white.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
tnlString tnlSpmvBenchmark< Real, Device, Index, Matrix >::getBgColorBySpeedUp( const double& speedUp ) const
{
   const int levels = 7;
   const double thresholds[ levels ] = { 30.0, 25.0, 20.0, 15.0, 10.0, 5.0, 1.0 };
   const char* const colors[ levels ] = { "#FF9900",
                                          "#FFAA00",
                                          "#FFBB00",
                                          "#FFCC00",
                                          "#FFDD00",
                                          "#FFEE00",
                                          "#FFFF00" };
   for( int level = 0; level < levels; level ++ )
      if( speedUp >= thresholds[ level ] )
         return tnlString( colors[ level ] );
   return tnlString( "#FFFFFF" );
}
   public:

   typedef Real RealType;
   typedef Device DeviceType;
   typedef Index IndexType;

/****
 * Writes the matrix to the file fileName, wrapped in a minimal HTML page.
 * The matrix body is produced by matrix.printOut( file, "html" ).
 * Returns false (and reports it to cerr) if the file cannot be opened,
 * true otherwise.
 */
template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
bool tnlSpmvBenchmark< Real, Device, Index, Matrix >::printMatrixInHtml( const tnlString& fileName,
                                                                         tnlMatrix< Real >& matrix ) const
{
   //cout << "Writing to file " << fileName << endl;
   fstream htmlFile;
   htmlFile. open( fileName. getString(), ios :: out );
   if( htmlFile )
   {
      htmlFile << "<html>" << endl
               << "   <body>" << endl;
      matrix. printOut( htmlFile, "html" );
      htmlFile << "   </body>" << endl
               << "</html>" << endl;
      htmlFile. close();
      return true;
   }
   cerr << "I am not able to open the file " << fileName << endl;
   return false;
}
   bool setup( const tnlCSRMatrix< RealType, tnlHost, IndexType >& matrix );

template< typename Real,
          typename Device,
          typename Index,
          template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix >
bool tnlSpmvBenchmark< Real, Device, Index, Matrix > :: testMatrix( const tnlMatrix< Real, tnlHost, Index >& testMatrix,
                                                                    int verbose ) const
{
   if( ! this -> setupOk )
      return false;
   void tearDown();

#ifndef HAVE_CUDA
   if( Device :: getDevice() == tnlCudaDevice )
      return false;
#endif
   void writeProgress() const;
};

   const Index size = matrix. getSize();
   if( size != testMatrix. getSize() )
   {
      cerr << "Both matrices " << this -> matrix. getName() << " and " << testMatrix. getName()
           << " have different sizes: " << size << " and " << testMatrix. getSize() << "." << endl;
      return false;
   }
   if( Device :: getDevice() == tnlHostDevice )
   {
      for( Index i = 0; i < size; i ++ )
      {
         for( Index j = 0; j < size; j ++ )
            if( matrix. getElement( i, j ) != testMatrix. getElement( i, j ) )
            {
               if( verbose )
                  cout << "Comparing with testing matrix: " << i + 1 << " / " << size << " error at column " << j << "." << endl;
               return false;
            }
         if( verbose )
            cout << "Comparing with testing matrix: " << i + 1 << " / " << size << "           \r" << flush;
      }
   }
   if( Device :: getDevice() == tnlCudaDevice )
   {
#ifdef HAVE_CUDA
      tnlVector< Real, Device, Index > x( "x" ), b( "b" );
      if( ! x. setSize( size ) || ! b. setSize( size ) )
         return false;
      for( Index j = 0; j < size; j ++ )
      {
         x. setValue( 0.0 );
         x. setElement( j, 1.0 );
         this -> matrix. vectorProduct( x, b );
         for( Index i = 0; i < size; i ++ )
            if( b. getElement( i ) != testMatrix. getElement( i, j ) )
            {
               if( verbose )
                  cout << "Comparing with testing matrix: " << j + 1 << " / " << size << " error at line " << i << "." << endl;
               return false;
            }
         if( verbose )
            cout << "Comparing with testing matrix: " << j + 1 << " / " << size << "           \r" << flush;
      }
#endif
   }
   //if( verbose )
   //   cout << endl;
   return true;
}
#include "tnlSpmvBenchmark_impl.h"

#endif /* TNLSPMVBENCHMARK_H_ */
Loading