Loading src/implementation/matrices/tnlSparseMatrix_impl.h +13 −0 Original line number Diff line number Diff line Loading @@ -47,6 +47,19 @@ Index tnlSparseMatrix< Real, Device, Index >::getNumberOfMatrixElements() const return this->values.getSize(); } template< typename Real, typename Device, typename Index > Index tnlSparseMatrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const { IndexType nonzeroElements( 0 ); for( IndexType i = 0; i < this->values.getSize(); i++ ) if( this->columnIndexes.getElement( i ) != this-> columns && this->values.getElement( i ) != 0.0 ) nonzeroElements++; return nonzeroElements; } template< typename Real, typename Device, typename Index > Loading src/matrices/tnlSparseMatrix.h +2 −0 Original line number Diff line number Diff line Loading @@ -39,6 +39,8 @@ class tnlSparseMatrix : public tnlMatrix< Real, Device, Index > IndexType getNumberOfMatrixElements() const; IndexType getNumberOfNonzeroMatrixElements() const; void reset(); bool save( tnlFile& file ) const; Loading tests/benchmarks/CMakeLists.txt +11 −11 Original line number Diff line number Diff line Loading @@ -9,18 +9,18 @@ SET( tnlSpmvBenchmark_headers sparse-matrix-benchmark.h tnlSpmvBenchmarkRgCSRMatrix.h ) #IF( BUILD_CUDA ) # CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu ) # SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) #ELSE() # ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp ) # SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) #ENDIF() #TARGET_LINK_LIBRARIES( tnl-sparse-matrix-benchmark${debugExt} tnl${debugExt}-${tnlVersion} # ${CUSPARSE_LIBRARY} ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu ) SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) ELSE() ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp ) SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) ENDIF() TARGET_LINK_LIBRARIES( tnl-sparse-matrix-benchmark${debugExt} tnl${debugExt}-${tnlVersion} ${CUSPARSE_LIBRARY} ) #INSTALL( TARGETS tnl-sparse-matrix-benchmark${debugExt} # RUNTIME DESTINATION bin ) INSTALL( TARGETS tnl-sparse-matrix-benchmark${debugExt} RUNTIME DESTINATION bin ) #IF( BUILD_CUDA ) Loading tests/benchmarks/sparse-matrix-benchmark.h +26 −30 Original line number Diff line number Diff line Loading @@ -28,12 +28,13 @@ #include <matrices/tnlSlicedEllpackMatrix.h> #include <matrices/tnlChunkedEllpackMatrix.h> #include <matrices/tnlCSRMatrix.h> #include <matrices/tnlMatrixReader.h> #include <core/mfuncs.h> #include "tnlSpmvBenchmarkCSRMatrix.h" #include "tnlSpmvBenchmarkCusparseCSRMatrix.h" #include "tnlSpmvBenchmark.h" /*#include "tnlSpmvBenchmarkCusparseCSRMatrix.h" #include "tnlSpmvBenchmarkHybridMatrix.h" #include "tnlSpmvBenchmarkRgCSRMatrix.h" #include "tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h" #include "tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h"*/ #include "tnlConfig.h" const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-sparse-matrix-benchmark.cfg.desc"; Loading @@ -41,6 +42,7 @@ const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-sparse-matrix-benchmark.cfg. using namespace std; /* double bestCudaRgCSRGflops( 0 ); template< typename Real > Loading Loading @@ -100,6 +102,7 @@ void benchmarkRgCSRFormat( const tnlCSRMatrix< Real, tnlHost, int >& csrMatrix, cudaRgCsrMatrixBenchmark. tearDown(); } } */ template< typename RealType > bool benchmarkMatrix( const tnlParameterContainer& parameters ) Loading @@ -111,6 +114,13 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) CsrMatrix csrMatrix; const tnlString& inputFileName = parameters.GetParameter< tnlString >( "input-file" ); const tnlString& inputMtxFileName = parameters.GetParameter< tnlString >( "input-mtx-file" ); const tnlString& logFileName = parameters.GetParameter< tnlString >( "log-file" ); const tnlString& pdfFileName = parameters.GetParameter< tnlString >( "pdf-file" ); bool verbose = parameters.GetParameter< bool >( "verbose" ); const int maxIterations = parameters.GetParameter< tnlString >( "max-iterations" ); fstream inputFile; inputFile.open( inputFileName.getString(), ios::in ); if( ! inputFile ) Loading @@ -124,18 +134,18 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) /**** * Check the number of the non-zero elements */ const long int nonzeroElements = csrMatrix. checkNonzeroElements(); if( nonzeroElements != csrMatrix. getNonzeroElements() ) cerr << "WARNING: The matrix reports " << csrMatrix. getNonzeroElements() << " but actually there are " << nonzeroElements << " non-zero elements." << endl; const long int nonzeroElements = csrMatrix. getNumberOfNonzeroMatrixElements(); if( verbose ) cout << "Matrix size: " << csrMatrix. getSize() cout << "Matrix rows: " << csrMatrix.getRows() << " Matrix columns: " << csrMatrix.getColumns() << " Non-zero elements: " << nonzeroElements << endl; const long int size = csrMatrix. getSize(); tnlVector< Real, tnlHost > refX( "ref-x", size ), refB( "ref-b", size); tnlVector< Real, tnlCuda > cudaX( "cudaX", size ); const long int rows = csrMatrix.getRows(); const long int columns = csrMatrix.getColumns(); tnlVector< RealType, tnlHost > refX( "ref-x", columns ), refB( "ref-b", rows ); tnlVector< RealType, tnlCuda > cudaX( "cudaX", columns ); refX. setValue( 0.0 ); for( int i = 0; i < size; i ++ ) for( int i = 0; i < columns; i ++ ) refX[ i ] = 1.0; //( Real ) i * 1.0 / ( Real ) size; cudaX = refX; csrMatrix. vectorProduct( refX, refB ); Loading @@ -143,7 +153,7 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) /**** * CSR format benchmark */ tnlSpmvBenchmarkCSRMatrix< Real, int > csrMatrixBenchmark; tnlSpmvBenchmark< tnlCSRMatrix< RealType, tnlHost, int > > csrMatrixBenchmark; /**** * Use the first instance of tnlSpmvBenchmark which we have Loading @@ -153,19 +163,6 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) csrMatrixBenchmark. writeProgressTableHeader(); csrMatrixBenchmark. setup( csrMatrix ); if( formatTest ) { if( verbose ) cout << "Reading the FULL matrix ... " << endl; tnlFullMatrix< Real, tnlHost, int > fullMatrix( "full-matrix" ); fstream mtxFile; mtxFile. open( inputMtxFile. getString(), ios :: in ); if( ! fullMatrix. read( mtxFile, verbose ) ) cerr << "Unable to get the FULL matrix." << endl; else csrMatrixBenchmark. testMatrix( fullMatrix, verbose ); mtxFile. close(); } csrMatrixBenchmark. setMaxIterations( maxIterations ); csrMatrixBenchmark. runBenchmark( refX, refB, verbose ); csrMatrixBenchmark. tearDown(); Loading @@ -187,17 +184,18 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) */ long int allElements = csrMatrix. getSize() * csrMatrix. getSize(); logFile << " <tr>" << endl; logFile << " <td> <a href=\"" << pdfFile << "\">" << inputFile << "</a> </td>" << endl; logFile << " <td> <a href=\"" << pdfFileName << "\">" << inputFile << "</a> </td>" << endl; logFile << " <td> " << csrMatrix. getSize() << "</td>" << endl; logFile << " <td> " << nonzeroElements << "</td>" << endl; logFile << " <td> " << ( double ) nonzeroElements / allElements * 100.0 << "</td>" << endl; csrMatrixBenchmark. writeToLogTable( logFile, csrMatrixBenchmark. getGflops(), inputMtxFile, inputMtxFileName, csrMatrix, false ); } #ifdef UNDEF /**** * Cusparse CSR format benchmark */ Loading Loading @@ -396,6 +394,7 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) cudaRgCsrMatrixBenchmark. tearDown(); } #endif if( logFileName ) Loading @@ -409,9 +408,6 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) int main( int argc, char* argv[] ) { dbgFunctionName( "", "main" ); dbgInit( "" ); tnlParameterContainer parameters; tnlConfigDescription conf_desc; Loading tests/benchmarks/tnlSpmvBenchmark.h +15 −409 Original line number Diff line number Diff line /*************************************************************************** tnlSpmvBenchmark.h - description ------------------- begin : May 15, 2011 copyright : (C) 2011 by Tomas Oberhuber begin : Dec 29, 2013 copyright : (C) 2013 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ Loading @@ -18,425 +18,31 @@ #ifndef TNLSPMVBENCHMARK_H_ #define TNLSPMVBENCHMARK_H_ #include "tnlSpmvBenchmarkBase.h" #include <matrices/tnlCSRMatrix.h> #include <core/tnlTimerRT.h> #include <core/mfuncs.h> double tnlSpmvBenchmarkPrecision( const double& ) { return 1.0e-12; } float tnlSpmvBenchmarkPrecision( const float& ) { return 1.0e-4; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > template< typename Matrix > class tnlSpmvBenchmark { public: tnlSpmvBenchmark(); virtual bool setup( const tnlCSRMatrix< Real, tnlHost, Index >& matrix ) = 0; virtual void tearDown() = 0; virtual void writeProgress() const = 0; /**** * This is virtual only the purpose of testing external formats like * the Hybrid format from the CUSP library. This format is not wrapped * in tnlMatrix. */ virtual void runBenchmark( const tnlVector< Real, Device, Index >& x, const tnlVector< Real, tnlHost, Index >& refB, bool verbose ); bool getBenchmarkWasSuccesful() const; double getGflops() const; double getTime() const; void setMaxIterations( const int maxIterations ); int getIterations() const; Index getArtificialZeros() const; Real getMaxError() const; void writeProgressTableHeader(); virtual void writeToLogTable( ostream& logFile, const double& csrGflops, const tnlString& inputMtxFile, const tnlCSRMatrix< Real, tnlHost, Index >& csrMatrix, bool writeMatrixInfo ) const = 0; /*!*** * This method test if the matrix is stored properly usually against full or CSR matrix. * It is useful test for more complicated formats. Matrices stored on CUDA device are * tested by SpMV with complete basis made of vectors e_0, \ldots e_{N-1}. */ virtual bool testMatrix( const tnlMatrix< Real, tnlHost, Index >& testMatrix, int verbose ) const; protected: /**** * This is helper method for generating HTML table with benchmark results */ tnlString getBgColorBySpeedUp( const double& speedUp ) const; /**** * Helper method for writing matrix statistics and information to HTML */ bool printMatrixInHtml( const tnlString& fileName, tnlMatrix< Real >& matrix ) const; bool benchmarkWasSuccesful; bool setupOk; double gflops; double time; /**** * Max number of SpMV repetitions. */ int maxIterations; /**** * Real number of repetitions. */ int iterations; Index artificialZeros; Real maxError; Index firstErrorOccurence; Matrix< Real, Device, Index > matrix; /**** * Parameters for the progress table columns */ int formatColumnWidth; int timeColumnWidth; int iterationsColumnWidth; int gflopsColumnWidth; int benchmarkStatusColumnWidth; int infoColumnWidth; }; template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > tnlSpmvBenchmark< Real, Device, Index, Matrix > :: tnlSpmvBenchmark() : benchmarkWasSuccesful( false ), setupOk( false ), gflops( 0.0 ), time( 0.0 ), maxIterations( 0 ), iterations( 0.0 ), artificialZeros( 0 ), maxError( 0.0 ), firstErrorOccurence( 0 ), matrix( "spmvBenchmark::matrix" ), formatColumnWidth( 40 ), timeColumnWidth( 12 ), iterationsColumnWidth( 15 ), gflopsColumnWidth( 12 ), benchmarkStatusColumnWidth( 12 ), infoColumnWidth( 20 ) { } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > bool tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getBenchmarkWasSuccesful() const { return this -> benchmarkWasSuccesful; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > double tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getGflops() const template< typename Real, typename Device, typename Index > class tnlSpmvBenchmark< tnlCSRMatrix< Real, Device, Index > > : public tnlSpmvBenchmarkBase< tnlCSRMatrix< Real, Device, Index > > { return this -> gflops; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > double tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getTime() const { return this -> time; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > void tnlSpmvBenchmark< Real, Device, Index, Matrix > :: setMaxIterations( const int maxIterations ) { this -> maxIterations = maxIterations; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > int tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getIterations() const { return this -> iterations; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > Index tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getArtificialZeros() const { return this -> artificialZeros; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > Real tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getMaxError() const { return this -> maxError; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > void tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark( const tnlVector< Real, Device, Index >& x, const tnlVector< Real, tnlHost, Index >& refB, bool verbose ) { benchmarkWasSuccesful = false; if( ! setupOk ) return; #ifndef HAVE_CUDA if( Device :: getDevice() == tnlCudaDevice ) { if( verbose ) writeProgress(); return; } #endif tnlVector< Real, Device, Index > b( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" ); if( ! b. setSize( refB. getSize() ) ) return; iterations = 0; tnlTimerRT rt_timer; rt_timer. Reset(); //maxIterations = 1; for( int i = 0; i < maxIterations; i ++ ) { matrix. vectorProduct( x, b ); iterations ++; } this -> time = rt_timer. GetTime(); firstErrorOccurence = 0; tnlVector< Real, tnlHost, Index > resB( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" ); if( ! resB. setSize( b. getSize() ) ) { cerr << "I am not able to allocate copy of vector b on the host." << endl; return; } resB = b; benchmarkWasSuccesful = true; for( Index j = 0; j < refB. getSize(); j ++ ) { //f << refB[ j ] << " - " << host_b[ j ] << " = " << refB[ j ] - host_b[ j ] << endl; Real error( 0.0 ); if( refB[ j ] != 0.0 ) error = ( Real ) fabs( refB[ j ] - resB[ j ] ) / ( Real ) fabs( refB[ j ] ); else error = ( Real ) fabs( refB[ j ] ); if( error > maxError ) firstErrorOccurence = j; this -> maxError = Max( this -> maxError, error ); /*if( error > tnlSpmvBenchmarkPrecision( error ) ) benchmarkWasSuccesful = false;*/ } //cout << "First error was on " << firstErrorOccurence << endl; double flops = 2.0 * iterations * matrix. getNonzeroElements(); this -> gflops = flops / time * 1.0e-9; artificialZeros = matrix. getArtificialZeroElements(); if( verbose ) writeProgress(); } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > void tnlSpmvBenchmark< Real, Device, Index, Matrix > :: writeProgressTableHeader() { int totalWidth = this -> formatColumnWidth + this -> timeColumnWidth + this -> iterationsColumnWidth + this -> gflopsColumnWidth + this -> benchmarkStatusColumnWidth + this -> infoColumnWidth; cout << left << setw( this -> formatColumnWidth - 5 ) << "MATRIX FORMAT" << left << setw( 5 ) << "BLOCK" << right << setw( this -> timeColumnWidth ) << "TIME" << right << setw( this -> iterationsColumnWidth ) << "ITERATIONS" << right << setw( this -> gflopsColumnWidth ) << "GFLOPS" << right << setw( this -> benchmarkStatusColumnWidth ) << "CHECK" << left << setw( this -> infoColumnWidth ) << " INFO" << endl << setfill( '-' ) << setw( totalWidth ) << "--" << endl << setfill( ' '); } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > tnlString tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getBgColorBySpeedUp( const double& speedUp ) const { if( speedUp >= 30.0 ) return tnlString( "#FF9900" ); if( speedUp >= 25.0 ) return tnlString( "#FFAA00" ); if( speedUp >= 20.0 ) return tnlString( "#FFBB00" ); if( speedUp >= 15.0 ) return tnlString( "#FFCC00" ); if( speedUp >= 10.0 ) return tnlString( "#FFDD00" ); if( speedUp >= 5.0 ) return tnlString( "#FFEE00" ); if( speedUp >= 1.0 ) return tnlString( "#FFFF00" ); return tnlString( "#FFFFFF" ); } public: typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > bool tnlSpmvBenchmark< Real, Device, Index, Matrix > :: printMatrixInHtml( const tnlString& fileName, tnlMatrix< Real >& matrix ) const { //cout << "Writing to file " << fileName << endl; fstream file; file. open( fileName. getString(), ios :: out ); if( ! file ) { cerr << "I am not able to open the file " << fileName << endl; return false; } file << "<html>" << endl; file << " <body>" << endl; matrix. printOut( file, "html" ); file << " </body>" << endl; file << "</html>" << endl; file. close(); return true; } bool setup( const tnlCSRMatrix< RealType, tnlHost, IndexType >& matrix ); template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > bool tnlSpmvBenchmark< Real, Device, Index, Matrix > :: testMatrix( const tnlMatrix< Real, tnlHost, Index >& testMatrix, int verbose ) const { if( ! this -> setupOk ) return false; void tearDown(); #ifndef HAVE_CUDA if( Device :: getDevice() == tnlCudaDevice ) return false; #endif void writeProgress() const; }; const Index size = matrix. getSize(); if( size != testMatrix. getSize() ) { cerr << "Both matrices " << this -> matrix. getName() << " and " << testMatrix. getName() << " have different sizes: " << size << " and " << testMatrix. getSize() << "." << endl; return false; } if( Device :: getDevice() == tnlHostDevice ) { for( Index i = 0; i < size; i ++ ) { for( Index j = 0; j < size; j ++ ) if( matrix. getElement( i, j ) != testMatrix. getElement( i, j ) ) { if( verbose ) cout << "Comparing with testing matrix: " << i + 1 << " / " << size << " error at column " << j << "." << endl; return false; } if( verbose ) cout << "Comparing with testing matrix: " << i + 1 << " / " << size << " \r" << flush; } } if( Device :: getDevice() == tnlCudaDevice ) { #ifdef HAVE_CUDA tnlVector< Real, Device, Index > x( "x" ), b( "b" ); if( ! x. setSize( size ) || ! b. setSize( size ) ) return false; for( Index j = 0; j < size; j ++ ) { x. setValue( 0.0 ); x. setElement( j, 1.0 ); this -> matrix. vectorProduct( x, b ); for( Index i = 0; i < size; i ++ ) if( b. getElement( i ) != testMatrix. getElement( i, j ) ) { if( verbose ) cout << "Comparing with testing matrix: " << j + 1 << " / " << size << " error at line " << i << "." << endl; return false; } if( verbose ) cout << "Comparing with testing matrix: " << j + 1 << " / " << size << " \r" << flush; } #endif } //if( verbose ) // cout << endl; return true; } #include "tnlSpmvBenchmark_impl.h" #endif /* TNLSPMVBENCHMARK_H_ */ Loading
src/implementation/matrices/tnlSparseMatrix_impl.h +13 −0 Original line number Diff line number Diff line Loading @@ -47,6 +47,19 @@ Index tnlSparseMatrix< Real, Device, Index >::getNumberOfMatrixElements() const return this->values.getSize(); } template< typename Real, typename Device, typename Index > Index tnlSparseMatrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const { IndexType nonzeroElements( 0 ); for( IndexType i = 0; i < this->values.getSize(); i++ ) if( this->columnIndexes.getElement( i ) != this-> columns && this->values.getElement( i ) != 0.0 ) nonzeroElements++; return nonzeroElements; } template< typename Real, typename Device, typename Index > Loading
src/matrices/tnlSparseMatrix.h +2 −0 Original line number Diff line number Diff line Loading @@ -39,6 +39,8 @@ class tnlSparseMatrix : public tnlMatrix< Real, Device, Index > IndexType getNumberOfMatrixElements() const; IndexType getNumberOfNonzeroMatrixElements() const; void reset(); bool save( tnlFile& file ) const; Loading
tests/benchmarks/CMakeLists.txt +11 −11 Original line number Diff line number Diff line Loading @@ -9,18 +9,18 @@ SET( tnlSpmvBenchmark_headers sparse-matrix-benchmark.h tnlSpmvBenchmarkRgCSRMatrix.h ) #IF( BUILD_CUDA ) # CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu ) # SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) #ELSE() # ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp ) # SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) #ENDIF() #TARGET_LINK_LIBRARIES( tnl-sparse-matrix-benchmark${debugExt} tnl${debugExt}-${tnlVersion} # ${CUSPARSE_LIBRARY} ) IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu ) SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) ELSE() ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp ) SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) ENDIF() TARGET_LINK_LIBRARIES( tnl-sparse-matrix-benchmark${debugExt} tnl${debugExt}-${tnlVersion} ${CUSPARSE_LIBRARY} ) #INSTALL( TARGETS tnl-sparse-matrix-benchmark${debugExt} # RUNTIME DESTINATION bin ) INSTALL( TARGETS tnl-sparse-matrix-benchmark${debugExt} RUNTIME DESTINATION bin ) #IF( BUILD_CUDA ) Loading
tests/benchmarks/sparse-matrix-benchmark.h +26 −30 Original line number Diff line number Diff line Loading @@ -28,12 +28,13 @@ #include <matrices/tnlSlicedEllpackMatrix.h> #include <matrices/tnlChunkedEllpackMatrix.h> #include <matrices/tnlCSRMatrix.h> #include <matrices/tnlMatrixReader.h> #include <core/mfuncs.h> #include "tnlSpmvBenchmarkCSRMatrix.h" #include "tnlSpmvBenchmarkCusparseCSRMatrix.h" #include "tnlSpmvBenchmark.h" /*#include "tnlSpmvBenchmarkCusparseCSRMatrix.h" #include "tnlSpmvBenchmarkHybridMatrix.h" #include "tnlSpmvBenchmarkRgCSRMatrix.h" #include "tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h" #include "tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h"*/ #include "tnlConfig.h" const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-sparse-matrix-benchmark.cfg.desc"; Loading @@ -41,6 +42,7 @@ const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-sparse-matrix-benchmark.cfg. using namespace std; /* double bestCudaRgCSRGflops( 0 ); template< typename Real > Loading Loading @@ -100,6 +102,7 @@ void benchmarkRgCSRFormat( const tnlCSRMatrix< Real, tnlHost, int >& csrMatrix, cudaRgCsrMatrixBenchmark. tearDown(); } } */ template< typename RealType > bool benchmarkMatrix( const tnlParameterContainer& parameters ) Loading @@ -111,6 +114,13 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) CsrMatrix csrMatrix; const tnlString& inputFileName = parameters.GetParameter< tnlString >( "input-file" ); const tnlString& inputMtxFileName = parameters.GetParameter< tnlString >( "input-mtx-file" ); const tnlString& logFileName = parameters.GetParameter< tnlString >( "log-file" ); const tnlString& pdfFileName = parameters.GetParameter< tnlString >( "pdf-file" ); bool verbose = parameters.GetParameter< bool >( "verbose" ); const int maxIterations = parameters.GetParameter< tnlString >( "max-iterations" ); fstream inputFile; inputFile.open( inputFileName.getString(), ios::in ); if( ! inputFile ) Loading @@ -124,18 +134,18 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) /**** * Check the number of the non-zero elements */ const long int nonzeroElements = csrMatrix. checkNonzeroElements(); if( nonzeroElements != csrMatrix. getNonzeroElements() ) cerr << "WARNING: The matrix reports " << csrMatrix. getNonzeroElements() << " but actually there are " << nonzeroElements << " non-zero elements." << endl; const long int nonzeroElements = csrMatrix. getNumberOfNonzeroMatrixElements(); if( verbose ) cout << "Matrix size: " << csrMatrix. getSize() cout << "Matrix rows: " << csrMatrix.getRows() << " Matrix columns: " << csrMatrix.getColumns() << " Non-zero elements: " << nonzeroElements << endl; const long int size = csrMatrix. getSize(); tnlVector< Real, tnlHost > refX( "ref-x", size ), refB( "ref-b", size); tnlVector< Real, tnlCuda > cudaX( "cudaX", size ); const long int rows = csrMatrix.getRows(); const long int columns = csrMatrix.getColumns(); tnlVector< RealType, tnlHost > refX( "ref-x", columns ), refB( "ref-b", rows ); tnlVector< RealType, tnlCuda > cudaX( "cudaX", columns ); refX. setValue( 0.0 ); for( int i = 0; i < size; i ++ ) for( int i = 0; i < columns; i ++ ) refX[ i ] = 1.0; //( Real ) i * 1.0 / ( Real ) size; cudaX = refX; csrMatrix. vectorProduct( refX, refB ); Loading @@ -143,7 +153,7 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) /**** * CSR format benchmark */ tnlSpmvBenchmarkCSRMatrix< Real, int > csrMatrixBenchmark; tnlSpmvBenchmark< tnlCSRMatrix< RealType, tnlHost, int > > csrMatrixBenchmark; /**** * Use the first instance of tnlSpmvBenchmark which we have Loading @@ -153,19 +163,6 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) csrMatrixBenchmark. writeProgressTableHeader(); csrMatrixBenchmark. setup( csrMatrix ); if( formatTest ) { if( verbose ) cout << "Reading the FULL matrix ... " << endl; tnlFullMatrix< Real, tnlHost, int > fullMatrix( "full-matrix" ); fstream mtxFile; mtxFile. open( inputMtxFile. getString(), ios :: in ); if( ! fullMatrix. read( mtxFile, verbose ) ) cerr << "Unable to get the FULL matrix." << endl; else csrMatrixBenchmark. testMatrix( fullMatrix, verbose ); mtxFile. close(); } csrMatrixBenchmark. setMaxIterations( maxIterations ); csrMatrixBenchmark. runBenchmark( refX, refB, verbose ); csrMatrixBenchmark. tearDown(); Loading @@ -187,17 +184,18 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) */ long int allElements = csrMatrix. getSize() * csrMatrix. getSize(); logFile << " <tr>" << endl; logFile << " <td> <a href=\"" << pdfFile << "\">" << inputFile << "</a> </td>" << endl; logFile << " <td> <a href=\"" << pdfFileName << "\">" << inputFile << "</a> </td>" << endl; logFile << " <td> " << csrMatrix. getSize() << "</td>" << endl; logFile << " <td> " << nonzeroElements << "</td>" << endl; logFile << " <td> " << ( double ) nonzeroElements / allElements * 100.0 << "</td>" << endl; csrMatrixBenchmark. writeToLogTable( logFile, csrMatrixBenchmark. getGflops(), inputMtxFile, inputMtxFileName, csrMatrix, false ); } #ifdef UNDEF /**** * Cusparse CSR format benchmark */ Loading Loading @@ -396,6 +394,7 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) cudaRgCsrMatrixBenchmark. tearDown(); } #endif if( logFileName ) Loading @@ -409,9 +408,6 @@ bool benchmarkMatrix( const tnlParameterContainer& parameters ) int main( int argc, char* argv[] ) { dbgFunctionName( "", "main" ); dbgInit( "" ); tnlParameterContainer parameters; tnlConfigDescription conf_desc; Loading
tests/benchmarks/tnlSpmvBenchmark.h +15 −409 Original line number Diff line number Diff line /*************************************************************************** tnlSpmvBenchmark.h - description ------------------- begin : May 15, 2011 copyright : (C) 2011 by Tomas Oberhuber begin : Dec 29, 2013 copyright : (C) 2013 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ Loading @@ -18,425 +18,31 @@ #ifndef TNLSPMVBENCHMARK_H_ #define TNLSPMVBENCHMARK_H_ #include "tnlSpmvBenchmarkBase.h" #include <matrices/tnlCSRMatrix.h> #include <core/tnlTimerRT.h> #include <core/mfuncs.h> double tnlSpmvBenchmarkPrecision( const double& ) { return 1.0e-12; } float tnlSpmvBenchmarkPrecision( const float& ) { return 1.0e-4; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > template< typename Matrix > class tnlSpmvBenchmark { public: tnlSpmvBenchmark(); virtual bool setup( const tnlCSRMatrix< Real, tnlHost, Index >& matrix ) = 0; virtual void tearDown() = 0; virtual void writeProgress() const = 0; /**** * This is virtual only the purpose of testing external formats like * the Hybrid format from the CUSP library. This format is not wrapped * in tnlMatrix. */ virtual void runBenchmark( const tnlVector< Real, Device, Index >& x, const tnlVector< Real, tnlHost, Index >& refB, bool verbose ); bool getBenchmarkWasSuccesful() const; double getGflops() const; double getTime() const; void setMaxIterations( const int maxIterations ); int getIterations() const; Index getArtificialZeros() const; Real getMaxError() const; void writeProgressTableHeader(); virtual void writeToLogTable( ostream& logFile, const double& csrGflops, const tnlString& inputMtxFile, const tnlCSRMatrix< Real, tnlHost, Index >& csrMatrix, bool writeMatrixInfo ) const = 0; /*!*** * This method test if the matrix is stored properly usually against full or CSR matrix. * It is useful test for more complicated formats. Matrices stored on CUDA device are * tested by SpMV with complete basis made of vectors e_0, \ldots e_{N-1}. */ virtual bool testMatrix( const tnlMatrix< Real, tnlHost, Index >& testMatrix, int verbose ) const; protected: /**** * This is helper method for generating HTML table with benchmark results */ tnlString getBgColorBySpeedUp( const double& speedUp ) const; /**** * Helper method for writing matrix statistics and information to HTML */ bool printMatrixInHtml( const tnlString& fileName, tnlMatrix< Real >& matrix ) const; bool benchmarkWasSuccesful; bool setupOk; double gflops; double time; /**** * Max number of SpMV repetitions. */ int maxIterations; /**** * Real number of repetitions. */ int iterations; Index artificialZeros; Real maxError; Index firstErrorOccurence; Matrix< Real, Device, Index > matrix; /**** * Parameters for the progress table columns */ int formatColumnWidth; int timeColumnWidth; int iterationsColumnWidth; int gflopsColumnWidth; int benchmarkStatusColumnWidth; int infoColumnWidth; }; template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > tnlSpmvBenchmark< Real, Device, Index, Matrix > :: tnlSpmvBenchmark() : benchmarkWasSuccesful( false ), setupOk( false ), gflops( 0.0 ), time( 0.0 ), maxIterations( 0 ), iterations( 0.0 ), artificialZeros( 0 ), maxError( 0.0 ), firstErrorOccurence( 0 ), matrix( "spmvBenchmark::matrix" ), formatColumnWidth( 40 ), timeColumnWidth( 12 ), iterationsColumnWidth( 15 ), gflopsColumnWidth( 12 ), benchmarkStatusColumnWidth( 12 ), infoColumnWidth( 20 ) { } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > bool tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getBenchmarkWasSuccesful() const { return this -> benchmarkWasSuccesful; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > double tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getGflops() const template< typename Real, typename Device, typename Index > class tnlSpmvBenchmark< tnlCSRMatrix< Real, Device, Index > > : public tnlSpmvBenchmarkBase< tnlCSRMatrix< Real, Device, Index > > { return this -> gflops; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > double tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getTime() const { return this -> time; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > void tnlSpmvBenchmark< Real, Device, Index, Matrix > :: setMaxIterations( const int maxIterations ) { this -> maxIterations = maxIterations; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > int tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getIterations() const { return this -> iterations; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > Index tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getArtificialZeros() const { return this -> artificialZeros; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > Real tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getMaxError() const { return this -> maxError; } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > void tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark( const tnlVector< Real, Device, Index >& x, const tnlVector< Real, tnlHost, Index >& refB, bool verbose ) { benchmarkWasSuccesful = false; if( ! setupOk ) return; #ifndef HAVE_CUDA if( Device :: getDevice() == tnlCudaDevice ) { if( verbose ) writeProgress(); return; } #endif tnlVector< Real, Device, Index > b( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" ); if( ! b. setSize( refB. getSize() ) ) return; iterations = 0; tnlTimerRT rt_timer; rt_timer. Reset(); //maxIterations = 1; for( int i = 0; i < maxIterations; i ++ ) { matrix. vectorProduct( x, b ); iterations ++; } this -> time = rt_timer. GetTime(); firstErrorOccurence = 0; tnlVector< Real, tnlHost, Index > resB( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" ); if( ! resB. setSize( b. getSize() ) ) { cerr << "I am not able to allocate copy of vector b on the host." << endl; return; } resB = b; benchmarkWasSuccesful = true; for( Index j = 0; j < refB. getSize(); j ++ ) { //f << refB[ j ] << " - " << host_b[ j ] << " = " << refB[ j ] - host_b[ j ] << endl; Real error( 0.0 ); if( refB[ j ] != 0.0 ) error = ( Real ) fabs( refB[ j ] - resB[ j ] ) / ( Real ) fabs( refB[ j ] ); else error = ( Real ) fabs( refB[ j ] ); if( error > maxError ) firstErrorOccurence = j; this -> maxError = Max( this -> maxError, error ); /*if( error > tnlSpmvBenchmarkPrecision( error ) ) benchmarkWasSuccesful = false;*/ } //cout << "First error was on " << firstErrorOccurence << endl; double flops = 2.0 * iterations * matrix. getNonzeroElements(); this -> gflops = flops / time * 1.0e-9; artificialZeros = matrix. getArtificialZeroElements(); if( verbose ) writeProgress(); } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > void tnlSpmvBenchmark< Real, Device, Index, Matrix > :: writeProgressTableHeader() { int totalWidth = this -> formatColumnWidth + this -> timeColumnWidth + this -> iterationsColumnWidth + this -> gflopsColumnWidth + this -> benchmarkStatusColumnWidth + this -> infoColumnWidth; cout << left << setw( this -> formatColumnWidth - 5 ) << "MATRIX FORMAT" << left << setw( 5 ) << "BLOCK" << right << setw( this -> timeColumnWidth ) << "TIME" << right << setw( this -> iterationsColumnWidth ) << "ITERATIONS" << right << setw( this -> gflopsColumnWidth ) << "GFLOPS" << right << setw( this -> benchmarkStatusColumnWidth ) << "CHECK" << left << setw( this -> infoColumnWidth ) << " INFO" << endl << setfill( '-' ) << setw( totalWidth ) << "--" << endl << setfill( ' '); } template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > tnlString tnlSpmvBenchmark< Real, Device, Index, Matrix > :: getBgColorBySpeedUp( const double& speedUp ) const { if( speedUp >= 30.0 ) return tnlString( "#FF9900" ); if( speedUp >= 25.0 ) return tnlString( "#FFAA00" ); if( speedUp >= 20.0 ) return tnlString( "#FFBB00" ); if( speedUp >= 15.0 ) return tnlString( "#FFCC00" ); if( speedUp >= 10.0 ) return tnlString( "#FFDD00" ); if( speedUp >= 5.0 ) return tnlString( "#FFEE00" ); if( speedUp >= 1.0 ) return tnlString( "#FFFF00" ); return tnlString( "#FFFFFF" ); } public: typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > bool tnlSpmvBenchmark< Real, Device, Index, Matrix > :: printMatrixInHtml( const tnlString& fileName, tnlMatrix< Real >& matrix ) const { //cout << "Writing to file " << fileName << endl; fstream file; file. open( fileName. getString(), ios :: out ); if( ! file ) { cerr << "I am not able to open the file " << fileName << endl; return false; } file << "<html>" << endl; file << " <body>" << endl; matrix. printOut( file, "html" ); file << " </body>" << endl; file << "</html>" << endl; file. close(); return true; } bool setup( const tnlCSRMatrix< RealType, tnlHost, IndexType >& matrix ); template< typename Real, typename Device, typename Index, template< typename matrixReal, typename matrixDevice, typename matrixIndex > class Matrix > bool tnlSpmvBenchmark< Real, Device, Index, Matrix > :: testMatrix( const tnlMatrix< Real, tnlHost, Index >& testMatrix, int verbose ) const { if( ! this -> setupOk ) return false; void tearDown(); #ifndef HAVE_CUDA if( Device :: getDevice() == tnlCudaDevice ) return false; #endif void writeProgress() const; }; const Index size = matrix. getSize(); if( size != testMatrix. getSize() ) { cerr << "Both matrices " << this -> matrix. getName() << " and " << testMatrix. getName() << " have different sizes: " << size << " and " << testMatrix. getSize() << "." << endl; return false; } if( Device :: getDevice() == tnlHostDevice ) { for( Index i = 0; i < size; i ++ ) { for( Index j = 0; j < size; j ++ ) if( matrix. getElement( i, j ) != testMatrix. getElement( i, j ) ) { if( verbose ) cout << "Comparing with testing matrix: " << i + 1 << " / " << size << " error at column " << j << "." << endl; return false; } if( verbose ) cout << "Comparing with testing matrix: " << i + 1 << " / " << size << " \r" << flush; } } if( Device :: getDevice() == tnlCudaDevice ) { #ifdef HAVE_CUDA tnlVector< Real, Device, Index > x( "x" ), b( "b" ); if( ! x. setSize( size ) || ! b. setSize( size ) ) return false; for( Index j = 0; j < size; j ++ ) { x. setValue( 0.0 ); x. setElement( j, 1.0 ); this -> matrix. vectorProduct( x, b ); for( Index i = 0; i < size; i ++ ) if( b. getElement( i ) != testMatrix. getElement( i, j ) ) { if( verbose ) cout << "Comparing with testing matrix: " << j + 1 << " / " << size << " error at line " << i << "." << endl; return false; } if( verbose ) cout << "Comparing with testing matrix: " << j + 1 << " / " << size << " \r" << flush; } #endif } //if( verbose ) // cout << endl; return true; } #include "tnlSpmvBenchmark_impl.h" #endif /* TNLSPMVBENCHMARK_H_ */