Loading tests/benchmarks/tnl-benchmark-spmv.h +69 −15 Original line number Diff line number Diff line Loading @@ -29,7 +29,9 @@ #include <config/tnlParameterContainer.h> #include <matrices/tnlCSRMatrix.h> #include <matrices/tnlEllpackMatrix.h> #include <matrices/tnlEllpackSymMatrix.h> #include <matrices/tnlSlicedEllpackMatrix.h> #include <matrices/tnlSlicedEllpackSymMatrix.h> #include <matrices/tnlChunkedEllpackMatrix.h> #include <matrices/tnlMatrixReader.h> #include <matrices/tnlBiEllpackMatrix.h> Loading Loading @@ -545,7 +547,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) long int allocatedElements; double padding; /* typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType; typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType; EllpackMatrixType ellpackMatrix; if( ! ellpackMatrix.copyFrom( csrMatrix, rowLengthsHost ) ) writeTestFailed( logFile, 7 ); Loading Loading @@ -590,29 +592,81 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) ellpackMatrix.reset(); } typedef tnlSlicedEllpackMatrix< Real, tnlHost, int > SlicedEllpackMatrixType; SlicedEllpackMatrixType slicedEllpackMatrix; if( ! slicedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ) ) typedef tnlEllpackSymMatrix< Real, tnlHost, int > EllpackSmMatrixType; EllpackSymMatrixType ellpackSymMatrix; if( ! tnlMatrixReader< ellpackSymMatrix >::readMtxFile( file, matrix, verbose, sym ) ) writeTestFailed( logFile, 7 ); else { allocatedElements = slicedEllpackMatrix.getNumberOfMatrixElements(); allocatedElements = ellpackSymMatrix.getNumberOfMatrixElements(); padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0; logFile << " " << padding << endl; benchmarkMatrix( slicedEllpackMatrix, benchmarkMatrix( ellpackSymMatrix, hostX, hostB, nonzeroElements, "SlicedEllpack Host", "EllpackSym Host", stopTime, baseline, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlSlicedEllpackMatrix< Real, tnlCuda, int > SlicedEllpackMatrixCudaType; SlicedEllpackMatrixCudaType cudaSlicedEllpackMatrix; typedef tnlEllpackSymMatrix< Real, tnlCuda, int > EllpackSymMatrixCudaType; EllpackSymMatrixCudaType cudaEllpackSymMatrix; cout << "Copying matrix to GPU... "; if( ! cudaSlicedEllpackMatrix.copyFrom( slicedEllpackMatrix, rowLengthsCuda ) ) for( int i = 0; i < rowLengthsHost.getSize(); i++ ) rowLengthsHost[ i ] = ellpackSymMatrix.getRowLength( i ); rowLengthsCuda = rowLengthsHost; if( ! cudaEllpackMatrix.copyFrom( ellpackSymMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile, 3 ); } else { cout << " done. \r"; benchmarkMatrix( cudaEllpackSymMatrix, cudaX, cudaB, nonzeroElements, "EllpackSym Cuda", stopTime, baseline, verbose, logFile ); } cudaEllpackSymMatrix.reset(); #endif ellpackSymMatrix.reset(); } typedef tnlSlicedEllpackSymMatrix< Real, tnlHost, int > SlicedEllpackSymMatrixType; SlicedEllpackSymMatrixType slicedEllpackSymMatrix; if( ! tnlMatrixReader< slicedEllpackSymMatrix >::readMtxFile( file, matrix, verbose, sym ) ) writeTestFailed( logFile, 7 ); else { allocatedElements = slicedEllpackSymMatrix.getNumberOfMatrixElements(); padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0; logFile << " " << padding << endl; benchmarkMatrix( slicedEllpackSymMatrix, hostX, hostB, nonzeroElements, "SlicedEllpackSym Host", stopTime, baseline, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlSlicedEllpackSymMatrix< Real, tnlCuda, int > SlicedEllpackSymMatrixCudaType; SlicedEllpackSymMatrixCudaType cudaSlicedEllpackSymMatrix; cout << "Copying matrix to GPU... "; for( int i = 0; i < rowLengthsHost.getSize(); i++ ) rowLengthsHost[ i ] = slicedEllpackSymMatrix.getRowLength( i ); if( ! cudaSlicedEllpackSymMatrix.copyFrom( slicedEllpackSymMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile, 3 ); Loading @@ -620,22 +674,22 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) else { cout << " done. \r"; benchmarkMatrix( cudaSlicedEllpackMatrix, benchmarkMatrix( cudaSlicedEllpackSymMatrix, cudaX, cudaB, nonzeroElements, "SlicedEllpack Cuda", "SlicedEllpackSym Cuda", stopTime, baseline, verbose, logFile ); } cudaSlicedEllpackMatrix.reset(); cudaSlicedEllpackSymMatrix.reset(); #endif slicedEllpackMatrix.reset(); slicedEllpackSymMatrix.reset(); } typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType; /* typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType; ChunkedEllpackMatrixType chunkedEllpackMatrix; if( ! chunkedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ) ) writeTestFailed( logFile, 7 ); Loading Loading
tests/benchmarks/tnl-benchmark-spmv.h +69 −15 Original line number Diff line number Diff line Loading @@ -29,7 +29,9 @@ #include <config/tnlParameterContainer.h> #include <matrices/tnlCSRMatrix.h> #include <matrices/tnlEllpackMatrix.h> #include <matrices/tnlEllpackSymMatrix.h> #include <matrices/tnlSlicedEllpackMatrix.h> #include <matrices/tnlSlicedEllpackSymMatrix.h> #include <matrices/tnlChunkedEllpackMatrix.h> #include <matrices/tnlMatrixReader.h> #include <matrices/tnlBiEllpackMatrix.h> Loading Loading @@ -545,7 +547,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) long int allocatedElements; double padding; /* typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType; typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType; EllpackMatrixType ellpackMatrix; if( ! ellpackMatrix.copyFrom( csrMatrix, rowLengthsHost ) ) writeTestFailed( logFile, 7 ); Loading Loading @@ -590,29 +592,81 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) ellpackMatrix.reset(); } typedef tnlSlicedEllpackMatrix< Real, tnlHost, int > SlicedEllpackMatrixType; SlicedEllpackMatrixType slicedEllpackMatrix; if( ! slicedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ) ) typedef tnlEllpackSymMatrix< Real, tnlHost, int > EllpackSmMatrixType; EllpackSymMatrixType ellpackSymMatrix; if( ! tnlMatrixReader< ellpackSymMatrix >::readMtxFile( file, matrix, verbose, sym ) ) writeTestFailed( logFile, 7 ); else { allocatedElements = slicedEllpackMatrix.getNumberOfMatrixElements(); allocatedElements = ellpackSymMatrix.getNumberOfMatrixElements(); padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0; logFile << " " << padding << endl; benchmarkMatrix( slicedEllpackMatrix, benchmarkMatrix( ellpackSymMatrix, hostX, hostB, nonzeroElements, "SlicedEllpack Host", "EllpackSym Host", stopTime, baseline, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlSlicedEllpackMatrix< Real, tnlCuda, int > SlicedEllpackMatrixCudaType; SlicedEllpackMatrixCudaType cudaSlicedEllpackMatrix; typedef tnlEllpackSymMatrix< Real, tnlCuda, int > EllpackSymMatrixCudaType; EllpackSymMatrixCudaType cudaEllpackSymMatrix; cout << "Copying matrix to GPU... "; if( ! cudaSlicedEllpackMatrix.copyFrom( slicedEllpackMatrix, rowLengthsCuda ) ) for( int i = 0; i < rowLengthsHost.getSize(); i++ ) rowLengthsHost[ i ] = ellpackSymMatrix.getRowLength( i ); rowLengthsCuda = rowLengthsHost; if( ! cudaEllpackMatrix.copyFrom( ellpackSymMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile, 3 ); } else { cout << " done. \r"; benchmarkMatrix( cudaEllpackSymMatrix, cudaX, cudaB, nonzeroElements, "EllpackSym Cuda", stopTime, baseline, verbose, logFile ); } cudaEllpackSymMatrix.reset(); #endif ellpackSymMatrix.reset(); } typedef tnlSlicedEllpackSymMatrix< Real, tnlHost, int > SlicedEllpackSymMatrixType; SlicedEllpackSymMatrixType slicedEllpackSymMatrix; if( ! tnlMatrixReader< slicedEllpackSymMatrix >::readMtxFile( file, matrix, verbose, sym ) ) writeTestFailed( logFile, 7 ); else { allocatedElements = slicedEllpackSymMatrix.getNumberOfMatrixElements(); padding = ( double ) allocatedElements / ( double ) nonzeroElements * 100.0 - 100.0; logFile << " " << padding << endl; benchmarkMatrix( slicedEllpackSymMatrix, hostX, hostB, nonzeroElements, "SlicedEllpackSym Host", stopTime, baseline, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlSlicedEllpackSymMatrix< Real, tnlCuda, int > SlicedEllpackSymMatrixCudaType; SlicedEllpackSymMatrixCudaType cudaSlicedEllpackSymMatrix; cout << "Copying matrix to GPU... "; for( int i = 0; i < rowLengthsHost.getSize(); i++ ) rowLengthsHost[ i ] = slicedEllpackSymMatrix.getRowLength( i ); if( ! cudaSlicedEllpackSymMatrix.copyFrom( slicedEllpackSymMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile, 3 ); Loading @@ -620,22 +674,22 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) else { cout << " done. \r"; benchmarkMatrix( cudaSlicedEllpackMatrix, benchmarkMatrix( cudaSlicedEllpackSymMatrix, cudaX, cudaB, nonzeroElements, "SlicedEllpack Cuda", "SlicedEllpackSym Cuda", stopTime, baseline, verbose, logFile ); } cudaSlicedEllpackMatrix.reset(); cudaSlicedEllpackSymMatrix.reset(); #endif slicedEllpackMatrix.reset(); slicedEllpackSymMatrix.reset(); } typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType; /* typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType; ChunkedEllpackMatrixType chunkedEllpackMatrix; if( ! chunkedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ) ) writeTestFailed( logFile, 7 ); Loading