Loading tests/benchmarks/tnl-benchmark-spmv.h +105 −50 Original line number Diff line number Diff line Loading @@ -130,7 +130,7 @@ double computeThroughput( const long int nonzeroElements, template< typename Matrix, typename Vector > void benchmarkHostMatrix( const Matrix& matrix, void benchmarkMatrix( const Matrix& matrix, const Vector& x, Vector& b, const long int nonzeroElements, Loading Loading @@ -160,6 +160,12 @@ void benchmarkHostMatrix( const Matrix& matrix, logFile << " " << throughput << endl; } void writeTestFailed( fstream& logFile ) { logFile << "N/A" << endl; logFile << "N/A" << endl; } template< typename Real > bool setupBenchmark( const tnlParameterContainer& parameters ) { Loading Loading @@ -213,7 +219,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) rowLengthsCuda.setSize( csrMatrix.getRows() ); rowLengthsCuda = rowLengthsHost; #endif benchmarkHostMatrix( csrMatrix, benchmarkMatrix( csrMatrix, hostX, hostB, nonzeroElements, Loading @@ -226,7 +232,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType; EllpackMatrixType ellpackMatrix; ellpackMatrix.copyFrom( csrMatrix, rowLengthsHost ); benchmarkHostMatrix( ellpackMatrix, benchmarkMatrix( ellpackMatrix, hostX, hostB, nonzeroElements, Loading @@ -241,10 +247,12 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) if( ! cudaEllpackMatrix.copyFrom( ellpackMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; return false; writeTestFailed( logFile ); } else { cout << " done." << endl; benchmarkHostMatrix( cudaEllpackMatrix, benchmarkMatrix( cudaEllpackMatrix, cudaX, cudaB, nonzeroElements, Loading @@ -252,6 +260,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, verbose, logFile ); } cudaEllpackMatrix.reset(); #endif ellpackMatrix.reset(); Loading @@ -259,7 +268,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) typedef tnlSlicedEllpackMatrix< Real, tnlHost, int > SlicedEllpackMatrixType; SlicedEllpackMatrixType slicedEllpackMatrix; slicedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ); benchmarkHostMatrix( slicedEllpackMatrix, benchmarkMatrix( slicedEllpackMatrix, hostX, hostB, nonzeroElements, Loading @@ -267,12 +276,35 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlSlicedEllpackMatrix< Real, tnlCuda, int > SlicedEllpackMatrixCudaType; SlicedEllpackMatrixCudaType cudaSlicedEllpackMatrix; cout << "Copying matrix to GPU... "; if( ! cudaSlicedEllpackMatrix.copyFrom( slicedEllpackMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile ); } else { cout << " done." << endl; benchmarkMatrix( cudaSlicedEllpackMatrix, cudaX, cudaB, nonzeroElements, "SlicedEllpack Cuda", stopTime, verbose, logFile ); } cudaSlicedEllpackMatrix.reset(); #endif slicedEllpackMatrix.reset(); typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType; ChunkedEllpackMatrixType chunkedEllpackMatrix; chunkedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ); benchmarkHostMatrix( chunkedEllpackMatrix, benchmarkMatrix( chunkedEllpackMatrix, hostX, hostB, nonzeroElements, Loading @@ -280,6 +312,29 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlChunkedEllpackMatrix< Real, tnlCuda, int > ChunkedEllpackMatrixCudaType; ChunkedEllpackMatrixCudaType cudaChunkedEllpackMatrix; cout << "Copying matrix to GPU... "; if( ! cudaChunkedEllpackMatrix.copyFrom( chunkedEllpackMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile ); } else { cout << " done." << endl; benchmarkMatrix( cudaChunkedEllpackMatrix, cudaX, cudaB, nonzeroElements, "ChunkedEllpack Cuda", stopTime, verbose, logFile ); } cudaChunkedEllpackMatrix.reset(); #endif chunkedEllpackMatrix.reset(); } } Loading Loading
tests/benchmarks/tnl-benchmark-spmv.h +105 −50 Original line number Diff line number Diff line Loading @@ -130,7 +130,7 @@ double computeThroughput( const long int nonzeroElements, template< typename Matrix, typename Vector > void benchmarkHostMatrix( const Matrix& matrix, void benchmarkMatrix( const Matrix& matrix, const Vector& x, Vector& b, const long int nonzeroElements, Loading Loading @@ -160,6 +160,12 @@ void benchmarkHostMatrix( const Matrix& matrix, logFile << " " << throughput << endl; } void writeTestFailed( fstream& logFile ) { logFile << "N/A" << endl; logFile << "N/A" << endl; } template< typename Real > bool setupBenchmark( const tnlParameterContainer& parameters ) { Loading Loading @@ -213,7 +219,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) rowLengthsCuda.setSize( csrMatrix.getRows() ); rowLengthsCuda = rowLengthsHost; #endif benchmarkHostMatrix( csrMatrix, benchmarkMatrix( csrMatrix, hostX, hostB, nonzeroElements, Loading @@ -226,7 +232,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) typedef tnlEllpackMatrix< Real, tnlHost, int > EllpackMatrixType; EllpackMatrixType ellpackMatrix; ellpackMatrix.copyFrom( csrMatrix, rowLengthsHost ); benchmarkHostMatrix( ellpackMatrix, benchmarkMatrix( ellpackMatrix, hostX, hostB, nonzeroElements, Loading @@ -241,10 +247,12 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) if( ! cudaEllpackMatrix.copyFrom( ellpackMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; return false; writeTestFailed( logFile ); } else { cout << " done." << endl; benchmarkHostMatrix( cudaEllpackMatrix, benchmarkMatrix( cudaEllpackMatrix, cudaX, cudaB, nonzeroElements, Loading @@ -252,6 +260,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, verbose, logFile ); } cudaEllpackMatrix.reset(); #endif ellpackMatrix.reset(); Loading @@ -259,7 +268,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) typedef tnlSlicedEllpackMatrix< Real, tnlHost, int > SlicedEllpackMatrixType; SlicedEllpackMatrixType slicedEllpackMatrix; slicedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ); benchmarkHostMatrix( slicedEllpackMatrix, benchmarkMatrix( slicedEllpackMatrix, hostX, hostB, nonzeroElements, Loading @@ -267,12 +276,35 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlSlicedEllpackMatrix< Real, tnlCuda, int > SlicedEllpackMatrixCudaType; SlicedEllpackMatrixCudaType cudaSlicedEllpackMatrix; cout << "Copying matrix to GPU... "; if( ! cudaSlicedEllpackMatrix.copyFrom( slicedEllpackMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile ); } else { cout << " done." << endl; benchmarkMatrix( cudaSlicedEllpackMatrix, cudaX, cudaB, nonzeroElements, "SlicedEllpack Cuda", stopTime, verbose, logFile ); } cudaSlicedEllpackMatrix.reset(); #endif slicedEllpackMatrix.reset(); typedef tnlChunkedEllpackMatrix< Real, tnlHost, int > ChunkedEllpackMatrixType; ChunkedEllpackMatrixType chunkedEllpackMatrix; chunkedEllpackMatrix.copyFrom( csrMatrix, rowLengthsHost ); benchmarkHostMatrix( chunkedEllpackMatrix, benchmarkMatrix( chunkedEllpackMatrix, hostX, hostB, nonzeroElements, Loading @@ -280,6 +312,29 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, verbose, logFile ); #ifdef HAVE_CUDA typedef tnlChunkedEllpackMatrix< Real, tnlCuda, int > ChunkedEllpackMatrixCudaType; ChunkedEllpackMatrixCudaType cudaChunkedEllpackMatrix; cout << "Copying matrix to GPU... "; if( ! cudaChunkedEllpackMatrix.copyFrom( chunkedEllpackMatrix, rowLengthsCuda ) ) { cerr << "I am not able to transfer the matrix on GPU." << endl; writeTestFailed( logFile ); } else { cout << " done." << endl; benchmarkMatrix( cudaChunkedEllpackMatrix, cudaX, cudaB, nonzeroElements, "ChunkedEllpack Cuda", stopTime, verbose, logFile ); } cudaChunkedEllpackMatrix.reset(); #endif chunkedEllpackMatrix.reset(); } } Loading