Implemented rought version of SpMV Benchmark for mtx files. (5f62fb7f) · Commits · TNL / tnl-dev

src/Benchmarks/SpMV/spmv.h

+75 −105

Original line number	Diff line number	Diff line
		@@ -31,65 +31,20 @@ template< typename Real, typename Device, typename Index >
		using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >;

		template< typename Matrix >
		int setHostTestMatrix( Matrix& matrix,
		const int elementsPerRow )
		void printMatrixInfo( const String& inputFileName,
		const Matrix& matrix,
		std::ostream& str )
		{
		const int size = matrix.getRows();
		int elements( 0 );
		for( int row = 0; row < size; row++ ) {
		int col = row - elementsPerRow / 2;
		for( int element = 0; element < elementsPerRow; element++ ) {
		if( col + element >= 0 &&
		col + element < size )
		{
		matrix.setElement( row, col + element, element + 1 );
		elements++;
		}
		}
		}
		return elements;
		}

		#ifdef HAVE_CUDA
		template< typename Matrix >
		__global__ void setCudaTestMatrixKernel( Matrix* matrix,
		const int elementsPerRow,
		const int gridIdx )
		{
		const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
		if( rowIdx >= matrix->getRows() )
		return;
		int col = rowIdx - elementsPerRow / 2;
		for( int element = 0; element < elementsPerRow; element++ ) {
		if( col + element >= 0 &&
		col + element < matrix->getColumns() )
		matrix->setElementFast( rowIdx, col + element, element + 1 );
		// Get only the name of the format from getType().
		std::string mtrxFullType = matrix.getType();
		std::string mtrxType = mtrxFullType.substr(0, mtrxFullType.find("<"));
		std::string type = mtrxType.substr(mtrxType.find(':') + 2);

		str << "\n Format: " << type << std::endl;
		str << " Rows: " << matrix.getRows() << std::endl;
		str << " Cols: " << matrix.getColumns() << std::endl;
		str << " Nonzero Elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
		}
		}
		#endif

		template< typename Matrix >
		void setCudaTestMatrix( Matrix& matrix,
		const int elementsPerRow )
		{
		#ifdef HAVE_CUDA
		typedef typename Matrix::IndexType IndexType;
		typedef typename Matrix::RealType RealType;
		Pointers::DevicePointer< Matrix > kernel_matrix( matrix );
		dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
		const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
		const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
		for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) {
		if( gridIdx == cudaGrids - 1 )
		cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize();
		setCudaTestMatrixKernel< Matrix >
		<<< cudaGridSize, cudaBlockSize >>>
		( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx );
		TNL_CHECK_CUDA_DEVICE;
		}
		#endif
		}


		// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
		template< typename Real,
		@@ -109,21 +64,37 @@ benchmarkSpMV( Benchmark & benchmark,
		HostVector hostVector, hostVector2;
		CudaVector deviceVector, deviceVector2;

		try
		{
		if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix ) )
		{
		std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
		else
		return false;
		}
		}
		catch( std::bad_alloc )
		{
		std::cerr << "Not enough memory to read the matrix." << std::endl;
		return false;
		}
		printMatrixInfo( inputFileName, hostMatrix, std::cout );
		#ifdef HAVE_CUDA
		if( ! MatrixReader< DeviceMatrix >::readMtxFile(inputFileName, deviceMatrix ) )
		std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
		// FIXME: This doesn't work for ChunkedEllpack, because
		// its cross-device assignment is not implemented yet.
		deviceMatrix = hostMatrix;
		#endif

		benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		{ "rows", convertToString( hostMatrix.getRows() ) },
		{ "columns", convertToString( hostMatrix.getColumns() ) }
		} ));

		hostVector.setSize( hostMatrix.getColumns() );
		hostVector2.setSize( hostMatrix.getRows() );

		#ifdef HAVE_CUDA
		deviceVector.setSize( deviceMatrix.getColumns() );
		deviceVector2.setSize( deviceMatrix.getRows() );
		deviceVector.setSize( hostMatrix.getColumns() );
		deviceVector2.setSize( hostMatrix.getRows() );
		#endif

		// reset function
		@@ -155,7 +126,6 @@ benchmarkSpMV( Benchmark & benchmark,
		#endif
		return true;
		}
		}

		template< typename Real = double,
		typename Index = int >
		@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
		bool result = true;
		// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
		result \|= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName );
		// result \|= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, size, elementsPerRow );
		// result \|= benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
		// result \|= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, size, elementsPerRow );
		result \|= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName );
		result \|= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName );
		// result \|= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName );
		return result;
		}

src/Benchmarks/SpMV/tnl-benchmark-spmv.h

+10 −31

Original line number	Diff line number	Diff line
		@@ -43,41 +43,25 @@ runSpMVBenchmarks( Benchmark & benchmark,
		Benchmark::MetadataMap metadata,
		const String & inputFileName )
		{
		// DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h)

		typedef Matrices::CSR< Real, Devices::Host, int > CSRType;
		CSRType csrMatrix;

		if( ! MatrixReader< CSRType >::readMtxFile( inputFileName, csrMatrix ) )
		std::cerr << "I am not able to read the matrix file " << inputFileName << "." << std::endl;
		else
		{
		const std::size_t rows = csrMatrix.getRows();
		const std::size_t cols = csrMatrix.getColumns();
		const String precision = getType< Real >();
		metadata["precision"] = precision;

		// Sparse matrix-vector multiplication
		benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")",
		metadata );
		benchmark.setMetadataColumns( Benchmark::MetadataColumns({
		{ "rows", convertToString( rows ) },
		{ "columns", convertToString( cols ) }
		} ));
		benchmarkSpmvSynthetic< Real >( benchmark, inputFileName );
		}
		}

		void
		setupConfig( Config::ConfigDescription & config )
		{
		config.addDelimiter( "Benchmark settings:" );
		config.addRequiredEntry< String >( "input-file", "Input file name." );
		config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-blas.log");
		config.addEntry< String >( "log-file", "Log file name.", "tnl-benchmark-spmv.log");
		config.addEntry< String >( "output-mode", "Mode for opening the log file.", "overwrite" );
		config.addEntryEnum( "append" );
		config.addEntryEnum( "overwrite" );
		config.addEntry< String >( "precision", "Precision of the arithmetics.", "double" );
		config.addEntry< String >( "precision", "Precision of the arithmetics.", "all" );
		config.addEntryEnum( "float" );
		config.addEntryEnum( "double" );
		config.addEntryEnum( "all" );
		@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
		const String & logFileName = parameters.getParameter< String >( "log-file" );
		const String & outputMode = parameters.getParameter< String >( "output-mode" );
		const String & precision = parameters.getParameter< String >( "precision" );
		// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
		// which have a default value. The workaround below works for int values, but it is not possible
		// to pass 64-bit integer values
		// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
		// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
		const int loops = parameters.getParameter< int >( "loops" );
		const int verbose = parameters.getParameter< int >( "verbose" );

		@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
		return EXIT_FAILURE;
		}

		std::cout << "== BENCHMARK FINISHED ==" << std::endl;
		std::cout << "\n== BENCHMARK FINISHED ==" << std::endl;
		return EXIT_SUCCESS;
		}