Added another benchmark of matrix setup. (c8bb8d1e) · Commits · TNL / tnl-dev

Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp

+28 −0

Original line number	Diff line number	Diff line
		@@ -17,6 +17,20 @@ void setElement_on_host( const int matrixSize, Matrix& matrix )
		matrix.setElement( i, j, i + j );
		}

		template< typename Matrix >
		void setElement_on_host_and_transfer( const int matrixSize, Matrix& matrix )
		{
		using RealType = typename Matrix::RealType;
		using IndexType = typename Matrix::IndexType;
		using HostMatrix = TNL::Matrices::DenseMatrix< RealType, TNL::Devices::Host, IndexType >;
		HostMatrix hostMatrix( matrixSize, matrixSize );

		for( int j = 0; j < matrixSize; j++ )
		for( int i = 0; i < matrixSize; i++ )
		hostMatrix.setElement( i, j, i + j );
		matrix = hostMatrix;
		}

		template< typename Matrix >
		void setElement_on_device( const int matrixSize, Matrix& matrix )
		{
		@@ -85,6 +99,20 @@ void setupDenseMatrix()
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		// Extra measurement taken only when Device is TNL::Devices::Cuda:
		// the matrix is set up by setElement_on_host_and_transfer.
		if( std::is_same< Device, TNL::Devices::Cuda >::value )
		{
		std::cout << " setElement on host and transfer on GPU: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		// A fresh matrix is created in every repetition, so its construction
		// and destruction are part of the measured time.
		TNL::Matrices::DenseMatrix< float, Device, int > matrix;
		setElement_on_host_and_transfer( matrixSize, matrix );
		}
		timer.stop();
		// Report the measured time divided by the number of repetitions.
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;
		}

		std::cout << " getRow: ";
		timer.reset();
		timer.start();

Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp

+42 −0

Original line number	Diff line number	Diff line
		@@ -47,6 +47,34 @@ void setElement_on_host( const int gridSize, Matrix& matrix )
		}
		}

		/**
		 * Builds a multidiagonal matrix in host memory and assigns it to `matrix`.
		 *
		 * The matrix has gridSize * gridSize rows and columns; row index
		 * `j * gridSize + i` corresponds to the grid node (i, j). Rows of nodes on
		 * the border of the grid get a single 1.0 on the diagonal. All other rows
		 * get the five entries 1, 1, -4, 1, 1 at columns rowIdx - gridSize,
		 * rowIdx - 1, rowIdx, rowIdx + 1 and rowIdx + gridSize.
		 *
		 * \param gridSize number of grid nodes along one side of the square grid.
		 * \param matrix   output matrix; its previous content is overwritten.
		 */
		template< typename Matrix >
		void setElement_on_host_and_transfer( const int gridSize, Matrix& matrix )
		{
		   using RealType = typename Matrix::RealType;
		   using IndexType = typename Matrix::IndexType;
		   using HostMatrix = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType >;
		   const int matrixSize = gridSize * gridSize;

		   // NOTE(review): the offsets are requested for Matrix::DeviceType although
		   // hostMatrix lives on the host - confirm whether
		   // getOffsets< TNL::Devices::Host > was intended here.
		   HostMatrix hostMatrix( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) );

		   for( int j = 0; j < gridSize; j++ )
		      for( int i = 0; i < gridSize; i++ )
		      {
		         const int rowIdx = j * gridSize + i;
		         // Border node: only the diagonal entry is set.
		         if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 )
		            hostMatrix.setElement( rowIdx, rowIdx, 1.0 );
		         else
		         {
		            // Interior node: rowIdx -/+ gridSize and rowIdx -/+ 1 are the
		            // neighbouring nodes in the j and i directions.
		            hostMatrix.setElement( rowIdx, rowIdx - gridSize, 1.0 );
		            hostMatrix.setElement( rowIdx, rowIdx - 1, 1.0 );
		            hostMatrix.setElement( rowIdx, rowIdx, -4.0 );
		            hostMatrix.setElement( rowIdx, rowIdx + 1, 1.0 );
		            hostMatrix.setElement( rowIdx, rowIdx + gridSize, 1.0 );
		         }
		      }

		   // One assignment hands the finished host matrix over to `matrix`.
		   matrix = hostMatrix;
		}


		template< typename Matrix >
		void setElement_on_device( const int gridSize, Matrix& matrix )
		{
		@@ -173,6 +201,20 @@ void laplaceOperatorMultidiagonalMatrix()
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		// Extra measurement taken only when Device is TNL::Devices::Cuda:
		// the matrix is set up by setElement_on_host_and_transfer.
		if( std::is_same< Device, TNL::Devices::Cuda >::value )
		{
		std::cout << " setElement on host and transfer on GPU: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		// A fresh matrix is created in every repetition, so its construction
		// and destruction are part of the measured time.
		TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix;
		setElement_on_host_and_transfer( gridSize, matrix );
		}
		timer.stop();
		// Report the measured time divided by the number of repetitions.
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;
		}

		std::cout << " setElement on device: ";
		timer.reset();
		timer.start();

Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp

+43 −0

Original line number	Diff line number	Diff line
		@@ -65,6 +65,35 @@ void setElement_on_host( const int gridSize, Matrix& matrix )
		}
		}

		/**
		 * Builds a sparse matrix in host memory and assigns it to `matrix`.
		 *
		 * The matrix has gridSize * gridSize rows and columns, with a capacity of
		 * five entries reserved in every row. Row index `j * gridSize + i`
		 * corresponds to the grid node (i, j). Rows of nodes on the border of the
		 * grid get a single 1.0 on the diagonal. All other rows get the five
		 * entries 1, 1, -4, 1, 1 at columns rowIdx - gridSize, rowIdx - 1, rowIdx,
		 * rowIdx + 1 and rowIdx + gridSize.
		 *
		 * \param gridSize number of grid nodes along one side of the square grid.
		 * \param matrix   output matrix; its previous content is overwritten.
		 */
		template< typename Matrix >
		void setElement_on_host_and_transfer( const int gridSize, Matrix& matrix )
		{
		   using RealType = typename Matrix::RealType;
		   // `Self` is a member template of a dependent type, so it is named with
		   // the `template` disambiguator to stay portable across compilers.
		   using HostMatrix = typename Matrix::template Self< RealType, TNL::Devices::Host >;

		   const int matrixSize = gridSize * gridSize;
		   // Every row reserves room for five entries, the most any row receives below.
		   TNL::Containers::Vector< int, typename HostMatrix::DeviceType, int > rowCapacities( matrixSize, 5 );
		   HostMatrix hostMatrix( matrixSize, matrixSize );
		   hostMatrix.setRowCapacities( rowCapacities );

		   for( int j = 0; j < gridSize; j++ )
		      for( int i = 0; i < gridSize; i++ )
		      {
		         const int rowIdx = j * gridSize + i;
		         // Border node: only the diagonal entry is set.
		         if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 )
		            hostMatrix.setElement( rowIdx, rowIdx, 1.0 );
		         else
		         {
		            // Interior node: rowIdx -/+ gridSize and rowIdx -/+ 1 are the
		            // neighbouring nodes in the j and i directions.
		            hostMatrix.setElement( rowIdx, rowIdx - gridSize, 1.0 );
		            hostMatrix.setElement( rowIdx, rowIdx - 1, 1.0 );
		            hostMatrix.setElement( rowIdx, rowIdx, -4.0 );
		            hostMatrix.setElement( rowIdx, rowIdx + 1, 1.0 );
		            hostMatrix.setElement( rowIdx, rowIdx + gridSize, 1.0 );
		         }
		      }

		   // One assignment hands the finished host matrix over to `matrix`.
		   matrix = hostMatrix;
		}

		template< typename Matrix >
		void setElement_on_device( const int gridSize, Matrix& matrix )
		{
		@@ -208,6 +237,20 @@ void laplaceOperatorSparseMatrix()
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		// Extra measurement taken only when Device is TNL::Devices::Cuda:
		// the matrix is set up by setElement_on_host_and_transfer.
		if( std::is_same< Device, TNL::Devices::Cuda >::value )
		{
		std::cout << " setElement on host and transfer on GPU: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		// A fresh matrix is created in every repetition, so its construction
		// and destruction are part of the measured time.
		TNL::Matrices::SparseMatrix< float, Device, int > matrix;
		setElement_on_host_and_transfer( gridSize, matrix );
		}
		timer.stop();
		// Report the measured time divided by the number of repetitions.
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;
		}

		std::cout << " setElement on device: ";
		timer.reset();
		timer.start();