Added matrix setup benchmarks for dense and multidiagonal matrices. (f5010d55) · Commits · TNL / tnl-dev

Documentation/Tutorials/Matrices/CMakeLists.txt

+29 −8

Original line number	Diff line number	Diff line
		@@ -94,15 +94,36 @@ IF( BUILD_CUDA )
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixViewExample_setElement.out
		OUTPUT SparseMatrixViewExample_setElement.out )

		CUDA_ADD_EXECUTABLE( MatrixSetup_Benchmark_cuda MatrixSetup_Benchmark.cu )
		ADD_CUSTOM_COMMAND( COMMAND MatrixSetup_Benchmark_cuda >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MatrixSetup_Benchmark.out
		OUTPUT MatrixSetup_Benchmark.out )
		# Dense-matrix setup benchmark built via CUDA_ADD_EXECUTABLE; the custom command
		# runs it and redirects its standard output into the documentation snippets directory.
		CUDA_ADD_EXECUTABLE( DenseMatrixSetup_Benchmark_cuda DenseMatrixSetup_Benchmark.cu )
		ADD_CUSTOM_COMMAND( COMMAND DenseMatrixSetup_Benchmark_cuda >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixSetup_Benchmark.out
		OUTPUT DenseMatrixSetup_Benchmark.out )

		# Sparse-matrix setup benchmark built via CUDA_ADD_EXECUTABLE; the custom command
		# runs it and redirects its standard output into the documentation snippets directory.
		CUDA_ADD_EXECUTABLE( SparseMatrixSetup_Benchmark_cuda SparseMatrixSetup_Benchmark.cu )
		ADD_CUSTOM_COMMAND( COMMAND SparseMatrixSetup_Benchmark_cuda >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixSetup_Benchmark.out
		OUTPUT SparseMatrixSetup_Benchmark.out )

		# Multidiagonal-matrix setup benchmark built via CUDA_ADD_EXECUTABLE; the custom command
		# runs it and redirects its standard output into the documentation snippets directory.
		CUDA_ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark_cuda MultidiagonalMatrixSetup_Benchmark.cu )
		ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixSetup_Benchmark_cuda >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixSetup_Benchmark.out
		OUTPUT MultidiagonalMatrixSetup_Benchmark.out )

		ELSE()
		ADD_EXECUTABLE( MatrixSetup_Benchmark MatrixSetup_Benchmark_cuda.cpp )
		ADD_CUSTOM_COMMAND( COMMAND MatrixSetup_Benchmark >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MatrixSetup_Benchmark.out
		OUTPUT MatrixSetup_Benchmark.out )
		# Dense-matrix setup benchmark built with plain ADD_EXECUTABLE; the custom command
		# runs it and redirects its standard output into the documentation snippets directory.
		# NOTE(review): the source is named DenseMatrixSetup_Benchmark_cuda.cpp here -
		# confirm that file exists (DenseMatrixSetup_Benchmark.cpp may have been intended).
		ADD_EXECUTABLE( DenseMatrixSetup_Benchmark DenseMatrixSetup_Benchmark_cuda.cpp )
		ADD_CUSTOM_COMMAND( COMMAND DenseMatrixSetup_Benchmark >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixSetup_Benchmark.out
		OUTPUT DenseMatrixSetup_Benchmark.out )

		# Sparse-matrix setup benchmark built with plain ADD_EXECUTABLE; the custom command
		# runs it and redirects its standard output into the documentation snippets directory.
		# NOTE(review): the source is named SparseMatrixSetup_Benchmark_cuda.cpp here -
		# confirm that file exists (SparseMatrixSetup_Benchmark.cpp may have been intended).
		ADD_EXECUTABLE( SparseMatrixSetup_Benchmark SparseMatrixSetup_Benchmark_cuda.cpp )
		ADD_CUSTOM_COMMAND( COMMAND SparseMatrixSetup_Benchmark >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SparseMatrixSetup_Benchmark.out
		OUTPUT SparseMatrixSetup_Benchmark.out )

		# Multidiagonal-matrix setup benchmark built with plain ADD_EXECUTABLE; the custom command
		# runs it and redirects its standard output into the documentation snippets directory.
		# NOTE(review): the source is named MultidiagonalMatrixSetup_Benchmark_cuda.cpp here -
		# confirm that file exists (MultidiagonalMatrixSetup_Benchmark.cpp may have been intended).
		ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark MultidiagonalMatrixSetup_Benchmark_cuda.cpp )
		ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixSetup_Benchmark >
		${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixSetup_Benchmark.out
		OUTPUT MultidiagonalMatrixSetup_Benchmark.out )
		ENDIF()

		IF( BUILD_CUDA )

Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cpp

0 → 100644

+123 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <TNL/Algorithms/ParallelFor.h>
		#include <TNL/Matrices/DenseMatrix.h>
		#include <TNL/Matrices/SparseMatrix.h>
		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/Timer.h>

		// Number of repetitions of every measurement; the reported time is the total divided by this count.
		const int testsCount = 5;

		/***
		 * Resizes `matrix` to matrixSize x matrixSize and fills it by calling
		 * `setElement` once per entry from an ordinary sequential loop.
		 * The entry at (row, column) receives the value row + column.
		 */
		template< typename Matrix >
		void setElement_on_host( const int matrixSize, Matrix& matrix )
		{
		   matrix.setDimensions( matrixSize, matrixSize );

		   // Columns are walked by the outer loop, rows by the inner one.
		   for( int column = 0; column < matrixSize; ++column )
		   {
		      for( int row = 0; row < matrixSize; ++row )
		      {
		         matrix.setElement( row, column, row + column );
		      }
		   }
		}

		template< typename Matrix >
		void setElement_on_device( const int matrixSize, Matrix& matrix )
		{
		matrix.setDimensions( matrixSize, matrixSize );

		auto matrixView = matrix.getView();
		auto f = [=] __cuda_callable__ ( int i, int j ) mutable {
		matrixView.setElement( i, j, i + j );
		};
		TNL::Algorithms::ParallelFor2D< typename Matrix::DeviceType >::exec( 0, 0, matrixSize, matrixSize, f );
		}

		template< typename Matrix >
		void getRow( const int matrixSize, Matrix& matrix )
		{
		matrix.setDimensions( matrixSize, matrixSize );

		auto matrixView = matrix.getView();
		auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
		auto row = matrixView.getRow( rowIdx );
		for( int i = 0; i < matrixSize; i++ )
		row.setElement( i, rowIdx + i );
		};
		TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( 0, matrixSize, f );
		}

		/***
		 * Resizes `matrix` to matrixSize x matrixSize and fills it with the matrix's
		 * own `forRows` method applied to rows [0, matrixSize). The lambda stores
		 * rowIdx + columnIdx into the element value it is handed; `localIdx` and
		 * `compute` are not used.
		 * NOTE(review): the value is taken as `float&`, so this presumably only fits
		 * matrices whose element type is float - confirm against callers.
		 */
		template< typename Matrix >
		void forRows( const int matrixSize, Matrix& matrix )
		{
		   matrix.setDimensions( matrixSize, matrixSize );

		   auto assignValue = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, float& value, bool& compute ) mutable {
		      value = rowIdx + columnIdx;
		   };
		   matrix.forRows( 0, matrixSize, assignValue );
		}

		template< typename Device >
		void setupDenseMatrix()
		{
		std::cout << " Dense matrix test:" << std::endl;
		for( int matrixSize = 16; matrixSize <= 8192; matrixSize *= 2 )
		{
		std::cout << " Matrix size = " << matrixSize << std::endl;
		TNL::Timer timer;

		std::cout << " setElement on host: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::DenseMatrix< float, Device, int > matrix;
		setElement_on_host( matrixSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		std::cout << " setElement on device: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::DenseMatrix< float, Device, int > matrix;
		setElement_on_device( matrixSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		std::cout << " getRow: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::DenseMatrix< float, Device, int > matrix;
		getRow( matrixSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		std::cout << " forRows: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::DenseMatrix< float, Device, int > matrix;
		forRows( matrixSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;
		}
		}


		/***
		 * Entry point: runs the dense matrix setup benchmark for the Host device and,
		 * when HAVE_CUDA is defined at compile time, for the Cuda device as well.
		 * The command-line arguments are not used.
		 */
		int main( int argc, char* argv[] )
		{
		   std::cout << "Creating dense matrix on CPU ... " << std::endl;
		   setupDenseMatrix< TNL::Devices::Host >();

		#ifdef HAVE_CUDA
		   std::cout << "Creating dense matrix on CUDA GPU ... " << std::endl;
		   setupDenseMatrix< TNL::Devices::Cuda >();
		#endif

		   return 0;
		}

Documentation/Tutorials/Matrices/DenseMatrixSetup_Benchmark.cu

0 → 120000

+1 −0

Original line number	Diff line number	Diff line
		DenseMatrixSetup_Benchmark.cpp
		No newline at end of file

Documentation/Tutorials/Matrices/MatrixSetup_Benchmark.cu

deleted120000 → 0

+0 −1

Original line number	Diff line number	Diff line
		MatrixSetup_Benchmark.cpp
		No newline at end of file

Documentation/Tutorials/Matrices/MultidiagonalMatrixSetup_Benchmark.cpp

0 → 100644

+221 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <TNL/Algorithms/ParallelFor.h>
		#include <TNL/Matrices/MultidiagonalMatrix.h>
		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/Timer.h>

		// Number of repetitions of every measurement; the reported time is the total divided by this count.
		const int testsCount = 5;

		/***
		 * Returns a 5-element vector, allocated for the given Device, holding the
		 * diagonal offsets -gridSize, -1, 0, 1 and gridSize (in this order). The
		 * functions in this file pass it to `setDimensions` of the multidiagonal matrix.
		 */
		template< typename Device >
		TNL::Containers::Vector< int, Device > getOffsets( const int gridSize )
		{
		   const int diagonalsCount = 5;
		   const int shifts[ diagonalsCount ] = { -gridSize, -1, 0, 1, gridSize };

		   TNL::Containers::Vector< int, Device > offsets( diagonalsCount );
		   for( int k = 0; k < diagonalsCount; ++k )
		   {
		      offsets.setElement( k, shifts[ k ] );
		   }
		   return offsets;
		}

		template< typename Matrix >
		void setElement_on_host( const int gridSize, Matrix& matrix )
		{
		   /***
		    * Set matrix representing approximation of the Laplace operator on regular
		    * grid using the finite difference method by means of the setElement method
		    * called from the host system.
		    *
		    * The matrix has gridSize * gridSize rows and columns; the row of grid node
		    * (i, j) is j * gridSize + i. Rows of boundary nodes get only 1.0 on the
		    * diagonal, rows of interior nodes get the stencil 1, 1, -4, 1, 1.
		    */
		   const int matrixSize = gridSize * gridSize;
		   matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) );

		   for( int j = 0; j < gridSize; j++ )
		      for( int i = 0; i < gridSize; i++ )
		      {
		         const int rowIdx = j * gridSize + i;
		         const bool onBoundary = ( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 );
		         if( onBoundary )
		            matrix.setElement( rowIdx, rowIdx, 1.0 );
		         else
		         {
		            matrix.setElement( rowIdx, rowIdx - gridSize, 1.0 );
		            matrix.setElement( rowIdx, rowIdx - 1, 1.0 );
		            matrix.setElement( rowIdx, rowIdx, -4.0 );
		            matrix.setElement( rowIdx, rowIdx + 1, 1.0 );
		            matrix.setElement( rowIdx, rowIdx + gridSize, 1.0 );
		         }
		      }
		}

		template< typename Matrix >
		void setElement_on_device( const int gridSize, Matrix& matrix )
		{
		   /***
		    * Set matrix representing approximation of the Laplace operator on regular
		    * grid using the finite difference method by means of setElement method called
		    * from the native device.
		    *
		    * The matrix has gridSize * gridSize rows and columns; the row of grid node
		    * (i, j) is j * gridSize + i. Rows of boundary nodes get only 1.0 on the
		    * diagonal, rows of interior nodes get the stencil 1, 1, -4, 1, 1.
		    */
		   const int matrixSize = gridSize * gridSize;
		   matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) );

		   // The view is captured by value in the lambda, which ParallelFor2D runs
		   // for every (i, j) pair of the grid.
		   auto matrixView = matrix.getView();
		   auto f = [=] __cuda_callable__ ( int i, int j ) mutable {
		      const int rowIdx = j * gridSize + i;
		      const bool onBoundary = ( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 );
		      if( onBoundary )
		         matrixView.setElement( rowIdx, rowIdx, 1.0 );
		      else
		      {
		         matrixView.setElement( rowIdx, rowIdx - gridSize, 1.0 );
		         matrixView.setElement( rowIdx, rowIdx - 1, 1.0 );
		         matrixView.setElement( rowIdx, rowIdx, -4.0 );
		         matrixView.setElement( rowIdx, rowIdx + 1, 1.0 );
		         matrixView.setElement( rowIdx, rowIdx + gridSize, 1.0 );
		      }
		   };
		   TNL::Algorithms::ParallelFor2D< typename Matrix::DeviceType >::exec( 0, 0, gridSize, gridSize, f );
		}

		template< typename Matrix >
		void getRow( const int gridSize, Matrix& matrix )
		{
		   /***
		    * Set matrix representing approximation of the Laplace operator on regular
		    * grid using the finite difference method by means of getRow method.
		    *
		    * Row elements are addressed by their local index 0..4, which follows the
		    * order of the offsets passed to setDimensions (-gridSize, -1, 0, 1,
		    * gridSize); local index 2 is therefore the main diagonal.
		    */
		   const int matrixSize = gridSize * gridSize;
		   matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) );

		   auto matrixView = matrix.getView();
		   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
		      // Recover the grid coordinates of the node this row belongs to.
		      const int i = rowIdx % gridSize;
		      const int j = rowIdx / gridSize;
		      const bool onBoundary = ( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 );
		      auto row = matrixView.getRow( rowIdx );
		      if( onBoundary )
		         row.setElement( 2, 1.0 );
		      else
		      {
		         row.setElement( 0, 1.0 );
		         row.setElement( 1, 1.0 );
		         row.setElement( 2, -4.0 );
		         row.setElement( 3, 1.0 );
		         row.setElement( 4, 1.0 );
		      }
		   };
		   TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( 0, matrixSize, f );
		}

		template< typename Matrix >
		void forRows( const int gridSize, Matrix& matrix )
		{
		   /***
		    * Set matrix representing approximation of the Laplace operator on regular
		    * grid using the finite difference method by means of forRows method.
		    *
		    * The lambda is handed one element at a time. `localIdx` selects the
		    * diagonal in the order of the offsets passed to setDimensions
		    * (-gridSize, -1, 0, 1, gridSize), so local index 2 is the main diagonal.
		    * Rows of boundary nodes get only 1.0 on the main diagonal (their other
		    * elements are left untouched, as in the other setup functions of this
		    * file); rows of interior nodes get the stencil 1, 1, -4, 1, 1.
		    */

		   const int matrixSize = gridSize * gridSize;
		   matrix.setDimensions( matrixSize, matrixSize, getOffsets< typename Matrix::DeviceType >( gridSize ) );

		   // `columnIdx` is received by value, so the lambda cannot change the column
		   // of an element; only `value` is written. `compute` is not used.
		   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, float& value, bool& compute ) mutable {
		      const int i = rowIdx % gridSize;
		      const int j = rowIdx / gridSize;
		      // The boundary test is parenthesised as a whole: `&&` binds tighter than `||`.
		      const bool onBoundary = ( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 );
		      const bool onMainDiagonal = ( localIdx == 2 );
		      if( onBoundary )
		      {
		         if( onMainDiagonal )
		            value = 1.0;
		      }
		      else
		      {
		         if( onMainDiagonal )
		            value = -4.0;
		         else
		            value = 1.0;
		      }
		   };
		   matrix.forRows( 0, matrixSize, f );
		}

		template< typename Device >
		void laplaceOperatorMultidiagonalMatrix()
		{
		std::cout << " Sparse matrix test:" << std::endl;
		for( int gridSize = 16; gridSize <= 8192; gridSize *= 2 )
		{
		std::cout << " Grid size = " << gridSize << std::endl;
		TNL::Timer timer;

		std::cout << " setElement on host: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix;
		setElement_on_host( gridSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		std::cout << " setElement on device: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix;
		setElement_on_device( gridSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		std::cout << " getRow: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix;
		getRow( gridSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		std::cout << " forRows: ";
		timer.reset();
		timer.start();
		for( int i = 0; i < testsCount; i++ )
		{
		TNL::Matrices::MultidiagonalMatrix< float, Device, int > matrix;
		forRows( gridSize, matrix );
		}
		timer.stop();
		std::cout << timer.getRealTime() / ( double ) testsCount << " sec." << std::endl;

		}
		}

		/***
		 * Entry point: runs the multidiagonal Laplace-operator matrix benchmark for
		 * the Host device and, when HAVE_CUDA is defined at compile time, for the
		 * Cuda device as well. The command-line arguments are not used.
		 */
		int main( int argc, char* argv[] )
		{
		   std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl;
		   laplaceOperatorMultidiagonalMatrix< TNL::Devices::Host >();

		#ifdef HAVE_CUDA
		   std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl;
		   laplaceOperatorMultidiagonalMatrix< TNL::Devices::Cuda >();
		#endif

		   return 0;
		}