Added cuSPARSE wrapper for ILU(0) (b0cd8f73) · Commits · TNL / tnl-dev

src/TNL/Matrices/CSR.h

+18 −0

Original line number	Diff line number	Diff line
		@@ -231,18 +231,36 @@ public:
		return this->rowPointers;
		}

		Containers::Vector< Index, Device, Index >&
		getRowPointers()
		{
		return this->rowPointers;
		}

		const Containers::Vector< Index, Device, Index >&
		getColumnIndexes() const
		{
		return this->columnIndexes;
		}

		Containers::Vector< Index, Device, Index >&
		getColumnIndexes()
		{
		return this->columnIndexes;
		}

		const Containers::Vector< Real, Device, Index >&
		getValues() const
		{
		return this->values;
		}

		Containers::Vector< Real, Device, Index >&
		getValues()
		{
		return this->values;
		}

		protected:

		Containers::Vector< Index, Device, Index > rowPointers;

src/TNL/Solvers/Linear/Preconditioners/ILU0.h

+114 −0

Original line number	Diff line number	Diff line
		@@ -12,10 +12,16 @@

		#pragma once

		#include <type_traits>

		#include <TNL/Object.h>
		#include <TNL/Containers/Vector.h>
		#include <TNL/Matrices/CSR.h>

		#ifdef HAVE_CUDA
		#include <cusparse.h>
		#endif

		namespace TNL {
		namespace Solvers {
		namespace Linear {
		@@ -50,6 +56,114 @@ protected:
		Matrices::CSR< RealType, DeviceType, IndexType > U;
		};

		template<>
		class ILU0< double, Devices::Cuda, int >
		{
		public:
		using RealType = double;
		using DeviceType = Devices::Cuda;
		using IndexType = int;

		ILU0()
		{
		#ifdef HAVE_CUDA
		cusparseCreate( &handle );
		#endif
		}

		template< typename MatrixPointer >
		void update( const MatrixPointer& matrixPointer );

		template< typename Vector1, typename Vector2 >
		bool solve( const Vector1& b, Vector2& x ) const;

		String getType() const
		{
		return String( "ILU0" );
		}

		~ILU0()
		{
		#ifdef HAVE_CUDA
		resetMatrices();
		cusparseDestroy( handle );
		#endif
		}

		protected:
		#ifdef HAVE_CUDA
		Matrices::CSR< RealType, DeviceType, IndexType > A;
		Containers::Vector< RealType, DeviceType, IndexType > y;

		cusparseHandle_t handle;

		cusparseMatDescr_t descr_A = 0;
		cusparseMatDescr_t descr_L = 0;
		cusparseMatDescr_t descr_U = 0;
		csrilu02Info_t info_A = 0;
		csrsv2Info_t info_L = 0;
		csrsv2Info_t info_U = 0;

		const cusparseSolvePolicy_t policy_A = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
		const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
		const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
		const cusparseOperation_t trans_L = CUSPARSE_OPERATION_NON_TRANSPOSE;
		const cusparseOperation_t trans_U = CUSPARSE_OPERATION_NON_TRANSPOSE;

		Containers::Array< char, DeviceType, int > pBuffer;

		// scaling factor for triangular solves
		const double alpha = 1.0;

		void resetMatrices()
		{
		if( descr_A ) {
		cusparseDestroyMatDescr( descr_A );
		descr_A = 0;
		}
		if( descr_L ) {
		cusparseDestroyMatDescr( descr_L );
		descr_L = 0;
		}
		if( descr_U ) {
		cusparseDestroyMatDescr( descr_U );
		descr_U = 0;
		}
		if( info_A ) {
		cusparseDestroyCsrilu02Info( info_A );
		info_A = 0;
		}
		if( info_L ) {
		cusparseDestroyCsrsv2Info( info_L );
		info_L = 0;
		}
		if( info_U ) {
		cusparseDestroyCsrsv2Info( info_U );
		info_U = 0;
		}
		pBuffer.reset();
		}

		// TODO: extend Matrices::copySparseMatrix accordingly
		template< typename Matrix,
		typename = typename std::enable_if< ! std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type >
		void copyMatrix( const Matrix& matrix )
		{
		typename Matrix::CudaType A_tmp;
		A_tmp = matrix;
		Matrices::copySparseMatrix( A, A_tmp );
		}

		template< typename Matrix,
		typename = typename std::enable_if< std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type,
		typename = void >
		void copyMatrix( const Matrix& matrix )
		{
		Matrices::copySparseMatrix( A, matrix );
		}
		#endif
		};

		} // namespace Preconditioners
		} // namespace Linear
		} // namespace Solvers

src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h

+141 −0

Original line number	Diff line number	Diff line
		@@ -153,6 +153,147 @@ solve( const Vector1& b, Vector2& x ) const
		return true;
		}


		template< typename MatrixPointer >
		void
		ILU0< double, Devices::Cuda, int >::
		update( const MatrixPointer& matrixPointer )
		{
		#ifdef HAVE_CUDA
		// TODO: only numerical factorization has to be done every time, split the rest into separate "setup" method which is called less often
		resetMatrices();

		// Note: the decomposition will be in-place, matrices L and U will have the
		// storage of A
		copyMatrix( *matrixPointer );

		const int m = A.getRows();
		const int nnz = A.getValues().getSize();

		y.setSize( m );

		// create matrix descriptors
		cusparseCreateMatDescr( &descr_A );
		cusparseSetMatIndexBase( descr_A, CUSPARSE_INDEX_BASE_ZERO );
		cusparseSetMatType( descr_A, CUSPARSE_MATRIX_TYPE_GENERAL );

		cusparseCreateMatDescr( &descr_L );
		cusparseSetMatIndexBase( descr_L, CUSPARSE_INDEX_BASE_ZERO );
		cusparseSetMatType( descr_L, CUSPARSE_MATRIX_TYPE_GENERAL );
		cusparseSetMatFillMode( descr_L, CUSPARSE_FILL_MODE_LOWER );
		cusparseSetMatDiagType( descr_L, CUSPARSE_DIAG_TYPE_UNIT );

		cusparseCreateMatDescr( &descr_U);
		cusparseSetMatIndexBase( descr_U, CUSPARSE_INDEX_BASE_ZERO );
		cusparseSetMatType( descr_U, CUSPARSE_MATRIX_TYPE_GENERAL );
		cusparseSetMatFillMode( descr_U, CUSPARSE_FILL_MODE_UPPER );
		cusparseSetMatDiagType( descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT );

		// create info structures
		cusparseCreateCsrilu02Info( &info_A );
		cusparseCreateCsrsv2Info( &info_L );
		cusparseCreateCsrsv2Info( &info_U );

		// query how much memory will be needed in csrilu02 and csrsv2, and allocate the buffer
		int pBufferSize_A, pBufferSize_L, pBufferSize_U;
		cusparseDcsrilu02_bufferSize( handle, m, nnz, descr_A,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_A, &pBufferSize_A );
		cusparseDcsrsv2_bufferSize( handle, trans_L, m, nnz, descr_L,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_L, &pBufferSize_L );
		cusparseDcsrsv2_bufferSize( handle, trans_U, m, nnz, descr_U,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_U, &pBufferSize_U );
		const int pBufferSize = max( pBufferSize_A, max( pBufferSize_L, pBufferSize_U ) );
		pBuffer.setSize( pBufferSize );

		// Symbolic analysis of the incomplete LU decomposition
		cusparseDcsrilu02_analysis( handle, m, nnz, descr_A,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_A, policy_A, pBuffer.getData() );
		int structural_zero;
		cusparseStatus_t
		status = cusparseXcsrilu02_zeroPivot( handle, info_A, &structural_zero );
		if( CUSPARSE_STATUS_ZERO_PIVOT == status ) {
		std::cerr << "A(" << structural_zero << ", " << structural_zero << ") is missing." << std::endl;
		throw 1;
		}

		// Analysis for the triangular solves for L and U
		// Trick: the lower (upper) triangular part of A has the same sparsity
		// pattern as L (U), so we can do the analysis for csrsv2 on the matrix A.
		cusparseDcsrsv2_analysis( handle, trans_L, m, nnz, descr_L,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_L, policy_L, pBuffer.getData() );
		cusparseDcsrsv2_analysis( handle, trans_U, m, nnz, descr_U,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_U, policy_U, pBuffer.getData() );

		// Numerical incomplete LU decomposition
		cusparseDcsrilu02( handle, m, nnz, descr_A,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_A, policy_A, pBuffer.getData() );
		int numerical_zero;
		status = cusparseXcsrilu02_zeroPivot( handle, info_A, &numerical_zero );
		if( CUSPARSE_STATUS_ZERO_PIVOT == status ) {
		std::cerr << "A(" << numerical_zero << ", " << numerical_zero << ") is zero." << std::endl;
		throw 1;
		}
		#else
		throw Exceptions::CudaSupportMissing();
		#endif
		}

		template< typename Vector1, typename Vector2 >
		bool
		ILU0< double, Devices::Cuda, int >::
		solve( const Vector1& b, Vector2& x ) const
		{
		#ifdef HAVE_CUDA
		const int m = A.getRows();
		const int nnz = A.getValues().getSize();

		// Step 1: solve y from Ly = b
		cusparseDcsrsv2_solve( handle, trans_L, m, nnz, &alpha, descr_L,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_L,
		b.getData(),
		(RealType*) y.getData(),
		policy_L, (void*) pBuffer.getData() );

		// Step 2: solve x from Ux = y
		cusparseDcsrsv2_solve( handle, trans_U, m, nnz, &alpha, descr_U,
		A.getValues().getData(),
		A.getRowPointers().getData(),
		A.getColumnIndexes().getData(),
		info_U,
		y.getData(),
		x.getData(),
		policy_U, (void*) pBuffer.getData() );

		return true;
		#else
		throw Exceptions::CudaSupportMissing();
		#endif
		}

		} // namespace Preconditioners
		} // namespace Linear
		} // namespace Solvers

src/TNL/Solvers/SolverStarter_impl.h

+0 −3

Original line number	Diff line number	Diff line
		@@ -294,11 +294,8 @@ class SolverStarterPreconditionerSetter
		return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Dummy, ConfigTag >::run( problem, parameters );
		if( preconditioner == "diagonal" )
		return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Diagonal, ConfigTag >::run( problem, parameters );
		// TODO: implement parallel ILU or device-dependent build config tags for preconditioners
		#ifndef HAVE_CUDA
		if( preconditioner == "ilu0" )
		return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::ILU0, ConfigTag >::run( problem, parameters );
		#endif

		std::cerr << "Unknown preconditioner " << preconditioner << ". It can be only: none, diagonal, ilu0." << std::endl;
		return false;