Loading src/TNL/Matrices/CSR.h +18 −0 Original line number Diff line number Diff line Loading @@ -231,18 +231,36 @@ public: return this->rowPointers; } Containers::Vector< Index, Device, Index >& getRowPointers() { return this->rowPointers; } const Containers::Vector< Index, Device, Index >& getColumnIndexes() const { return this->columnIndexes; } Containers::Vector< Index, Device, Index >& getColumnIndexes() { return this->columnIndexes; } const Containers::Vector< Real, Device, Index >& getValues() const { return this->values; } Containers::Vector< Real, Device, Index >& getValues() { return this->values; } protected: Containers::Vector< Index, Device, Index > rowPointers; Loading src/TNL/Solvers/Linear/Preconditioners/ILU0.h +114 −0 Original line number Diff line number Diff line Loading @@ -12,10 +12,16 @@ #pragma once #include <type_traits> #include <TNL/Object.h> #include <TNL/Containers/Vector.h> #include <TNL/Matrices/CSR.h> #ifdef HAVE_CUDA #include <cusparse.h> #endif namespace TNL { namespace Solvers { namespace Linear { Loading Loading @@ -50,6 +56,114 @@ protected: Matrices::CSR< RealType, DeviceType, IndexType > U; }; template<> class ILU0< double, Devices::Cuda, int > { public: using RealType = double; using DeviceType = Devices::Cuda; using IndexType = int; ILU0() { #ifdef HAVE_CUDA cusparseCreate( &handle ); #endif } template< typename MatrixPointer > void update( const MatrixPointer& matrixPointer ); template< typename Vector1, typename Vector2 > bool solve( const Vector1& b, Vector2& x ) const; String getType() const { return String( "ILU0" ); } ~ILU0() { #ifdef HAVE_CUDA resetMatrices(); cusparseDestroy( handle ); #endif } protected: #ifdef HAVE_CUDA Matrices::CSR< RealType, DeviceType, IndexType > A; Containers::Vector< RealType, DeviceType, IndexType > y; cusparseHandle_t handle; cusparseMatDescr_t descr_A = 0; cusparseMatDescr_t descr_L = 0; cusparseMatDescr_t descr_U = 0; csrilu02Info_t info_A = 0; csrsv2Info_t info_L = 0; csrsv2Info_t info_U = 0; const cusparseSolvePolicy_t policy_A = CUSPARSE_SOLVE_POLICY_USE_LEVEL; const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_USE_LEVEL; const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL; const cusparseOperation_t trans_L = CUSPARSE_OPERATION_NON_TRANSPOSE; const cusparseOperation_t trans_U = CUSPARSE_OPERATION_NON_TRANSPOSE; Containers::Array< char, DeviceType, int > pBuffer; // scaling factor for triangular solves const double alpha = 1.0; void resetMatrices() { if( descr_A ) { cusparseDestroyMatDescr( descr_A ); descr_A = 0; } if( descr_L ) { cusparseDestroyMatDescr( descr_L ); descr_L = 0; } if( descr_U ) { cusparseDestroyMatDescr( descr_U ); descr_U = 0; } if( info_A ) { cusparseDestroyCsrilu02Info( info_A ); info_A = 0; } if( info_L ) { cusparseDestroyCsrsv2Info( info_L ); info_L = 0; } if( info_U ) { cusparseDestroyCsrsv2Info( info_U ); info_U = 0; } pBuffer.reset(); } // TODO: extend Matrices::copySparseMatrix accordingly template< typename Matrix, typename = typename std::enable_if< ! std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type > void copyMatrix( const Matrix& matrix ) { typename Matrix::CudaType A_tmp; A_tmp = matrix; Matrices::copySparseMatrix( A, A_tmp ); } template< typename Matrix, typename = typename std::enable_if< std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type, typename = void > void copyMatrix( const Matrix& matrix ) { Matrices::copySparseMatrix( A, matrix ); } #endif }; } // namespace Preconditioners } // namespace Linear } // namespace Solvers Loading src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +141 −0 Original line number Diff line number Diff line Loading @@ -153,6 +153,147 @@ solve( const Vector1& b, Vector2& x ) const return true; } template< typename MatrixPointer > void ILU0< double, Devices::Cuda, int >:: update( const MatrixPointer& matrixPointer ) { #ifdef HAVE_CUDA // TODO: only numerical factorization has to be done every time, split the rest into separate "setup" method which is called less often resetMatrices(); // Note: the decomposition will be in-place, matrices L and U will have the // storage of A copyMatrix( *matrixPointer ); const int m = A.getRows(); const int nnz = A.getValues().getSize(); y.setSize( m ); // create matrix descriptors cusparseCreateMatDescr( &descr_A ); cusparseSetMatIndexBase( descr_A, CUSPARSE_INDEX_BASE_ZERO ); cusparseSetMatType( descr_A, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseCreateMatDescr( &descr_L ); cusparseSetMatIndexBase( descr_L, CUSPARSE_INDEX_BASE_ZERO ); cusparseSetMatType( descr_L, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseSetMatFillMode( descr_L, CUSPARSE_FILL_MODE_LOWER ); cusparseSetMatDiagType( descr_L, CUSPARSE_DIAG_TYPE_UNIT ); cusparseCreateMatDescr( &descr_U); cusparseSetMatIndexBase( descr_U, CUSPARSE_INDEX_BASE_ZERO ); cusparseSetMatType( descr_U, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseSetMatFillMode( descr_U, CUSPARSE_FILL_MODE_UPPER ); cusparseSetMatDiagType( descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT ); // create info structures cusparseCreateCsrilu02Info( &info_A ); cusparseCreateCsrsv2Info( &info_L ); cusparseCreateCsrsv2Info( &info_U ); // query how much memory will be needed in csrilu02 and csrsv2, and allocate the buffer int pBufferSize_A, pBufferSize_L, pBufferSize_U; cusparseDcsrilu02_bufferSize( handle, m, nnz, descr_A, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_A, &pBufferSize_A ); cusparseDcsrsv2_bufferSize( handle, trans_L, m, nnz, descr_L, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_L, &pBufferSize_L ); cusparseDcsrsv2_bufferSize( handle, trans_U, m, nnz, descr_U, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_U, &pBufferSize_U ); const int pBufferSize = max( pBufferSize_A, max( pBufferSize_L, pBufferSize_U ) ); pBuffer.setSize( pBufferSize ); // Symbolic analysis of the incomplete LU decomposition cusparseDcsrilu02_analysis( handle, m, nnz, descr_A, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int structural_zero; cusparseStatus_t status = cusparseXcsrilu02_zeroPivot( handle, info_A, &structural_zero ); if( CUSPARSE_STATUS_ZERO_PIVOT == status ) { std::cerr << "A(" << structural_zero << ", " << structural_zero << ") is missing." << std::endl; throw 1; } // Analysis for the triangular solves for L and U // Trick: the lower (upper) triangular part of A has the same sparsity // pattern as L (U), so we can do the analysis for csrsv2 on the matrix A. cusparseDcsrsv2_analysis( handle, trans_L, m, nnz, descr_L, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_L, policy_L, pBuffer.getData() ); cusparseDcsrsv2_analysis( handle, trans_U, m, nnz, descr_U, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_U, policy_U, pBuffer.getData() ); // Numerical incomplete LU decomposition cusparseDcsrilu02( handle, m, nnz, descr_A, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int numerical_zero; status = cusparseXcsrilu02_zeroPivot( handle, info_A, &numerical_zero ); if( CUSPARSE_STATUS_ZERO_PIVOT == status ) { std::cerr << "A(" << numerical_zero << ", " << numerical_zero << ") is zero." << std::endl; throw 1; } #else throw Exceptions::CudaSupportMissing(); #endif } template< typename Vector1, typename Vector2 > bool ILU0< double, Devices::Cuda, int >:: solve( const Vector1& b, Vector2& x ) const { #ifdef HAVE_CUDA const int m = A.getRows(); const int nnz = A.getValues().getSize(); // Step 1: solve y from Ly = b cusparseDcsrsv2_solve( handle, trans_L, m, nnz, &alpha, descr_L, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_L, b.getData(), (RealType*) y.getData(), policy_L, (void*) pBuffer.getData() ); // Step 2: solve x from Ux = y cusparseDcsrsv2_solve( handle, trans_U, m, nnz, &alpha, descr_U, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_U, y.getData(), x.getData(), policy_U, (void*) pBuffer.getData() ); return true; #else throw Exceptions::CudaSupportMissing(); #endif } } // namespace Preconditioners } // namespace Linear } // namespace Solvers Loading src/TNL/Solvers/SolverStarter_impl.h +0 −3 Original line number Diff line number Diff line Loading @@ -294,11 +294,8 @@ class SolverStarterPreconditionerSetter return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Dummy, ConfigTag >::run( problem, parameters ); if( preconditioner == "diagonal" ) return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Diagonal, ConfigTag >::run( problem, parameters ); // TODO: implement parallel ILU or device-dependent build config tags for preconditioners #ifndef HAVE_CUDA if( preconditioner == "ilu0" ) return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::ILU0, ConfigTag >::run( problem, parameters ); #endif std::cerr << "Unknown preconditioner " << preconditioner << ". It can be only: none, diagonal, ilu0." << std::endl; return false; Loading Loading
src/TNL/Matrices/CSR.h +18 −0 Original line number Diff line number Diff line Loading @@ -231,18 +231,36 @@ public: return this->rowPointers; } Containers::Vector< Index, Device, Index >& getRowPointers() { return this->rowPointers; } const Containers::Vector< Index, Device, Index >& getColumnIndexes() const { return this->columnIndexes; } Containers::Vector< Index, Device, Index >& getColumnIndexes() { return this->columnIndexes; } const Containers::Vector< Real, Device, Index >& getValues() const { return this->values; } Containers::Vector< Real, Device, Index >& getValues() { return this->values; } protected: Containers::Vector< Index, Device, Index > rowPointers; Loading
src/TNL/Solvers/Linear/Preconditioners/ILU0.h +114 −0 Original line number Diff line number Diff line Loading @@ -12,10 +12,16 @@ #pragma once #include <type_traits> #include <TNL/Object.h> #include <TNL/Containers/Vector.h> #include <TNL/Matrices/CSR.h> #ifdef HAVE_CUDA #include <cusparse.h> #endif namespace TNL { namespace Solvers { namespace Linear { Loading Loading @@ -50,6 +56,114 @@ protected: Matrices::CSR< RealType, DeviceType, IndexType > U; }; template<> class ILU0< double, Devices::Cuda, int > { public: using RealType = double; using DeviceType = Devices::Cuda; using IndexType = int; ILU0() { #ifdef HAVE_CUDA cusparseCreate( &handle ); #endif } template< typename MatrixPointer > void update( const MatrixPointer& matrixPointer ); template< typename Vector1, typename Vector2 > bool solve( const Vector1& b, Vector2& x ) const; String getType() const { return String( "ILU0" ); } ~ILU0() { #ifdef HAVE_CUDA resetMatrices(); cusparseDestroy( handle ); #endif } protected: #ifdef HAVE_CUDA Matrices::CSR< RealType, DeviceType, IndexType > A; Containers::Vector< RealType, DeviceType, IndexType > y; cusparseHandle_t handle; cusparseMatDescr_t descr_A = 0; cusparseMatDescr_t descr_L = 0; cusparseMatDescr_t descr_U = 0; csrilu02Info_t info_A = 0; csrsv2Info_t info_L = 0; csrsv2Info_t info_U = 0; const cusparseSolvePolicy_t policy_A = CUSPARSE_SOLVE_POLICY_USE_LEVEL; const cusparseSolvePolicy_t policy_L = CUSPARSE_SOLVE_POLICY_USE_LEVEL; const cusparseSolvePolicy_t policy_U = CUSPARSE_SOLVE_POLICY_USE_LEVEL; const cusparseOperation_t trans_L = CUSPARSE_OPERATION_NON_TRANSPOSE; const cusparseOperation_t trans_U = CUSPARSE_OPERATION_NON_TRANSPOSE; Containers::Array< char, DeviceType, int > pBuffer; // scaling factor for triangular solves const double alpha = 1.0; void resetMatrices() { if( descr_A ) { cusparseDestroyMatDescr( descr_A ); descr_A = 0; } if( descr_L ) { cusparseDestroyMatDescr( descr_L ); descr_L = 0; } if( descr_U ) { cusparseDestroyMatDescr( descr_U ); descr_U = 0; } if( info_A ) { cusparseDestroyCsrilu02Info( info_A ); info_A = 0; } if( info_L ) { cusparseDestroyCsrsv2Info( info_L ); info_L = 0; } if( info_U ) { cusparseDestroyCsrsv2Info( info_U ); info_U = 0; } pBuffer.reset(); } // TODO: extend Matrices::copySparseMatrix accordingly template< typename Matrix, typename = typename std::enable_if< ! std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type > void copyMatrix( const Matrix& matrix ) { typename Matrix::CudaType A_tmp; A_tmp = matrix; Matrices::copySparseMatrix( A, A_tmp ); } template< typename Matrix, typename = typename std::enable_if< std::is_same< DeviceType, typename Matrix::DeviceType >::value >::type, typename = void > void copyMatrix( const Matrix& matrix ) { Matrices::copySparseMatrix( A, matrix ); } #endif }; } // namespace Preconditioners } // namespace Linear } // namespace Solvers Loading
src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +141 −0 Original line number Diff line number Diff line Loading @@ -153,6 +153,147 @@ solve( const Vector1& b, Vector2& x ) const return true; } template< typename MatrixPointer > void ILU0< double, Devices::Cuda, int >:: update( const MatrixPointer& matrixPointer ) { #ifdef HAVE_CUDA // TODO: only numerical factorization has to be done every time, split the rest into separate "setup" method which is called less often resetMatrices(); // Note: the decomposition will be in-place, matrices L and U will have the // storage of A copyMatrix( *matrixPointer ); const int m = A.getRows(); const int nnz = A.getValues().getSize(); y.setSize( m ); // create matrix descriptors cusparseCreateMatDescr( &descr_A ); cusparseSetMatIndexBase( descr_A, CUSPARSE_INDEX_BASE_ZERO ); cusparseSetMatType( descr_A, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseCreateMatDescr( &descr_L ); cusparseSetMatIndexBase( descr_L, CUSPARSE_INDEX_BASE_ZERO ); cusparseSetMatType( descr_L, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseSetMatFillMode( descr_L, CUSPARSE_FILL_MODE_LOWER ); cusparseSetMatDiagType( descr_L, CUSPARSE_DIAG_TYPE_UNIT ); cusparseCreateMatDescr( &descr_U); cusparseSetMatIndexBase( descr_U, CUSPARSE_INDEX_BASE_ZERO ); cusparseSetMatType( descr_U, CUSPARSE_MATRIX_TYPE_GENERAL ); cusparseSetMatFillMode( descr_U, CUSPARSE_FILL_MODE_UPPER ); cusparseSetMatDiagType( descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT ); // create info structures cusparseCreateCsrilu02Info( &info_A ); cusparseCreateCsrsv2Info( &info_L ); cusparseCreateCsrsv2Info( &info_U ); // query how much memory will be needed in csrilu02 and csrsv2, and allocate the buffer int pBufferSize_A, pBufferSize_L, pBufferSize_U; cusparseDcsrilu02_bufferSize( handle, m, nnz, descr_A, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_A, &pBufferSize_A ); cusparseDcsrsv2_bufferSize( handle, trans_L, m, nnz, descr_L, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_L, &pBufferSize_L ); cusparseDcsrsv2_bufferSize( handle, trans_U, m, nnz, descr_U, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_U, &pBufferSize_U ); const int pBufferSize = max( pBufferSize_A, max( pBufferSize_L, pBufferSize_U ) ); pBuffer.setSize( pBufferSize ); // Symbolic analysis of the incomplete LU decomposition cusparseDcsrilu02_analysis( handle, m, nnz, descr_A, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int structural_zero; cusparseStatus_t status = cusparseXcsrilu02_zeroPivot( handle, info_A, &structural_zero ); if( CUSPARSE_STATUS_ZERO_PIVOT == status ) { std::cerr << "A(" << structural_zero << ", " << structural_zero << ") is missing." << std::endl; throw 1; } // Analysis for the triangular solves for L and U // Trick: the lower (upper) triangular part of A has the same sparsity // pattern as L (U), so we can do the analysis for csrsv2 on the matrix A. cusparseDcsrsv2_analysis( handle, trans_L, m, nnz, descr_L, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_L, policy_L, pBuffer.getData() ); cusparseDcsrsv2_analysis( handle, trans_U, m, nnz, descr_U, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_U, policy_U, pBuffer.getData() ); // Numerical incomplete LU decomposition cusparseDcsrilu02( handle, m, nnz, descr_A, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int numerical_zero; status = cusparseXcsrilu02_zeroPivot( handle, info_A, &numerical_zero ); if( CUSPARSE_STATUS_ZERO_PIVOT == status ) { std::cerr << "A(" << numerical_zero << ", " << numerical_zero << ") is zero." << std::endl; throw 1; } #else throw Exceptions::CudaSupportMissing(); #endif } template< typename Vector1, typename Vector2 > bool ILU0< double, Devices::Cuda, int >:: solve( const Vector1& b, Vector2& x ) const { #ifdef HAVE_CUDA const int m = A.getRows(); const int nnz = A.getValues().getSize(); // Step 1: solve y from Ly = b cusparseDcsrsv2_solve( handle, trans_L, m, nnz, &alpha, descr_L, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_L, b.getData(), (RealType*) y.getData(), policy_L, (void*) pBuffer.getData() ); // Step 2: solve x from Ux = y cusparseDcsrsv2_solve( handle, trans_U, m, nnz, &alpha, descr_U, A.getValues().getData(), A.getRowPointers().getData(), A.getColumnIndexes().getData(), info_U, y.getData(), x.getData(), policy_U, (void*) pBuffer.getData() ); return true; #else throw Exceptions::CudaSupportMissing(); #endif } } // namespace Preconditioners } // namespace Linear } // namespace Solvers Loading
src/TNL/Solvers/SolverStarter_impl.h +0 −3 Original line number Diff line number Diff line Loading @@ -294,11 +294,8 @@ class SolverStarterPreconditionerSetter return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Dummy, ConfigTag >::run( problem, parameters ); if( preconditioner == "diagonal" ) return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::Diagonal, ConfigTag >::run( problem, parameters ); // TODO: implement parallel ILU or device-dependent build config tags for preconditioners #ifndef HAVE_CUDA if( preconditioner == "ilu0" ) return SolverStarterSemiImplicitSolverSetter< Problem, SemiImplicitSolverTag, Linear::Preconditioners::ILU0, ConfigTag >::run( problem, parameters ); #endif std::cerr << "Unknown preconditioner " << preconditioner << ". It can be only: none, diagonal, ilu0." << std::endl; return false; Loading