Loading src/TNL/Solvers/Linear/Preconditioners/ILU0.h +3 −3 Original line number Diff line number Diff line Loading @@ -20,7 +20,6 @@ #include <TNL/Exceptions/NotImplementedError.h> #if defined(HAVE_CUDA) && defined(HAVE_CUSPARSE) #include <Benchmarks/SpMV/ReferenceFormats/Legacy/CSR.h> #include <cusparse.h> #endif Loading Loading @@ -77,7 +76,8 @@ public: protected: // The factors L and U are stored separately and the rows of U are reversed. Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRDefault > L, U; using CSR = Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRScalar >; CSR L, U; // Specialized methods to distinguish between normal and distributed matrices // in the implementation. Loading Loading @@ -136,7 +136,7 @@ public: protected: #if defined(HAVE_CUDA) && defined(HAVE_CUSPARSE) using CSR = Benchmarks::SpMV::ReferenceFormats::Legacy::CSR< RealType, DeviceType, IndexType >; using CSR = Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRScalar >; Pointers::UniquePointer< CSR > A, L, U; Containers::Vector< RealType, DeviceType, IndexType > y; Loading src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +11 −11 Original line number Diff line number Diff line Loading @@ -203,17 +203,17 @@ update( const MatrixPointer& matrixPointer ) int pBufferSize_A, pBufferSize_L, pBufferSize_U; cusparseDcsrilu02_bufferSize( handle, N, nnz_A, descr_A, A->getValues().getData(), A->getRowPointers().getData(), A->getSegments().getOffsets().getData(), A->getColumnIndexes().getData(), info_A, &pBufferSize_A ); cusparseDcsrsv2_bufferSize( handle, trans_L, N, nnz_L, descr_L, L->getValues().getData(), L->getRowPointers().getData(), L->getSegments().getOffsets().getData(), L->getColumnIndexes().getData(), info_L, &pBufferSize_L ); cusparseDcsrsv2_bufferSize( handle, trans_U, N, nnz_U, descr_U, U->getValues().getData(), U->getRowPointers().getData(), U->getSegments().getOffsets().getData(), U->getColumnIndexes().getData(), info_U, &pBufferSize_U ); TNL_CHECK_CUDA_DEVICE; Loading @@ -223,7 +223,7 @@ update( const MatrixPointer& matrixPointer ) // Symbolic analysis of the incomplete LU decomposition cusparseDcsrilu02_analysis( handle, N, nnz_A, descr_A, A->getValues().getData(), A->getRowPointers().getData(), A->getSegments().getOffsets().getData(), A->getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int structural_zero; Loading @@ -240,19 +240,19 @@ update( const MatrixPointer& matrixPointer ) // pattern as L (U), so we can do the analysis for csrsv2 on the matrix A. // cusparseDcsrsv2_analysis( handle, trans_L, N, nnz_A, descr_L, // A->getValues().getData(), // A->getRowPointers().getData(), // A->getSegments().getOffsets().getData(), // A->getColumnIndexes().getData(), // info_L, policy_L, pBuffer.getData() ); // cusparseDcsrsv2_analysis( handle, trans_U, N, nnz_A, descr_U, // A->getValues().getData(), // A->getRowPointers().getData(), // A->getSegments().getOffsets().getData(), // A->getColumnIndexes().getData(), // info_U, policy_U, pBuffer.getData() ); // Numerical incomplete LU decomposition cusparseDcsrilu02( handle, N, nnz_A, descr_A, A->getValues().getData(), A->getRowPointers().getData(), A->getSegments().getOffsets().getData(), A->getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int numerical_zero; Loading @@ -269,12 +269,12 @@ update( const MatrixPointer& matrixPointer ) // Analysis for the triangular solves for L and U cusparseDcsrsv2_analysis( handle, trans_L, N, nnz_L, descr_L, L->getValues().getData(), L->getRowPointers().getData(), L->getSegments().getOffsets().getData(), L->getColumnIndexes().getData(), info_L, policy_L, pBuffer.getData() ); cusparseDcsrsv2_analysis( handle, trans_U, N, nnz_U, descr_U, U->getValues().getData(), U->getRowPointers().getData(), U->getSegments().getOffsets().getData(), U->getColumnIndexes().getData(), info_U, policy_U, pBuffer.getData() ); TNL_CHECK_CUDA_DEVICE; Loading Loading @@ -386,7 +386,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const // Step 1: solve y from Ly = b cusparseDcsrsv2_solve( handle, trans_L, N, nnz_L, &alpha, descr_L, L->getValues().getData(), L->getRowPointers().getData(), L->getSegments().getOffsets().getData(), L->getColumnIndexes().getData(), info_L, b.getData(), Loading @@ -396,7 +396,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const // Step 2: solve x from Ux = y cusparseDcsrsv2_solve( handle, trans_U, N, nnz_U, &alpha, descr_U, U->getValues().getData(), U->getRowPointers().getData(), U->getSegments().getOffsets().getData(), U->getColumnIndexes().getData(), info_U, y.getData(), Loading Loading
src/TNL/Solvers/Linear/Preconditioners/ILU0.h +3 −3 Original line number Diff line number Diff line Loading @@ -20,7 +20,6 @@ #include <TNL/Exceptions/NotImplementedError.h> #if defined(HAVE_CUDA) && defined(HAVE_CUSPARSE) #include <Benchmarks/SpMV/ReferenceFormats/Legacy/CSR.h> #include <cusparse.h> #endif Loading Loading @@ -77,7 +76,8 @@ public: protected: // The factors L and U are stored separately and the rows of U are reversed. Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRDefault > L, U; using CSR = Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRScalar >; CSR L, U; // Specialized methods to distinguish between normal and distributed matrices // in the implementation. Loading Loading @@ -136,7 +136,7 @@ public: protected: #if defined(HAVE_CUDA) && defined(HAVE_CUSPARSE) using CSR = Benchmarks::SpMV::ReferenceFormats::Legacy::CSR< RealType, DeviceType, IndexType >; using CSR = Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRScalar >; Pointers::UniquePointer< CSR > A, L, U; Containers::Vector< RealType, DeviceType, IndexType > y; Loading
src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +11 −11 Original line number Diff line number Diff line Loading @@ -203,17 +203,17 @@ update( const MatrixPointer& matrixPointer ) int pBufferSize_A, pBufferSize_L, pBufferSize_U; cusparseDcsrilu02_bufferSize( handle, N, nnz_A, descr_A, A->getValues().getData(), A->getRowPointers().getData(), A->getSegments().getOffsets().getData(), A->getColumnIndexes().getData(), info_A, &pBufferSize_A ); cusparseDcsrsv2_bufferSize( handle, trans_L, N, nnz_L, descr_L, L->getValues().getData(), L->getRowPointers().getData(), L->getSegments().getOffsets().getData(), L->getColumnIndexes().getData(), info_L, &pBufferSize_L ); cusparseDcsrsv2_bufferSize( handle, trans_U, N, nnz_U, descr_U, U->getValues().getData(), U->getRowPointers().getData(), U->getSegments().getOffsets().getData(), U->getColumnIndexes().getData(), info_U, &pBufferSize_U ); TNL_CHECK_CUDA_DEVICE; Loading @@ -223,7 +223,7 @@ update( const MatrixPointer& matrixPointer ) // Symbolic analysis of the incomplete LU decomposition cusparseDcsrilu02_analysis( handle, N, nnz_A, descr_A, A->getValues().getData(), A->getRowPointers().getData(), A->getSegments().getOffsets().getData(), A->getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int structural_zero; Loading @@ -240,19 +240,19 @@ update( const MatrixPointer& matrixPointer ) // pattern as L (U), so we can do the analysis for csrsv2 on the matrix A. // cusparseDcsrsv2_analysis( handle, trans_L, N, nnz_A, descr_L, // A->getValues().getData(), // A->getRowPointers().getData(), // A->getSegments().getOffsets().getData(), // A->getColumnIndexes().getData(), // info_L, policy_L, pBuffer.getData() ); // cusparseDcsrsv2_analysis( handle, trans_U, N, nnz_A, descr_U, // A->getValues().getData(), // A->getRowPointers().getData(), // A->getSegments().getOffsets().getData(), // A->getColumnIndexes().getData(), // info_U, policy_U, pBuffer.getData() ); // Numerical incomplete LU decomposition cusparseDcsrilu02( handle, N, nnz_A, descr_A, A->getValues().getData(), A->getRowPointers().getData(), A->getSegments().getOffsets().getData(), A->getColumnIndexes().getData(), info_A, policy_A, pBuffer.getData() ); int numerical_zero; Loading @@ -269,12 +269,12 @@ update( const MatrixPointer& matrixPointer ) // Analysis for the triangular solves for L and U cusparseDcsrsv2_analysis( handle, trans_L, N, nnz_L, descr_L, L->getValues().getData(), L->getRowPointers().getData(), L->getSegments().getOffsets().getData(), L->getColumnIndexes().getData(), info_L, policy_L, pBuffer.getData() ); cusparseDcsrsv2_analysis( handle, trans_U, N, nnz_U, descr_U, U->getValues().getData(), U->getRowPointers().getData(), U->getSegments().getOffsets().getData(), U->getColumnIndexes().getData(), info_U, policy_U, pBuffer.getData() ); TNL_CHECK_CUDA_DEVICE; Loading Loading @@ -386,7 +386,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const // Step 1: solve y from Ly = b cusparseDcsrsv2_solve( handle, trans_L, N, nnz_L, &alpha, descr_L, L->getValues().getData(), L->getRowPointers().getData(), L->getSegments().getOffsets().getData(), L->getColumnIndexes().getData(), info_L, b.getData(), Loading @@ -396,7 +396,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const // Step 2: solve x from Ux = y cusparseDcsrsv2_solve( handle, trans_U, N, nnz_U, &alpha, descr_U, U->getValues().getData(), U->getRowPointers().getData(), U->getSegments().getOffsets().getData(), U->getColumnIndexes().getData(), info_U, y.getData(), Loading