src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h  +26 −25

@@ -49,14 +49,7 @@
 void
 Diagonal< Matrix >::
 solve( ConstVectorViewType b, VectorViewType x ) const
 {
-   ConstVectorViewType diag_view( diagonal );
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      x[ i ] = b[ i ] / diag_view[ i ];
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   x = b / diagonal;
 }

@@ -66,24 +59,33 @@
 Diagonal< Matrices::DistributedMatrix< Matrix, Communicator > >::
 update( const MatrixPointer& matrixPointer )
 {
    TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
-   TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );

    diagonal.setSize( matrixPointer->getLocalMatrix().getRows() );
    LocalViewType diag_view( diagonal );
    // FIXME: SparseMatrix::getConstView is broken
    // const auto matrix_view = matrixPointer->getLocalMatrix().getConstView();
    const auto matrix_view = matrixPointer->getLocalMatrix().getView();
-   const auto row_range = matrixPointer->getLocalRowRange();
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      const IndexType gi = row_range.getGlobalIndex( i );
-      diag_view[ i ] = matrix_view.getElement( i, gi );
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   if( matrixPointer->getRows() == matrixPointer->getColumns() ) {
+      // square matrix, assume global column indices
+      const auto row_range = matrixPointer->getLocalRowRange();
+      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
+      {
+         const IndexType gi = row_range.getGlobalIndex( i );
+         diag_view[ i ] = matrix_view.getElement( i, gi );
+      };
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   }
+   else {
+      // non-square matrix, assume ghost indexing
+      TNL_ASSERT_LT( matrixPointer->getLocalMatrix().getRows(), matrixPointer->getLocalMatrix().getColumns(),
+                     "the local matrix should have more columns than rows" );
+      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
+      {
+         diag_view[ i ] = matrix_view.getElement( i, i );
+      };
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   }
 }

 template< typename Matrix, typename Communicator >
 void

@@ -94,15 +96,14 @@ solve( ConstVectorViewType b, VectorViewType x ) const
    const auto b_view = b.getConstLocalView();
    auto x_view = x.getLocalView();
+   TNL_ASSERT_EQ( b_view.getSize(), diagonal.getSize(),
+                  "The size of the vector b does not match the size of the extracted diagonal." );
+   TNL_ASSERT_EQ( x_view.getSize(), diagonal.getSize(),
+                  "The size of the vector x does not match the size of the extracted diagonal." );

    // wait for pending synchronization
    b.waitForSynchronization();

-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      x_view[ i ] = b_view[ i ] / diag_view[ i ];
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   // compute without ghosts (diagonal includes only local rows)
+   x_view = b_view / diag_view;

    // synchronize ghosts
    x.startSynchronization();
 }

 } // namespace Preconditioners
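
The headline change is replacing the hand-written ParallelFor kernels with TNL's vector expression templates: x = b / diagonal now evaluates the componentwise division in a single pass. The following is a minimal standalone sketch of that mechanism, not TNL's implementation; the Vec and DivExpr types are hypothetical stand-ins showing how assigning a division expression fuses into one elementwise loop (which a device backend would launch as a single kernel, much like the removed ParallelFor).

    // Sketch only: Vec and DivExpr are hypothetical stand-ins, NOT TNL types.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Lazy representation of lhs[i] / rhs[i]; nothing is computed here.
    template< typename L, typename R >
    struct DivExpr
    {
       const L& lhs;
       const R& rhs;
       double operator[]( std::size_t i ) const { return lhs[ i ] / rhs[ i ]; }
       std::size_t size() const { return lhs.size(); }
    };

    struct Vec
    {
       std::vector< double > data;
       double operator[]( std::size_t i ) const { return data[ i ]; }
       double& operator[]( std::size_t i ) { return data[ i ]; }
       std::size_t size() const { return data.size(); }

       // Assigning an expression evaluates it element by element in one pass;
       // a GPU backend would turn this single loop into a single fused kernel.
       template< typename L, typename R >
       Vec& operator=( const DivExpr< L, R >& expr )
       {
          data.resize( expr.size() );
          for( std::size_t i = 0; i < expr.size(); i++ )
             data[ i ] = expr[ i ];
          return *this;
       }
    };

    // Dividing two vectors builds the lazy expression instead of a result vector.
    DivExpr< Vec, Vec > operator/( const Vec& lhs, const Vec& rhs ) { return { lhs, rhs }; }

    int main()
    {
       Vec b{ { 2.0, 6.0, 12.0 } };
       Vec diagonal{ { 2.0, 3.0, 4.0 } };
       Vec x;
       x = b / diagonal;   // same shape as the preconditioner's solve()
       for( std::size_t i = 0; i < x.size(); i++ )
          std::cout << x[ i ] << "\n";   // prints 1, 2, 3
    }

Note that b / diagonal materializes no temporary vector: the division happens inside the assignment loop, so the expression form can match the performance of the explicit kernel it replaces.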
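
The rewritten update() for DistributedMatrix distinguishes two column-indexing schemes instead of asserting squareness. Below is a rough host-side sketch of that branch logic under stated assumptions: LocalMatrix, extractDiagonal and their parameters are hypothetical stand-ins, and the dense storage is for illustration only (the real matrix is sparse).

    // Sketch only: hypothetical stand-in types, not the TNL API.
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct LocalMatrix
    {
       std::size_t rows, columns;
       std::vector< std::vector< double > > values;  // dense for illustration only
       double getElement( std::size_t i, std::size_t j ) const { return values[ i ][ j ]; }
    };

    // Extract the diagonal of the distributed matrix from one rank's local block.
    // rowBegin is the global index of the first local row (the "local row range").
    std::vector< double >
    extractDiagonal( const LocalMatrix& local,
                     std::size_t globalRows,
                     std::size_t globalColumns,
                     std::size_t rowBegin )
    {
       std::vector< double > diagonal( local.rows );
       if( globalRows == globalColumns ) {
          // square distributed matrix: local columns carry global indices, so
          // the diagonal entry of local row i sits in global column rowBegin + i
          for( std::size_t i = 0; i < local.rows; i++ )
             diagonal[ i ] = local.getElement( i, rowBegin + i );
       }
       else {
          // ghost indexing: the first local.rows columns correspond to the
          // local rows themselves, ghost columns come after them
          assert( local.rows < local.columns );
          for( std::size_t i = 0; i < local.rows; i++ )
             diagonal[ i ] = local.getElement( i, i );
       }
       return diagonal;
    }

    int main()
    {
       // rank owning global rows 2..3 of a 4x4 matrix (square branch)
       LocalMatrix local{ 2, 4, { { 0, 0, 5, 0 },
                                  { 0, 0, 0, 7 } } };
       const auto diagonal = extractDiagonal( local, 4, 4, 2 );
       assert( diagonal[ 0 ] == 5 && diagonal[ 1 ] == 7 );
    }

This mirrors the removed TNL_ASSERT_EQ: rather than rejecting non-square cases outright, a local block with more columns than rows is now interpreted as using ghost indexing, where the diagonal entry of local row i sits at local column i.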