Commit 364f03bf authored by Jakub Klinkovský

Fixed the diagonal preconditioner for ghost ranges

parent 4f5444a5
1 merge request: !82 MPI refactoring
@@ -49,14 +49,7 @@ void
 Diagonal< Matrix >::
 solve( ConstVectorViewType b, VectorViewType x ) const
 {
-   ConstVectorViewType diag_view( diagonal );
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      x[ i ] = b[ i ] / diag_view[ i ];
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   x = b / diagonal;
 }
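
Note: the removed kernel and the new one-liner are equivalent; TNL's vector expression templates expand x = b / diagonal into a single elementwise kernel on the target device. A minimal host-side sketch of the same operation in plain C++ (illustrative only, not TNL's expression machinery):

#include <cstddef>
#include <vector>

// Elementwise division x[i] = b[i] / diagonal[i], i.e. one application
// of the diagonal (Jacobi) preconditioner, executed serially on the host.
void applyDiagonalPreconditioner( const std::vector< double >& b,
                                  const std::vector< double >& diagonal,
                                  std::vector< double >& x )
{
   for( std::size_t i = 0; i < b.size(); i++ )
      x[ i ] = b[ i ] / diagonal[ i ];
}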
@@ -66,23 +59,32 @@ Diagonal< Matrices::DistributedMatrix< Matrix, Communicator > >::
 update( const MatrixPointer& matrixPointer )
 {
    TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
-   TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );
    diagonal.setSize( matrixPointer->getLocalMatrix().getRows() );
    LocalViewType diag_view( diagonal );
    // FIXME: SparseMatrix::getConstView is broken
    // const auto matrix_view = matrixPointer->getLocalMatrix().getConstView();
    const auto matrix_view = matrixPointer->getLocalMatrix().getView();
-   const auto row_range = matrixPointer->getLocalRowRange();
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      const IndexType gi = row_range.getGlobalIndex( i );
-      diag_view[ i ] = matrix_view.getElement( i, gi );
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   if( matrixPointer->getRows() == matrixPointer->getColumns() ) {
+      // square matrix, assume global column indices
+      const auto row_range = matrixPointer->getLocalRowRange();
+      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
+      {
+         const IndexType gi = row_range.getGlobalIndex( i );
+         diag_view[ i ] = matrix_view.getElement( i, gi );
+      };
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   }
+   else {
+      // non-square matrix, assume ghost indexing
+      TNL_ASSERT_LT( matrixPointer->getLocalMatrix().getRows(), matrixPointer->getLocalMatrix().getColumns(), "the local matrix should have more columns than rows" );
+      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
+      {
+         diag_view[ i ] = matrix_view.getElement( i, i );
+      };
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   }
 }

 template< typename Matrix, typename Communicator >
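
Note: the two branches differ only in how the column index of the diagonal entry is computed. For a square distributed matrix the local matrix stores global column indices, so local row i has its diagonal at global column row_range.getGlobalIndex( i ); with ghost indexing the owned columns come first in the local numbering, so the diagonal of local row i sits simply at local column i. A hypothetical sketch of that mapping (the LocalRowRange struct below is an illustration, not TNL's actual type):

#include <iostream>

// Hypothetical stand-in for the range returned by getLocalRowRange():
// a contiguous block of global rows owned by this rank.
struct LocalRowRange
{
   int begin;  // global index of the first local row
   int getGlobalIndex( int local ) const { return begin + local; }
};

int main()
{
   const LocalRowRange row_range{ 100 };  // this rank owns rows 100, 101, ...
   // square branch: the diagonal of local row 5 lives at global column 105
   std::cout << row_range.getGlobalIndex( 5 ) << std::endl;
   // ghost branch: owned columns are numbered before ghost columns,
   // so the diagonal of local row 5 lives at local column 5
   return 0;
}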
@@ -94,15 +96,14 @@ solve( ConstVectorViewType b, VectorViewType x ) const
    const auto b_view = b.getConstLocalView();
    auto x_view = x.getLocalView();
-   TNL_ASSERT_EQ( b_view.getSize(), diagonal.getSize(), "The size of the vector b does not match the size of the extracted diagonal." );
-   TNL_ASSERT_EQ( x_view.getSize(), diagonal.getSize(), "The size of the vector x does not match the size of the extracted diagonal." );
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      x_view[ i ] = b_view[ i ] / diag_view[ i ];
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   // wait for pending synchronization
+   b.waitForSynchronization();
+   // compute without ghosts (diagonal includes only local rows)
+   x_view = b_view / diag_view;
+   // synchronize ghosts
+   x.startSynchronization();
 }

 } // namespace Preconditioners
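
Note: the fix hinges on the ordering in the distributed solve(): wait until the pending ghost exchange of b has finished, apply the preconditioner to the local rows only (the extracted diagonal has no ghost entries), then start a non-blocking ghost exchange of x so it can overlap with subsequent work. A sketch of that ordering, with hypothetical stand-ins for waitForSynchronization()/startSynchronization():

#include <cstddef>
#include <vector>

void waitForGhosts( const std::vector< double >& ) { /* block until ghost values are valid */ }
void startGhostExchange( std::vector< double >& ) { /* launch a non-blocking ghost update */ }

void distributedSolve( const std::vector< double >& b,
                       const std::vector< double >& diag,
                       std::vector< double >& x )
{
   waitForGhosts( b );              // 1. b must be consistent before reading it
   for( std::size_t i = 0; i < diag.size(); i++ )
      x[ i ] = b[ i ] / diag[ i ];  // 2. local rows only; diag excludes ghosts
   startGhostExchange( x );         // 3. update ghost values of x asynchronously
}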