Commit 364f03bf authored by Jakub Klinkovský

Fixed the diagonal preconditioner for ghost ranges

parent 4f5444a5
1 merge request: !82 MPI refactoring
@@ -49,14 +49,7 @@ void
 Diagonal< Matrix >::
 solve( ConstVectorViewType b, VectorViewType x ) const
 {
-   ConstVectorViewType diag_view( diagonal );
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      x[ i ] = b[ i ] / diag_view[ i ];
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   x = b / diagonal;
 }
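
Note: the removed kernel and the new one-liner are equivalent; TNL's vector expression templates expand x = b / diagonal into a single elementwise kernel on the target device. A minimal host-side sketch of the same operation in plain C++ (illustrative only, not TNL's expression machinery):

#include <cstddef>
#include <vector>

// Elementwise division x[i] = b[i] / diagonal[i], i.e. one application
// of the diagonal (Jacobi) preconditioner, executed serially on the host.
void applyDiagonalPreconditioner( const std::vector< double >& b,
                                  const std::vector< double >& diagonal,
                                  std::vector< double >& x )
{
   for( std::size_t i = 0; i < b.size(); i++ )
      x[ i ] = b[ i ] / diagonal[ i ];
}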
@@ -66,23 +59,32 @@ Diagonal< Matrices::DistributedMatrix< Matrix, Communicator > >::
 update( const MatrixPointer& matrixPointer )
 {
    TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
-   TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );
    diagonal.setSize( matrixPointer->getLocalMatrix().getRows() );
    LocalViewType diag_view( diagonal );
    // FIXME: SparseMatrix::getConstView is broken
    // const auto matrix_view = matrixPointer->getLocalMatrix().getConstView();
    const auto matrix_view = matrixPointer->getLocalMatrix().getView();
-   const auto row_range = matrixPointer->getLocalRowRange();
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      const IndexType gi = row_range.getGlobalIndex( i );
-      diag_view[ i ] = matrix_view.getElement( i, gi );
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   if( matrixPointer->getRows() == matrixPointer->getColumns() ) {
+      // square matrix, assume global column indices
+      const auto row_range = matrixPointer->getLocalRowRange();
+      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
+      {
+         const IndexType gi = row_range.getGlobalIndex( i );
+         diag_view[ i ] = matrix_view.getElement( i, gi );
+      };
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   }
+   else {
+      // non-square matrix, assume ghost indexing
+      TNL_ASSERT_LT( matrixPointer->getLocalMatrix().getRows(), matrixPointer->getLocalMatrix().getColumns(), "the local matrix should have more columns than rows" );
+      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
+      {
+         diag_view[ i ] = matrix_view.getElement( i, i );
+      };
+      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   }
 }

 template< typename Matrix, typename Communicator >
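
Note: the two branches differ only in how the column index of the diagonal entry is computed. For a square distributed matrix the local matrix stores global column indices, so local row i has its diagonal at global column row_range.getGlobalIndex( i ); with ghost indexing the owned columns come first in the local numbering, so the diagonal of local row i sits simply at local column i. A hypothetical sketch of that mapping (the LocalRowRange struct below is an illustration, not TNL's actual type):

#include <iostream>

// Hypothetical stand-in for the range returned by getLocalRowRange():
// a contiguous block of global rows owned by this rank.
struct LocalRowRange
{
   int begin;  // global index of the first local row
   int getGlobalIndex( int local ) const { return begin + local; }
};

int main()
{
   const LocalRowRange row_range{ 100 };  // this rank owns rows 100, 101, ...
   // square branch: the diagonal of local row 5 lives at global column 105
   std::cout << row_range.getGlobalIndex( 5 ) << std::endl;
   // ghost branch: owned columns are numbered before ghost columns,
   // so the diagonal of local row 5 lives at local column 5
   return 0;
}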
@@ -94,15 +96,14 @@ solve( ConstVectorViewType b, VectorViewType x ) const
    const auto b_view = b.getConstLocalView();
    auto x_view = x.getLocalView();
-   TNL_ASSERT_EQ( b_view.getSize(), diagonal.getSize(), "The size of the vector b does not match the size of the extracted diagonal." );
-   TNL_ASSERT_EQ( x_view.getSize(), diagonal.getSize(), "The size of the vector x does not match the size of the extracted diagonal." );
-   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
-   {
-      x_view[ i ] = b_view[ i ] / diag_view[ i ];
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
+   // wait for pending synchronization
+   b.waitForSynchronization();
+   // compute without ghosts (diagonal includes only local rows)
+   x_view = b_view / diag_view;
+   // synchronize ghosts
+   x.startSynchronization();
 }

 } // namespace Preconditioners
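
Note: the fix hinges on the ordering in the distributed solve(): wait until the pending ghost exchange of b has finished, apply the preconditioner to the local rows only (the extracted diagonal has no ghost entries), then start a non-blocking ghost exchange of x so it can overlap with subsequent work. A sketch of that ordering, with hypothetical stand-ins for waitForSynchronization()/startSynchronization():

#include <cstddef>
#include <vector>

void waitForGhosts( const std::vector< double >& ) { /* block until ghost values are valid */ }
void startGhostExchange( std::vector< double >& ) { /* launch a non-blocking ghost update */ }

void distributedSolve( const std::vector< double >& b,
                       const std::vector< double >& diag,
                       std::vector< double >& x )
{
   waitForGhosts( b );              // 1. b must be consistent before reading it
   for( std::size_t i = 0; i < diag.size(); i++ )
      x[ i ] = b[ i ] / diag[ i ];  // 2. local rows only; diag excludes ghosts
   startGhostExchange( x );         // 3. update ghost values of x asynchronously
}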