Commit 364f03bf authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Fixed the diagonal preconditioner for ghost ranges

parent 4f5444a5
Loading
Loading
Loading
Loading
+26 −25
Original line number Diff line number Diff line
@@ -49,14 +49,7 @@ void
Diagonal< Matrix >::
solve( ConstVectorViewType b, VectorViewType x ) const
{
   ConstVectorViewType diag_view( diagonal );

   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
   {
      x[ i ] = b[ i ] / diag_view[ i ];
   };

   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
   x = b / diagonal;
}


@@ -66,24 +59,33 @@ Diagonal< Matrices::DistributedMatrix< Matrix, Communicator > >::
update( const MatrixPointer& matrixPointer )
{
   TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
   TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );

   diagonal.setSize( matrixPointer->getLocalMatrix().getRows() );

   LocalViewType diag_view( diagonal );
   // FIXME: SparseMatrix::getConstView is broken
//   const auto matrix_view = matrixPointer->getLocalMatrix().getConstView();
   const auto matrix_view = matrixPointer->getLocalMatrix().getView();
   const auto row_range = matrixPointer->getLocalRowRange();

   if( matrixPointer->getRows() == matrixPointer->getColumns() ) {
      // square matrix, assume global column indices
      const auto row_range = matrixPointer->getLocalRowRange();
      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
      {
         const IndexType gi = row_range.getGlobalIndex( i );
         diag_view[ i ] = matrix_view.getElement( i, gi );
      };

      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
   }
   else {
      // non-square matrix, assume ghost indexing
      TNL_ASSERT_LT( matrixPointer->getLocalMatrix().getRows(), matrixPointer->getLocalMatrix().getColumns(), "the local matrix should have more columns than rows" );
      auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
      {
         diag_view[ i ] = matrix_view.getElement( i, i );
      };
      Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
   }
}

template< typename Matrix, typename Communicator >
void
@@ -94,15 +96,14 @@ solve( ConstVectorViewType b, VectorViewType x ) const
   const auto b_view = b.getConstLocalView();
   auto x_view = x.getLocalView();

   TNL_ASSERT_EQ( b_view.getSize(), diagonal.getSize(), "The size of the vector b does not match the size of the extracted diagonal." );
   TNL_ASSERT_EQ( x_view.getSize(), diagonal.getSize(), "The size of the vector x does not match the size of the extracted diagonal." );
   // wait for pending synchronization
   b.waitForSynchronization();

   auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
   {
      x_view[ i ] = b_view[ i ] / diag_view[ i ];
   };
   // compute without ghosts (diagonal includes only local rows)
   x_view = b_view / diag_view;

   Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
   // synchronize ghosts
   x.startSynchronization();
}

} // namespace Preconditioners