Commit 3c5d17e3 authored by Jakub Klinkovský
Browse files

MPI refactoring: removed MpiCommunicator from DistributedNDArray, added...

MPI refactoring: removed MpiCommunicator from DistributedNDArray, added Allocator parameter to NDArray
parent 5e7005a6
Loading
Loading
Loading
Loading
+26 −18
Original line number Diff line number Diff line
@@ -12,34 +12,30 @@

#pragma once

#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/Containers/NDArray.h>
#include <TNL/Containers/Subrange.h>
#include <TNL/Containers/DistributedNDArrayView.h>

namespace TNL {
namespace Containers {

template< typename NDArray,
          typename Communicator = Communicators::MpiCommunicator,
          typename Overlaps = __ndarray_impl::make_constant_index_sequence< NDArray::getDimension(), 0 > >
class DistributedNDArray
{
   using CommunicationGroup = typename Communicator::CommunicationGroup;
public:
   using ValueType = typename NDArray::ValueType;
   using DeviceType = typename NDArray::DeviceType;
   using IndexType = typename NDArray::IndexType;
   using AllocatorType = typename NDArray::AllocatorType;
   using SizesHolderType = typename NDArray::SizesHolderType;
   using PermutationType = typename NDArray::PermutationType;
   using CommunicatorType = Communicator;
   using LocalBeginsType = __ndarray_impl::LocalBeginsHolder< typename NDArray::SizesHolderType >;
   using LocalRangeType = Subrange< IndexType >;
   using OverlapsType = Overlaps;
   using LocalIndexerType = NDArrayIndexer< SizesHolderType, PermutationType, typename NDArray::NDBaseType, typename NDArray::StridesHolderType, Overlaps >;

   using ViewType = DistributedNDArrayView< typename NDArray::ViewType, Communicator, Overlaps >;
   using ConstViewType = DistributedNDArrayView< typename NDArray::ConstViewType, Communicator, Overlaps >;
   using ViewType = DistributedNDArrayView< typename NDArray::ViewType, Overlaps >;
   using ConstViewType = DistributedNDArrayView< typename NDArray::ConstViewType, Overlaps >;
   using LocalViewType = typename NDArray::ViewType;
   using ConstLocalViewType = typename NDArray::ConstViewType;

@@ -49,10 +45,17 @@ public:

   DistributedNDArray() = default;

   // The copy-constructor of TNL::Containers::Array makes shallow copy so our
   // copy-constructor cannot be default. Actually, we most likely don't need
   // it anyway, so let's just delete it.
   DistributedNDArray( const DistributedNDArray& ) = delete;
   DistributedNDArray( const AllocatorType& allocator );

   // Copy constructor (makes a deep copy).
   explicit DistributedNDArray( const DistributedNDArray& ) = default;

   // Allocator-aware copy constructor: the local array is constructed from
   // the given allocator and the contents of `other` are then copied in by
   // assignment (deep copy).
   explicit DistributedNDArray( const DistributedNDArray& other, const AllocatorType& allocator )
   : localArray( allocator )
   {
      this->operator=( other );
   }

   // Standard copy-semantics with deep copy, just like regular 1D array.
   // Mismatched sizes cause reallocations.
@@ -79,8 +82,13 @@ public:
      return NDArray::getDimension();
   }

   // Returns the allocator of the local array, i.e. the value of
   // localArray.getAllocator().
   AllocatorType getAllocator() const
   {
      return localArray.getAllocator();
   }

   // Returns the communication group stored in the `group` member.
   __cuda_callable__
   CommunicationGroup getCommunicationGroup() const
   MPI_Comm getCommunicationGroup() const
   {
      return group;
   }
@@ -232,8 +240,8 @@ public:
            localEnds == other.localEnds &&
            localArray == other.localArray;
      bool result = true;
      if( group != CommunicatorType::NullGroup )
         CommunicatorType::Allreduce( &localResult, &result, 1, MPI_LAND, group );
      if( group != MPI::NullGroup() )
         MPI::Allreduce( &localResult, &result, 1, MPI_LAND, group );
      return result;
   }

@@ -375,7 +383,7 @@ public:
   }

   template< std::size_t level >
   void setDistribution( IndexType begin, IndexType end, CommunicationGroup group = Communicator::AllGroup )
   void setDistribution( IndexType begin, IndexType end, MPI_Comm group = MPI::AllGroup() )
   {
      static_assert( SizesHolderType::template getStaticSize< level >() == 0, "NDArray cannot be distributed in static dimensions." );
      TNL_ASSERT_GE( begin, 0, "begin must be non-negative" );
@@ -383,7 +391,7 @@ public:
      TNL_ASSERT_LT( begin, end, "begin must be lesser than end" );
      localBegins.template setSize< level >( begin );
      localEnds.template setSize< level >( end );
      TNL_ASSERT( this->group == Communicator::NullGroup || this->group == group,
      TNL_ASSERT( this->group == MPI::NullGroup() || this->group == group,
                  std::cerr << "different groups cannot be combined for different dimensions" );
      this->group = group;
   }
@@ -408,7 +416,7 @@ public:
   void reset()
   {
      localArray.reset();
      group = CommunicatorType::NullGroup;
      group = MPI::NullGroup();
      globalSizes = SizesHolderType{};
      localBegins = LocalBeginsType{};
      localEnds = SizesHolderType{};
@@ -435,7 +443,7 @@ public:

protected:
   NDArray localArray;
   CommunicationGroup group = Communicator::NullGroup;
   MPI_Comm group = MPI::NullGroup();
   SizesHolderType globalSizes;
   // static sizes should have different type: localBegin is always 0, localEnd is always the full size
   LocalBeginsType localBegins;
+31 −31
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include <future>

#include <TNL/Containers/ndarray/SynchronizerBuffers.h>
#include <TNL/MPI/Wrappers.h>

namespace TNL {
namespace Containers {
@@ -69,7 +70,6 @@ public:

protected:
   using DistributedNDArrayView = typename DistributedNDArray::ViewType;
   using Communicator = typename DistributedNDArray::CommunicatorType;
   using Buffers = __ndarray_impl::SynchronizerBuffers< DistributedNDArray >;

   DistributedNDArrayView array_view;
@@ -88,12 +88,12 @@ protected:
      Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true );

      // issue all send and receive async operations
      std::vector< typename Communicator::Request > requests;
      const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup();
      std::vector< MPI_Request > requests;
      const MPI_Comm group = array_view.getCommunicationGroup();
      Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group );

      // wait until send is done
      Communicator::WaitAll( requests.data(), requests.size() );
      MPI::Waitall( requests.data(), requests.size() );

      // copy data from receive buffers
      Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false );
@@ -152,9 +152,9 @@ protected:
         dim_buffers.right_recv_offsets.template setSize< dim >( localEnds.template getSize< dim >() );

         // FIXME: set proper neighbor IDs !!!
         const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup();
         const int rank = Communicator::GetRank(group);
         const int nproc = Communicator::GetSize(group);
         const MPI_Comm group = array_view.getCommunicationGroup();
         const int rank = MPI::GetRank(group);
         const int nproc = MPI::GetSize(group);
         dim_buffers.left_neighbor = (rank + nproc - 1) % nproc;
         dim_buffers.right_neighbor = (rank + 1) % nproc;
      }
@@ -221,30 +221,30 @@ protected:
         auto& dim_buffers = buffers.template getDimBuffers< dim >();

         if( LBM_HACK == false ) {
            requests.push_back( Communicator::ISend( dim_buffers.left_send_view.getData(),
            requests.push_back( MPI::Isend( dim_buffers.left_send_view.getData(),
                                            dim_buffers.left_send_view.getStorageSize(),
                                            dim_buffers.left_neighbor, 0, group ) );
            requests.push_back( Communicator::IRecv( dim_buffers.left_recv_view.getData(),
            requests.push_back( MPI::Irecv( dim_buffers.left_recv_view.getData(),
                                            dim_buffers.left_recv_view.getStorageSize(),
                                            dim_buffers.left_neighbor, 1, group ) );
            requests.push_back( Communicator::ISend( dim_buffers.right_send_view.getData(),
            requests.push_back( MPI::Isend( dim_buffers.right_send_view.getData(),
                                            dim_buffers.right_send_view.getStorageSize(),
                                            dim_buffers.right_neighbor, 1, group ) );
            requests.push_back( Communicator::IRecv( dim_buffers.right_recv_view.getData(),
            requests.push_back( MPI::Irecv( dim_buffers.right_recv_view.getData(),
                                            dim_buffers.right_recv_view.getStorageSize(),
                                            dim_buffers.right_neighbor, 0, group ) );
         }
         else {
            requests.push_back( Communicator::ISend( dim_buffers.left_send_view.getData() + 0,
            requests.push_back( MPI::Isend( dim_buffers.left_send_view.getData() + 0,
                                            dim_buffers.left_send_view.getStorageSize() / 27 * 9,
                                            dim_buffers.left_neighbor, 0, group ) );
            requests.push_back( Communicator::IRecv( dim_buffers.left_recv_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18,
            requests.push_back( MPI::Irecv( dim_buffers.left_recv_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18,
                                            dim_buffers.left_recv_view.getStorageSize() / 27 * 9,
                                            dim_buffers.left_neighbor, 1, group ) );
            requests.push_back( Communicator::ISend( dim_buffers.right_send_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18,
            requests.push_back( MPI::Isend( dim_buffers.right_send_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18,
                                            dim_buffers.right_send_view.getStorageSize() / 27 * 9,
                                            dim_buffers.right_neighbor, 1, group ) );
            requests.push_back( Communicator::IRecv( dim_buffers.right_recv_view.getData() + 0,
            requests.push_back( MPI::Irecv( dim_buffers.right_recv_view.getData() + 0,
                                            dim_buffers.right_recv_view.getStorageSize() / 27 * 9,
                                            dim_buffers.right_neighbor, 0, group ) );
         }
+9 −12
Original line number Diff line number Diff line
@@ -12,33 +12,30 @@

#pragma once

#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/Containers/NDArrayView.h>
#include <TNL/Containers/Subrange.h>
#include <TNL/MPI/Wrappers.h>

namespace TNL {
namespace Containers {

template< typename NDArrayView,
          typename Communicator = Communicators::MpiCommunicator,
          typename Overlaps = __ndarray_impl::make_constant_index_sequence< NDArrayView::getDimension(), 0 > >
class DistributedNDArrayView
{
   using CommunicationGroup = typename Communicator::CommunicationGroup;
public:
   using ValueType = typename NDArrayView::ValueType;
   using DeviceType = typename NDArrayView::DeviceType;
   using IndexType = typename NDArrayView::IndexType;
   using SizesHolderType = typename NDArrayView::SizesHolderType;
   using PermutationType = typename NDArrayView::PermutationType;
   using CommunicatorType = Communicator;
   using LocalBeginsType = __ndarray_impl::LocalBeginsHolder< typename NDArrayView::SizesHolderType >;
   using LocalRangeType = Subrange< IndexType >;
   using OverlapsType = Overlaps;
   using LocalIndexerType = NDArrayIndexer< SizesHolderType, PermutationType, typename NDArrayView::NDBaseType, typename NDArrayView::StridesHolderType, Overlaps >;

   using ViewType = DistributedNDArrayView< NDArrayView, Communicator, Overlaps >;
   using ConstViewType = DistributedNDArrayView< typename NDArrayView::ConstViewType, Communicator, Overlaps >;
   using ViewType = DistributedNDArrayView< NDArrayView, Overlaps >;
   using ConstViewType = DistributedNDArrayView< typename NDArrayView::ConstViewType, Overlaps >;
   using LocalViewType = NDArrayView;
   using ConstLocalViewType = typename NDArrayView::ConstViewType;

@@ -49,7 +46,7 @@ public:

   // explicit initialization by local array view, global sizes and local begins and ends
   __cuda_callable__
   DistributedNDArrayView( NDArrayView localView, SizesHolderType globalSizes, LocalBeginsType localBegins, SizesHolderType localEnds, CommunicationGroup group )
   DistributedNDArrayView( NDArrayView localView, SizesHolderType globalSizes, LocalBeginsType localBegins, SizesHolderType localEnds, MPI_Comm group )
   : localView(localView), group(group), globalSizes(globalSizes), localBegins(localBegins), localEnds(localEnds) {}

   // Copy-constructor does shallow copy, so views can be passed-by-value into
@@ -112,7 +109,7 @@ public:
   void reset()
   {
      localView.reset();
      group = CommunicatorType::NullGroup;
      group = MPI::NullGroup();
      globalSizes = SizesHolderType{};
      localBegins = LocalBeginsType{};
      localEnds = SizesHolderType{};
@@ -124,7 +121,7 @@ public:
   }

   // Returns the communication group stored in the `group` member.
   __cuda_callable__
   CommunicationGroup getCommunicationGroup() const
   MPI_Comm getCommunicationGroup() const
   {
      return group;
   }
@@ -276,8 +273,8 @@ public:
            localEnds == other.localEnds &&
            localView == other.localView;
      bool result = true;
      if( group != CommunicatorType::NullGroup )
         CommunicatorType::Allreduce( &localResult, &result, 1, MPI_LAND, group );
      if( group != MPI::NullGroup() )
         MPI::Allreduce( &localResult, &result, 1, MPI_LAND, group );
      return result;
   }

@@ -406,7 +403,7 @@ public:

protected:
   NDArrayView localView;
   CommunicationGroup group = Communicator::NullGroup;
   MPI_Comm group = MPI::NullGroup();
   SizesHolderType globalSizes;
   // static sizes should have different type: localBegin is always 0, localEnd is always the full size
   LocalBeginsType localBegins;
+66 −12
Original line number Diff line number Diff line
@@ -59,10 +59,8 @@ public:

   NDArrayStorage() = default;

   // The copy-constructor of TNL::Containers::Array makes shallow copy so our
   // copy-constructor cannot be default. Actually, we most likely don't need
   // it anyway, so let's just delete it.
   NDArrayStorage( const NDArrayStorage& ) = delete;
   // Copy constructor (makes a deep copy).
   explicit NDArrayStorage( const NDArrayStorage& ) = default;

   // Standard copy-semantics with deep copy, just like regular 1D array.
   // Mismatched sizes cause reallocations.
@@ -326,21 +324,49 @@ template< typename Value,
          typename SizesHolder,
          typename Permutation = std::make_index_sequence< SizesHolder::getDimension() >,  // identity by default
          typename Device = Devices::Host,
          typename Index = typename SizesHolder::IndexType >
          typename Index = typename SizesHolder::IndexType,
          typename Allocator = typename Allocators::Default< Device >::template Allocator< Value > >
class NDArray
: public NDArrayStorage< Array< Value, Device, Index >,
: public NDArrayStorage< Array< Value, Device, Index, Allocator >,
                         SizesHolder,
                         Permutation,
                         __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > > >
{
   using Base = NDArrayStorage< Array< Value, Device, Index >,
   using Base = NDArrayStorage< Array< Value, Device, Index, Allocator >,
                         SizesHolder,
                         Permutation,
                         __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > > >;

public:
   // inherit all assignment operators
   // inherit all constructors and assignment operators
   using Base::Base;
   using Base::operator=;

   // default constructor
   NDArray() = default;

   // implement dynamic array interface
   using AllocatorType = Allocator;

   NDArray( const NDArray& allocator )
   {
      // set empty array containing the specified allocator
      this->getStorageArray() = Array< Value, Device, Index, Allocator >( allocator );
   }

   // Copy constructor with a specific allocator (makes a deep copy).
   explicit NDArray( const NDArray& other, const AllocatorType& allocator )
   {
      // set empty array containing the specified allocator
      this->array = Array< Value, Device, Index, Allocator >( allocator );
      // copy the data
      *this = other;
   }

   // Returns the allocator of the underlying storage array (this->array).
   AllocatorType getAllocator() const
   {
      return this->array.getAllocator();
   }
};

template< typename Value,
@@ -372,21 +398,49 @@ template< typename Value,
          typename Permutation = std::make_index_sequence< SizesHolder::getDimension() >,  // identity by default
          typename SliceInfo = SliceInfo<>,  // no slicing by default
          typename Device = Devices::Host,
          typename Index = typename SizesHolder::IndexType >
          typename Index = typename SizesHolder::IndexType,
          typename Allocator = typename Allocators::Default< Device >::template Allocator< Value > >
class SlicedNDArray
: public NDArrayStorage< Array< Value, Device, Index >,
: public NDArrayStorage< Array< Value, Device, Index, Allocator >,
                         SizesHolder,
                         Permutation,
                         __ndarray_impl::SlicedNDArrayBase< SliceInfo > >
{
   using Base = NDArrayStorage< Array< Value, Device, Index >,
   using Base = NDArrayStorage< Array< Value, Device, Index, Allocator >,
                         SizesHolder,
                         Permutation,
                         __ndarray_impl::SlicedNDArrayBase< SliceInfo > >;

public:
   // inherit all assignment operators
   // inherit all constructors and assignment operators
   using Base::Base;
   using Base::operator=;

   // default constructor
   SlicedNDArray() = default;

   // implement dynamic array interface
   using AllocatorType = Allocator;

   SlicedNDArray( const SlicedNDArray& allocator )
   {
      // set empty array containing the specified allocator
      this->getStorageArray() = Array< Value, Device, Index, Allocator >( allocator );
   }

   // Copy constructor with a specific allocator (makes a deep copy).
   explicit SlicedNDArray( const SlicedNDArray& other, const AllocatorType& allocator )
   {
      // set empty array containing the specified allocator
      this->array = Array< Value, Device, Index, Allocator >( allocator );
      // copy the data
      *this = other;
   }

   // Returns the allocator of the underlying storage array (this->array).
   AllocatorType getAllocator() const
   {
      return this->array.getAllocator();
   }
};

} // namespace Containers
+1 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@
namespace TNL {
namespace Containers {

template< typename Index, typename Communicator >
template< typename Index, typename Communicator = Communicators::MpiCommunicator >
class Partitioner
{
   using CommunicationGroup = typename Communicator::CommunicationGroup;
Loading