Loading src/TNL/Containers/DistributedNDArray.h +26 −18 Original line number Diff line number Diff line Loading @@ -12,34 +12,30 @@ #pragma once #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Containers/NDArray.h> #include <TNL/Containers/Subrange.h> #include <TNL/Containers/DistributedNDArrayView.h> namespace TNL { namespace Containers { template< typename NDArray, typename Communicator = Communicators::MpiCommunicator, typename Overlaps = __ndarray_impl::make_constant_index_sequence< NDArray::getDimension(), 0 > > class DistributedNDArray { using CommunicationGroup = typename Communicator::CommunicationGroup; public: using ValueType = typename NDArray::ValueType; using DeviceType = typename NDArray::DeviceType; using IndexType = typename NDArray::IndexType; using AllocatorType = typename NDArray::AllocatorType; using SizesHolderType = typename NDArray::SizesHolderType; using PermutationType = typename NDArray::PermutationType; using CommunicatorType = Communicator; using LocalBeginsType = __ndarray_impl::LocalBeginsHolder< typename NDArray::SizesHolderType >; using LocalRangeType = Subrange< IndexType >; using OverlapsType = Overlaps; using LocalIndexerType = NDArrayIndexer< SizesHolderType, PermutationType, typename NDArray::NDBaseType, typename NDArray::StridesHolderType, Overlaps >; using ViewType = DistributedNDArrayView< typename NDArray::ViewType, Communicator, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArray::ConstViewType, Communicator, Overlaps >; using ViewType = DistributedNDArrayView< typename NDArray::ViewType, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArray::ConstViewType, Overlaps >; using LocalViewType = typename NDArray::ViewType; using ConstLocalViewType = typename NDArray::ConstViewType; Loading @@ -49,10 +45,17 @@ public: DistributedNDArray() = default; // The copy-constructor of TNL::Containers::Array makes shallow copy so our // copy-constructor cannot be default. Actually, we most likely don't need // it anyway, so let's just delete it. DistributedNDArray( const DistributedNDArray& ) = delete; DistributedNDArray( const AllocatorType& allocator ); // Copy constructor (makes a deep copy). explicit DistributedNDArray( const DistributedNDArray& ) = default; // Copy constructor with a specific allocator (makes a deep copy). explicit DistributedNDArray( const DistributedNDArray& other, const AllocatorType& allocator ) : localArray( allocator ) { *this = other; } // Standard copy-semantics with deep copy, just like regular 1D array. // Mismatched sizes cause reallocations. Loading @@ -79,8 +82,13 @@ public: return NDArray::getDimension(); } AllocatorType getAllocator() const { return localArray.getAllocator(); } __cuda_callable__ CommunicationGroup getCommunicationGroup() const MPI_Comm getCommunicationGroup() const { return group; } Loading Loading @@ -232,8 +240,8 @@ public: localEnds == other.localEnds && localArray == other.localArray; bool result = true; if( group != CommunicatorType::NullGroup ) CommunicatorType::Allreduce( &localResult, &result, 1, MPI_LAND, group ); if( group != MPI::NullGroup() ) MPI::Allreduce( &localResult, &result, 1, MPI_LAND, group ); return result; } Loading Loading @@ -375,7 +383,7 @@ public: } template< std::size_t level > void setDistribution( IndexType begin, IndexType end, CommunicationGroup group = Communicator::AllGroup ) void setDistribution( IndexType begin, IndexType end, MPI_Comm group = MPI::AllGroup() ) { static_assert( SizesHolderType::template getStaticSize< level >() == 0, "NDArray cannot be distributed in static dimensions." ); TNL_ASSERT_GE( begin, 0, "begin must be non-negative" ); Loading @@ -383,7 +391,7 @@ public: TNL_ASSERT_LT( begin, end, "begin must be lesser than end" ); localBegins.template setSize< level >( begin ); localEnds.template setSize< level >( end ); TNL_ASSERT( this->group == Communicator::NullGroup || this->group == group, TNL_ASSERT( this->group == MPI::NullGroup() || this->group == group, std::cerr << "different groups cannot be combined for different dimensions" ); this->group = group; } Loading @@ -408,7 +416,7 @@ public: void reset() { localArray.reset(); group = CommunicatorType::NullGroup; group = MPI::NullGroup(); globalSizes = SizesHolderType{}; localBegins = LocalBeginsType{}; localEnds = SizesHolderType{}; Loading @@ -435,7 +443,7 @@ public: protected: NDArray localArray; CommunicationGroup group = Communicator::NullGroup; MPI_Comm group = MPI::NullGroup(); SizesHolderType globalSizes; // static sizes should have different type: localBegin is always 0, localEnd is always the full size LocalBeginsType localBegins; Loading src/TNL/Containers/DistributedNDArraySynchronizer.h +31 −31 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <future> #include <TNL/Containers/ndarray/SynchronizerBuffers.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Containers { Loading Loading @@ -69,7 +70,6 @@ public: protected: using DistributedNDArrayView = typename DistributedNDArray::ViewType; using Communicator = typename DistributedNDArray::CommunicatorType; using Buffers = __ndarray_impl::SynchronizerBuffers< DistributedNDArray >; DistributedNDArrayView array_view; Loading @@ -88,12 +88,12 @@ protected: Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true ); // issue all send and receive async operations std::vector< typename Communicator::Request > requests; const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup(); std::vector< MPI_Request > requests; const MPI_Comm group = array_view.getCommunicationGroup(); Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group ); // wait until send is done Communicator::WaitAll( requests.data(), requests.size() ); MPI::Waitall( requests.data(), requests.size() ); // copy data from receive buffers Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false ); Loading Loading @@ -152,9 +152,9 @@ protected: dim_buffers.right_recv_offsets.template setSize< dim >( localEnds.template getSize< dim >() ); // FIXME: set proper neighbor IDs !!! const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup(); const int rank = Communicator::GetRank(group); const int nproc = Communicator::GetSize(group); const MPI_Comm group = array_view.getCommunicationGroup(); const int rank = MPI::GetRank(group); const int nproc = MPI::GetSize(group); dim_buffers.left_neighbor = (rank + nproc - 1) % nproc; dim_buffers.right_neighbor = (rank + 1) % nproc; } Loading Loading @@ -221,30 +221,30 @@ protected: auto& dim_buffers = buffers.template getDimBuffers< dim >(); if( LBM_HACK == false ) { requests.push_back( Communicator::ISend( dim_buffers.left_send_view.getData(), requests.push_back( MPI::Isend( dim_buffers.left_send_view.getData(), dim_buffers.left_send_view.getStorageSize(), dim_buffers.left_neighbor, 0, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.left_recv_view.getData(), requests.push_back( MPI::Irecv( dim_buffers.left_recv_view.getData(), dim_buffers.left_recv_view.getStorageSize(), dim_buffers.left_neighbor, 1, group ) ); requests.push_back( Communicator::ISend( dim_buffers.right_send_view.getData(), requests.push_back( MPI::Isend( dim_buffers.right_send_view.getData(), dim_buffers.right_send_view.getStorageSize(), dim_buffers.right_neighbor, 1, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.right_recv_view.getData(), requests.push_back( MPI::Irecv( dim_buffers.right_recv_view.getData(), dim_buffers.right_recv_view.getStorageSize(), dim_buffers.right_neighbor, 0, group ) ); } else { requests.push_back( Communicator::ISend( dim_buffers.left_send_view.getData() + 0, requests.push_back( MPI::Isend( dim_buffers.left_send_view.getData() + 0, dim_buffers.left_send_view.getStorageSize() / 27 * 9, dim_buffers.left_neighbor, 0, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.left_recv_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, requests.push_back( MPI::Irecv( dim_buffers.left_recv_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, dim_buffers.left_recv_view.getStorageSize() / 27 * 9, dim_buffers.left_neighbor, 1, group ) ); requests.push_back( Communicator::ISend( dim_buffers.right_send_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, requests.push_back( MPI::Isend( dim_buffers.right_send_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, dim_buffers.right_send_view.getStorageSize() / 27 * 9, dim_buffers.right_neighbor, 1, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.right_recv_view.getData() + 0, requests.push_back( MPI::Irecv( dim_buffers.right_recv_view.getData() + 0, dim_buffers.right_recv_view.getStorageSize() / 27 * 9, dim_buffers.right_neighbor, 0, group ) ); } Loading src/TNL/Containers/DistributedNDArrayView.h +9 −12 Original line number Diff line number Diff line Loading @@ -12,33 +12,30 @@ #pragma once #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Containers/NDArrayView.h> #include <TNL/Containers/Subrange.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Containers { template< typename NDArrayView, typename Communicator = Communicators::MpiCommunicator, typename Overlaps = __ndarray_impl::make_constant_index_sequence< NDArrayView::getDimension(), 0 > > class DistributedNDArrayView { using CommunicationGroup = typename Communicator::CommunicationGroup; public: using ValueType = typename NDArrayView::ValueType; using DeviceType = typename NDArrayView::DeviceType; using IndexType = typename NDArrayView::IndexType; using SizesHolderType = typename NDArrayView::SizesHolderType; using PermutationType = typename NDArrayView::PermutationType; using CommunicatorType = Communicator; using LocalBeginsType = __ndarray_impl::LocalBeginsHolder< typename NDArrayView::SizesHolderType >; using LocalRangeType = Subrange< IndexType >; using OverlapsType = Overlaps; using LocalIndexerType = NDArrayIndexer< SizesHolderType, PermutationType, typename NDArrayView::NDBaseType, typename NDArrayView::StridesHolderType, Overlaps >; using ViewType = DistributedNDArrayView< NDArrayView, Communicator, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArrayView::ConstViewType, Communicator, Overlaps >; using ViewType = DistributedNDArrayView< NDArrayView, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArrayView::ConstViewType, Overlaps >; using LocalViewType = NDArrayView; using ConstLocalViewType = typename NDArrayView::ConstViewType; Loading @@ -49,7 +46,7 @@ public: // explicit initialization by local array view, global sizes and local begins and ends __cuda_callable__ DistributedNDArrayView( NDArrayView localView, SizesHolderType globalSizes, LocalBeginsType localBegins, SizesHolderType localEnds, CommunicationGroup group ) DistributedNDArrayView( NDArrayView localView, SizesHolderType globalSizes, LocalBeginsType localBegins, SizesHolderType localEnds, MPI_Comm group ) : localView(localView), group(group), globalSizes(globalSizes), localBegins(localBegins), localEnds(localEnds) {} // Copy-constructor does shallow copy, so views can be passed-by-value into Loading Loading @@ -112,7 +109,7 @@ public: void reset() { localView.reset(); group = CommunicatorType::NullGroup; group = MPI::NullGroup(); globalSizes = SizesHolderType{}; localBegins = LocalBeginsType{}; localEnds = SizesHolderType{}; Loading @@ -124,7 +121,7 @@ public: } __cuda_callable__ CommunicationGroup getCommunicationGroup() const MPI_Comm getCommunicationGroup() const { return group; } Loading Loading @@ -276,8 +273,8 @@ public: localEnds == other.localEnds && localView == other.localView; bool result = true; if( group != CommunicatorType::NullGroup ) CommunicatorType::Allreduce( &localResult, &result, 1, MPI_LAND, group ); if( group != MPI::NullGroup() ) MPI::Allreduce( &localResult, &result, 1, MPI_LAND, group ); return result; } Loading Loading @@ -406,7 +403,7 @@ public: protected: NDArrayView localView; CommunicationGroup group = Communicator::NullGroup; MPI_Comm group = MPI::NullGroup(); SizesHolderType globalSizes; // static sizes should have different type: localBegin is always 0, localEnd is always the full size LocalBeginsType localBegins; Loading src/TNL/Containers/NDArray.h +66 −12 Original line number Diff line number Diff line Loading @@ -59,10 +59,8 @@ public: NDArrayStorage() = default; // The copy-constructor of TNL::Containers::Array makes shallow copy so our // copy-constructor cannot be default. Actually, we most likely don't need // it anyway, so let's just delete it. NDArrayStorage( const NDArrayStorage& ) = delete; // Copy constructor (makes a deep copy). explicit NDArrayStorage( const NDArrayStorage& ) = default; // Standard copy-semantics with deep copy, just like regular 1D array. // Mismatched sizes cause reallocations. Loading Loading @@ -326,21 +324,49 @@ template< typename Value, typename SizesHolder, typename Permutation = std::make_index_sequence< SizesHolder::getDimension() >, // identity by default typename Device = Devices::Host, typename Index = typename SizesHolder::IndexType > typename Index = typename SizesHolder::IndexType, typename Allocator = typename Allocators::Default< Device >::template Allocator< Value > > class NDArray : public NDArrayStorage< Array< Value, Device, Index >, : public NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > > > { using Base = NDArrayStorage< Array< Value, Device, Index >, using Base = NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > > >; public: // inherit all assignment operators // inherit all constructors and assignment operators using Base::Base; using Base::operator=; // default constructor NDArray() = default; // implement dynamic array interface using AllocatorType = Allocator; NDArray( const NDArray& allocator ) { // set empty array containing the specified allocator this->getStorageArray() = Array< Value, Device, Index, Allocator >( allocator ); } // Copy constructor with a specific allocator (makes a deep copy). explicit NDArray( const NDArray& other, const AllocatorType& allocator ) { // set empty array containing the specified allocator this->array = Array< Value, Device, Index, Allocator >( allocator ); // copy the data *this = other; } AllocatorType getAllocator() const { return this->array.getAllocator(); } }; template< typename Value, Loading Loading @@ -372,21 +398,49 @@ template< typename Value, typename Permutation = std::make_index_sequence< SizesHolder::getDimension() >, // identity by default typename SliceInfo = SliceInfo<>, // no slicing by default typename Device = Devices::Host, typename Index = typename SizesHolder::IndexType > typename Index = typename SizesHolder::IndexType, typename Allocator = typename Allocators::Default< Device >::template Allocator< Value > > class SlicedNDArray : public NDArrayStorage< Array< Value, Device, Index >, : public NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::SlicedNDArrayBase< SliceInfo > > { using Base = NDArrayStorage< Array< Value, Device, Index >, using Base = NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::SlicedNDArrayBase< SliceInfo > >; public: // inherit all assignment operators // inherit all constructors and assignment operators using Base::Base; using Base::operator=; // default constructor SlicedNDArray() = default; // implement dynamic array interface using AllocatorType = Allocator; SlicedNDArray( const SlicedNDArray& allocator ) { // set empty array containing the specified allocator this->getStorageArray() = Array< Value, Device, Index, Allocator >( allocator ); } // Copy constructor with a specific allocator (makes a deep copy). explicit SlicedNDArray( const SlicedNDArray& other, const AllocatorType& allocator ) { // set empty array containing the specified allocator this->array = Array< Value, Device, Index, Allocator >( allocator ); // copy the data *this = other; } AllocatorType getAllocator() const { return this->array.getAllocator(); } }; } // namespace Containers Loading src/TNL/Containers/Partitioner.h +1 −1 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ namespace TNL { namespace Containers { template< typename Index, typename Communicator > template< typename Index, typename Communicator = Communicators::MpiCommunicator > class Partitioner { using CommunicationGroup = typename Communicator::CommunicationGroup; Loading Loading
src/TNL/Containers/DistributedNDArray.h +26 −18 Original line number Diff line number Diff line Loading @@ -12,34 +12,30 @@ #pragma once #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Containers/NDArray.h> #include <TNL/Containers/Subrange.h> #include <TNL/Containers/DistributedNDArrayView.h> namespace TNL { namespace Containers { template< typename NDArray, typename Communicator = Communicators::MpiCommunicator, typename Overlaps = __ndarray_impl::make_constant_index_sequence< NDArray::getDimension(), 0 > > class DistributedNDArray { using CommunicationGroup = typename Communicator::CommunicationGroup; public: using ValueType = typename NDArray::ValueType; using DeviceType = typename NDArray::DeviceType; using IndexType = typename NDArray::IndexType; using AllocatorType = typename NDArray::AllocatorType; using SizesHolderType = typename NDArray::SizesHolderType; using PermutationType = typename NDArray::PermutationType; using CommunicatorType = Communicator; using LocalBeginsType = __ndarray_impl::LocalBeginsHolder< typename NDArray::SizesHolderType >; using LocalRangeType = Subrange< IndexType >; using OverlapsType = Overlaps; using LocalIndexerType = NDArrayIndexer< SizesHolderType, PermutationType, typename NDArray::NDBaseType, typename NDArray::StridesHolderType, Overlaps >; using ViewType = DistributedNDArrayView< typename NDArray::ViewType, Communicator, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArray::ConstViewType, Communicator, Overlaps >; using ViewType = DistributedNDArrayView< typename NDArray::ViewType, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArray::ConstViewType, Overlaps >; using LocalViewType = typename NDArray::ViewType; using ConstLocalViewType = typename NDArray::ConstViewType; Loading @@ -49,10 +45,17 @@ public: DistributedNDArray() = default; // The copy-constructor of TNL::Containers::Array makes shallow copy so our // copy-constructor cannot be default. Actually, we most likely don't need // it anyway, so let's just delete it. DistributedNDArray( const DistributedNDArray& ) = delete; DistributedNDArray( const AllocatorType& allocator ); // Copy constructor (makes a deep copy). explicit DistributedNDArray( const DistributedNDArray& ) = default; // Copy constructor with a specific allocator (makes a deep copy). explicit DistributedNDArray( const DistributedNDArray& other, const AllocatorType& allocator ) : localArray( allocator ) { *this = other; } // Standard copy-semantics with deep copy, just like regular 1D array. // Mismatched sizes cause reallocations. Loading @@ -79,8 +82,13 @@ public: return NDArray::getDimension(); } AllocatorType getAllocator() const { return localArray.getAllocator(); } __cuda_callable__ CommunicationGroup getCommunicationGroup() const MPI_Comm getCommunicationGroup() const { return group; } Loading Loading @@ -232,8 +240,8 @@ public: localEnds == other.localEnds && localArray == other.localArray; bool result = true; if( group != CommunicatorType::NullGroup ) CommunicatorType::Allreduce( &localResult, &result, 1, MPI_LAND, group ); if( group != MPI::NullGroup() ) MPI::Allreduce( &localResult, &result, 1, MPI_LAND, group ); return result; } Loading Loading @@ -375,7 +383,7 @@ public: } template< std::size_t level > void setDistribution( IndexType begin, IndexType end, CommunicationGroup group = Communicator::AllGroup ) void setDistribution( IndexType begin, IndexType end, MPI_Comm group = MPI::AllGroup() ) { static_assert( SizesHolderType::template getStaticSize< level >() == 0, "NDArray cannot be distributed in static dimensions." ); TNL_ASSERT_GE( begin, 0, "begin must be non-negative" ); Loading @@ -383,7 +391,7 @@ public: TNL_ASSERT_LT( begin, end, "begin must be lesser than end" ); localBegins.template setSize< level >( begin ); localEnds.template setSize< level >( end ); TNL_ASSERT( this->group == Communicator::NullGroup || this->group == group, TNL_ASSERT( this->group == MPI::NullGroup() || this->group == group, std::cerr << "different groups cannot be combined for different dimensions" ); this->group = group; } Loading @@ -408,7 +416,7 @@ public: void reset() { localArray.reset(); group = CommunicatorType::NullGroup; group = MPI::NullGroup(); globalSizes = SizesHolderType{}; localBegins = LocalBeginsType{}; localEnds = SizesHolderType{}; Loading @@ -435,7 +443,7 @@ public: protected: NDArray localArray; CommunicationGroup group = Communicator::NullGroup; MPI_Comm group = MPI::NullGroup(); SizesHolderType globalSizes; // static sizes should have different type: localBegin is always 0, localEnd is always the full size LocalBeginsType localBegins; Loading
src/TNL/Containers/DistributedNDArraySynchronizer.h +31 −31 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ #include <future> #include <TNL/Containers/ndarray/SynchronizerBuffers.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Containers { Loading Loading @@ -69,7 +70,6 @@ public: protected: using DistributedNDArrayView = typename DistributedNDArray::ViewType; using Communicator = typename DistributedNDArray::CommunicatorType; using Buffers = __ndarray_impl::SynchronizerBuffers< DistributedNDArray >; DistributedNDArrayView array_view; Loading @@ -88,12 +88,12 @@ protected: Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true ); // issue all send and receive async operations std::vector< typename Communicator::Request > requests; const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup(); std::vector< MPI_Request > requests; const MPI_Comm group = array_view.getCommunicationGroup(); Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group ); // wait until send is done Communicator::WaitAll( requests.data(), requests.size() ); MPI::Waitall( requests.data(), requests.size() ); // copy data from receive buffers Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false ); Loading Loading @@ -152,9 +152,9 @@ protected: dim_buffers.right_recv_offsets.template setSize< dim >( localEnds.template getSize< dim >() ); // FIXME: set proper neighbor IDs !!! const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup(); const int rank = Communicator::GetRank(group); const int nproc = Communicator::GetSize(group); const MPI_Comm group = array_view.getCommunicationGroup(); const int rank = MPI::GetRank(group); const int nproc = MPI::GetSize(group); dim_buffers.left_neighbor = (rank + nproc - 1) % nproc; dim_buffers.right_neighbor = (rank + 1) % nproc; } Loading Loading @@ -221,30 +221,30 @@ protected: auto& dim_buffers = buffers.template getDimBuffers< dim >(); if( LBM_HACK == false ) { requests.push_back( Communicator::ISend( dim_buffers.left_send_view.getData(), requests.push_back( MPI::Isend( dim_buffers.left_send_view.getData(), dim_buffers.left_send_view.getStorageSize(), dim_buffers.left_neighbor, 0, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.left_recv_view.getData(), requests.push_back( MPI::Irecv( dim_buffers.left_recv_view.getData(), dim_buffers.left_recv_view.getStorageSize(), dim_buffers.left_neighbor, 1, group ) ); requests.push_back( Communicator::ISend( dim_buffers.right_send_view.getData(), requests.push_back( MPI::Isend( dim_buffers.right_send_view.getData(), dim_buffers.right_send_view.getStorageSize(), dim_buffers.right_neighbor, 1, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.right_recv_view.getData(), requests.push_back( MPI::Irecv( dim_buffers.right_recv_view.getData(), dim_buffers.right_recv_view.getStorageSize(), dim_buffers.right_neighbor, 0, group ) ); } else { requests.push_back( Communicator::ISend( dim_buffers.left_send_view.getData() + 0, requests.push_back( MPI::Isend( dim_buffers.left_send_view.getData() + 0, dim_buffers.left_send_view.getStorageSize() / 27 * 9, dim_buffers.left_neighbor, 0, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.left_recv_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, requests.push_back( MPI::Irecv( dim_buffers.left_recv_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, dim_buffers.left_recv_view.getStorageSize() / 27 * 9, dim_buffers.left_neighbor, 1, group ) ); requests.push_back( Communicator::ISend( dim_buffers.right_send_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, requests.push_back( MPI::Isend( dim_buffers.right_send_view.getData() + dim_buffers.left_recv_view.getStorageSize() / 27 * 18, dim_buffers.right_send_view.getStorageSize() / 27 * 9, dim_buffers.right_neighbor, 1, group ) ); requests.push_back( Communicator::IRecv( dim_buffers.right_recv_view.getData() + 0, requests.push_back( MPI::Irecv( dim_buffers.right_recv_view.getData() + 0, dim_buffers.right_recv_view.getStorageSize() / 27 * 9, dim_buffers.right_neighbor, 0, group ) ); } Loading
src/TNL/Containers/DistributedNDArrayView.h +9 −12 Original line number Diff line number Diff line Loading @@ -12,33 +12,30 @@ #pragma once #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Containers/NDArrayView.h> #include <TNL/Containers/Subrange.h> #include <TNL/MPI/Wrappers.h> namespace TNL { namespace Containers { template< typename NDArrayView, typename Communicator = Communicators::MpiCommunicator, typename Overlaps = __ndarray_impl::make_constant_index_sequence< NDArrayView::getDimension(), 0 > > class DistributedNDArrayView { using CommunicationGroup = typename Communicator::CommunicationGroup; public: using ValueType = typename NDArrayView::ValueType; using DeviceType = typename NDArrayView::DeviceType; using IndexType = typename NDArrayView::IndexType; using SizesHolderType = typename NDArrayView::SizesHolderType; using PermutationType = typename NDArrayView::PermutationType; using CommunicatorType = Communicator; using LocalBeginsType = __ndarray_impl::LocalBeginsHolder< typename NDArrayView::SizesHolderType >; using LocalRangeType = Subrange< IndexType >; using OverlapsType = Overlaps; using LocalIndexerType = NDArrayIndexer< SizesHolderType, PermutationType, typename NDArrayView::NDBaseType, typename NDArrayView::StridesHolderType, Overlaps >; using ViewType = DistributedNDArrayView< NDArrayView, Communicator, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArrayView::ConstViewType, Communicator, Overlaps >; using ViewType = DistributedNDArrayView< NDArrayView, Overlaps >; using ConstViewType = DistributedNDArrayView< typename NDArrayView::ConstViewType, Overlaps >; using LocalViewType = NDArrayView; using ConstLocalViewType = typename NDArrayView::ConstViewType; Loading @@ -49,7 +46,7 @@ public: // explicit initialization by local array view, global sizes and local begins and ends __cuda_callable__ DistributedNDArrayView( NDArrayView localView, SizesHolderType globalSizes, LocalBeginsType localBegins, SizesHolderType localEnds, CommunicationGroup group ) DistributedNDArrayView( NDArrayView localView, SizesHolderType globalSizes, LocalBeginsType localBegins, SizesHolderType localEnds, MPI_Comm group ) : localView(localView), group(group), globalSizes(globalSizes), localBegins(localBegins), localEnds(localEnds) {} // Copy-constructor does shallow copy, so views can be passed-by-value into Loading Loading @@ -112,7 +109,7 @@ public: void reset() { localView.reset(); group = CommunicatorType::NullGroup; group = MPI::NullGroup(); globalSizes = SizesHolderType{}; localBegins = LocalBeginsType{}; localEnds = SizesHolderType{}; Loading @@ -124,7 +121,7 @@ public: } __cuda_callable__ CommunicationGroup getCommunicationGroup() const MPI_Comm getCommunicationGroup() const { return group; } Loading Loading @@ -276,8 +273,8 @@ public: localEnds == other.localEnds && localView == other.localView; bool result = true; if( group != CommunicatorType::NullGroup ) CommunicatorType::Allreduce( &localResult, &result, 1, MPI_LAND, group ); if( group != MPI::NullGroup() ) MPI::Allreduce( &localResult, &result, 1, MPI_LAND, group ); return result; } Loading Loading @@ -406,7 +403,7 @@ public: protected: NDArrayView localView; CommunicationGroup group = Communicator::NullGroup; MPI_Comm group = MPI::NullGroup(); SizesHolderType globalSizes; // static sizes should have different type: localBegin is always 0, localEnd is always the full size LocalBeginsType localBegins; Loading
src/TNL/Containers/NDArray.h +66 −12 Original line number Diff line number Diff line Loading @@ -59,10 +59,8 @@ public: NDArrayStorage() = default; // The copy-constructor of TNL::Containers::Array makes shallow copy so our // copy-constructor cannot be default. Actually, we most likely don't need // it anyway, so let's just delete it. NDArrayStorage( const NDArrayStorage& ) = delete; // Copy constructor (makes a deep copy). explicit NDArrayStorage( const NDArrayStorage& ) = default; // Standard copy-semantics with deep copy, just like regular 1D array. // Mismatched sizes cause reallocations. Loading Loading @@ -326,21 +324,49 @@ template< typename Value, typename SizesHolder, typename Permutation = std::make_index_sequence< SizesHolder::getDimension() >, // identity by default typename Device = Devices::Host, typename Index = typename SizesHolder::IndexType > typename Index = typename SizesHolder::IndexType, typename Allocator = typename Allocators::Default< Device >::template Allocator< Value > > class NDArray : public NDArrayStorage< Array< Value, Device, Index >, : public NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > > > { using Base = NDArrayStorage< Array< Value, Device, Index >, using Base = NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > > >; public: // inherit all assignment operators // inherit all constructors and assignment operators using Base::Base; using Base::operator=; // default constructor NDArray() = default; // implement dynamic array interface using AllocatorType = Allocator; NDArray( const NDArray& allocator ) { // set empty array containing the specified allocator this->getStorageArray() = Array< Value, Device, Index, Allocator >( allocator ); } // Copy constructor with a specific allocator (makes a deep copy). explicit NDArray( const NDArray& other, const AllocatorType& allocator ) { // set empty array containing the specified allocator this->array = Array< Value, Device, Index, Allocator >( allocator ); // copy the data *this = other; } AllocatorType getAllocator() const { return this->array.getAllocator(); } }; template< typename Value, Loading Loading @@ -372,21 +398,49 @@ template< typename Value, typename Permutation = std::make_index_sequence< SizesHolder::getDimension() >, // identity by default typename SliceInfo = SliceInfo<>, // no slicing by default typename Device = Devices::Host, typename Index = typename SizesHolder::IndexType > typename Index = typename SizesHolder::IndexType, typename Allocator = typename Allocators::Default< Device >::template Allocator< Value > > class SlicedNDArray : public NDArrayStorage< Array< Value, Device, Index >, : public NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::SlicedNDArrayBase< SliceInfo > > { using Base = NDArrayStorage< Array< Value, Device, Index >, using Base = NDArrayStorage< Array< Value, Device, Index, Allocator >, SizesHolder, Permutation, __ndarray_impl::SlicedNDArrayBase< SliceInfo > >; public: // inherit all assignment operators // inherit all constructors and assignment operators using Base::Base; using Base::operator=; // default constructor SlicedNDArray() = default; // implement dynamic array interface using AllocatorType = Allocator; SlicedNDArray( const SlicedNDArray& allocator ) { // set empty array containing the specified allocator this->getStorageArray() = Array< Value, Device, Index, Allocator >( allocator ); } // Copy constructor with a specific allocator (makes a deep copy). explicit SlicedNDArray( const SlicedNDArray& other, const AllocatorType& allocator ) { // set empty array containing the specified allocator this->array = Array< Value, Device, Index, Allocator >( allocator ); // copy the data *this = other; } AllocatorType getAllocator() const { return this->array.getAllocator(); } }; } // namespace Containers Loading
src/TNL/Containers/Partitioner.h +1 −1 Original line number Diff line number Diff line Loading @@ -22,7 +22,7 @@ namespace TNL { namespace Containers { template< typename Index, typename Communicator > template< typename Index, typename Communicator = Communicators::MpiCommunicator > class Partitioner { using CommunicationGroup = typename Communicator::CommunicationGroup; Loading