Commit 6019eedf authored by Jakub Klinkovský
Browse files

Removed __cuda_callable__ from methods in DistributedNDArray and DistributedNDArrayView

CUDA kernels should never work with distributed data structures; they
should always get the underlying *local* data structure.
parent 7c9f7acf
Loading
Loading
Loading
Loading
+0 −12
Original line number Diff line number Diff line
@@ -83,14 +83,12 @@ public:
      return localArray.getAllocator();
   }

   // Returns the MPI communicator stored in the `group` member (by value).
   __cuda_callable__
   MPI_Comm getCommunicationGroup() const
   {
      return group;
   }

   // Returns the *global* sizes
   __cuda_callable__
   const SizesHolderType& getSizes() const
   {
      return globalSizes;
@@ -98,33 +96,28 @@ public:

   // Returns the *global* size for the given `level`, i.e. the value that
   // `globalSizes` holds at compile-time position `level`.
   template< std::size_t level >
   __cuda_callable__
   IndexType getSize() const
   {
      // `.template` is needed because getSize is a member template called on
      // an object whose type depends on the template parameters
      return globalSizes.template getSize< level >();
   }

   // Returns a copy of `localBegins`, the holder of the lower bounds that
   // getLocalRange uses for the local range at each level.
   __cuda_callable__
   LocalBeginsType getLocalBegins() const
   {
      return localBegins;
   }

   // Returns a copy of `localEnds`, the holder of the upper bounds that
   // getLocalRange uses for the local range at each level.
   __cuda_callable__
   SizesHolderType getLocalEnds() const
   {
      return localEnds;
   }

   // Builds the local range for the given `level` from the matching entries
   // of `localBegins` (first constructor argument) and `localEnds` (second
   // constructor argument).
   template< std::size_t level >
   __cuda_callable__
   LocalRangeType getLocalRange() const
   {
      const auto rangeBegin = localBegins.template getSize< level >();
      const auto rangeEnd = localEnds.template getSize< level >();
      return LocalRangeType( rangeBegin, rangeEnd );
   }

   // returns the local storage size
   __cuda_callable__
   IndexType getLocalStorageSize() const
   {
      return localArray.getStorageSize();
@@ -142,7 +135,6 @@ public:

   // returns the *local* storage index for given *global* indices
   template< typename... IndexTypes >
   __cuda_callable__
   IndexType
   getStorageIndex( IndexTypes&&... indices ) const
   {
@@ -155,13 +147,11 @@ public:
      return __ndarray_impl::call_with_unshifted_indices< LocalBeginsType >( localBegins, getStorageIndex, std::forward< IndexTypes >( indices )... );
   }

   // Returns the mutable data pointer obtained from `localArray.getData()`,
   // i.e. the pointer to the data of the local array only.
   __cuda_callable__
   ValueType* getData()
   {
      return localArray.getData();
   }

   __cuda_callable__
   std::add_const_t< ValueType >* getData() const
   {
      return localArray.getData();
@@ -207,13 +197,11 @@ public:
      return localArray[ index - localBegins.template getSize< 0 >() ];
   }

   // Creates a view of this distributed array: it is constructed from a view
   // of the local array together with the current global sizes, local begins,
   // local ends and communication group.
   __cuda_callable__
   ViewType getView()
   {
      return ViewType( localArray.getView(), globalSizes, localBegins, localEnds, group );
   }

   __cuda_callable__
   ConstViewType getConstView() const
   {
      return ConstViewType( localArray.getConstView(), globalSizes, localBegins, localEnds, group );
+1 −24
Original line number Diff line number Diff line
@@ -37,27 +37,20 @@ public:
   using LocalViewType = NDArrayView;
   using ConstLocalViewType = typename NDArrayView::ConstViewType;

   // Default constructor (compiler-generated).
   __cuda_callable__
   DistributedNDArrayView() = default;

   // Explicit initialization: stores the given local array view together with
   // the global sizes, the local begins and ends, and the communication group.
   __cuda_callable__
   DistributedNDArrayView( NDArrayView localView,
                           SizesHolderType globalSizes,
                           LocalBeginsType localBegins,
                           SizesHolderType localEnds,
                           MPI_Comm group )
   : localView( localView ),
     group( group ),
     globalSizes( globalSizes ),
     localBegins( localBegins ),
     localEnds( localEnds )
   {}

   // Copy-constructor does shallow copy. It is compiler-generated (defaulted),
   // so each member is copy-constructed from the matching member of the source.
   // NOTE(review): whether distributed views are still meant to be passed by
   // value into CUDA kernels or captured in __cuda_callable__ lambdas cannot
   // be determined from here — confirm before relying on it.
   __cuda_callable__
   DistributedNDArrayView( const DistributedNDArrayView& ) = default;

   // Default move-constructor (compiler-generated, memberwise move).
   __cuda_callable__
   DistributedNDArrayView( DistributedNDArrayView&& ) = default;

   // Copy-assignment does deep copy, just like regular array, but the sizes
   // must match (i.e. copy-assignment cannot resize).
   // NOTE(review): the operator is defaulted, so it performs memberwise
   // copy-assignment. Any deep copy of elements therefore has to come from the
   // copy-assignment operator of the `localView` member — confirm. Memberwise
   // assignment also assigns `group`, `globalSizes`, `localBegins` and
   // `localEnds` from `other`; no size check is visible in this declaration.
   __cuda_callable__
   DistributedNDArrayView& operator=( const DistributedNDArrayView& other ) = default;

   // There is no move-assignment operator, so expressions like `a = b.getView()`
@@ -76,7 +69,6 @@ public:
   }

   // methods for rebinding (reinitialization)
   __cuda_callable__
   void bind( DistributedNDArrayView view )
   {
      localView.bind( view.localView );
@@ -87,20 +79,17 @@ public:
   }

   // Binds to the given raw pointer and changes the indexer.
   // The call is forwarded to `localView.bind( data, indexer )`; this method
   // itself does not touch the communication group, the global sizes or the
   // local begins and ends.
   __cuda_callable__
   void bind( ValueType* data, typename LocalViewType::IndexerType indexer )
   {
      localView.bind( data, indexer );
   }

   // Binds to the given raw pointer and preserves the current indexer.
   // The call is forwarded to `localView.bind( data )`; this method itself
   // does not touch the communication group, the global sizes or the local
   // begins and ends.
   __cuda_callable__
   void bind( ValueType* data )
   {
      localView.bind( data );
   }
   }

   __cuda_callable__
   void reset()
   {
      localView.reset();
@@ -115,14 +104,12 @@ public:
      return NDArrayView::getDimension();
   }

   // Returns the MPI communicator stored in the `group` member (by value).
   __cuda_callable__
   MPI_Comm getCommunicationGroup() const
   {
      return group;
   }

   // Returns the *global* sizes
   __cuda_callable__
   const SizesHolderType& getSizes() const
   {
      return globalSizes;
@@ -130,33 +117,28 @@ public:

   // Returns the *global* size for the given `level`, i.e. the value that
   // `globalSizes` holds at compile-time position `level`.
   template< std::size_t level >
   __cuda_callable__
   IndexType getSize() const
   {
      // `.template` is needed because getSize is a member template called on
      // an object whose type depends on the template parameters
      return globalSizes.template getSize< level >();
   }

   // Returns a copy of `localBegins`, the holder of the lower bounds that
   // getLocalRange uses for the local range at each level.
   __cuda_callable__
   LocalBeginsType getLocalBegins() const
   {
      return localBegins;
   }

   // Returns a copy of `localEnds`, the holder of the upper bounds that
   // getLocalRange uses for the local range at each level.
   __cuda_callable__
   SizesHolderType getLocalEnds() const
   {
      return localEnds;
   }

   // Builds the local range for the given `level` from the matching entries
   // of `localBegins` (first constructor argument) and `localEnds` (second
   // constructor argument).
   template< std::size_t level >
   __cuda_callable__
   LocalRangeType getLocalRange() const
   {
      const auto rangeBegin = localBegins.template getSize< level >();
      const auto rangeEnd = localEnds.template getSize< level >();
      return LocalRangeType( rangeBegin, rangeEnd );
   }

   // returns the local storage size
   __cuda_callable__
   IndexType getLocalStorageSize() const
   {
      return localView.getStorageSize();
@@ -174,7 +156,6 @@ public:

   // returns the *local* storage index for given *global* indices
   template< typename... IndexTypes >
   __cuda_callable__
   IndexType
   getStorageIndex( IndexTypes&&... indices ) const
   {
@@ -187,13 +168,11 @@ public:
      return __ndarray_impl::call_with_unshifted_indices< LocalBeginsType >( localBegins, getStorageIndex, std::forward< IndexTypes >( indices )... );
   }

   // Returns the mutable data pointer obtained from `localView.getData()`,
   // i.e. the pointer to the data referenced by the local view only.
   __cuda_callable__
   ValueType* getData()
   {
      return localView.getData();
   }

   __cuda_callable__
   std::add_const_t< ValueType >* getData() const
   {
      return localView.getData();
@@ -239,13 +218,11 @@ public:
      return localView[ index - localBegins.template getSize< 0 >() ];
   }

   // Returns a new ViewType object constructed from this view (`*this`).
   __cuda_callable__
   ViewType getView()
   {
      return ViewType( *this );
   }

   __cuda_callable__
   ConstViewType getConstView() const
   {
      return ConstViewType( localView, globalSizes, localBegins, localEnds, group );