diff --git a/src/TNL/Containers/NDArray.h b/src/TNL/Containers/NDArray.h
index d9e4cb09b0882e44d5475b01331d191048eb53d3..ed5111e6dcc71176e7ff562689bdd1dce538c585 100644
--- a/src/TNL/Containers/NDArray.h
+++ b/src/TNL/Containers/NDArray.h
@@ -176,8 +176,8 @@ public:
    template< typename Device2 = DeviceType, typename Func >
    void forAll( Func f ) const
    {
-      __ndarray_impl::ExecutorDispatcher< ConstViewType, Device2 > dispatch;
-      dispatch( getConstView(), f );
+      __ndarray_impl::ExecutorDispatcher< PermutationType, Device2 > dispatch;  // executor is selected by the permutation and the device only
+      dispatch( sizes, f );  // only the sizes are handed over; the executor invokes f once per index tuple
    }
 
 
diff --git a/src/TNL/Containers/NDArrayView.h b/src/TNL/Containers/NDArrayView.h
index 73af6713f83de5e5dc3880fc40ff93bc043d454c..5b5e056ff3c1b1d0e2be0d5b679bb5ef4ec2f1dc 100644
--- a/src/TNL/Containers/NDArrayView.h
+++ b/src/TNL/Containers/NDArrayView.h
@@ -230,8 +230,8 @@ public:
    template< typename Device2 = DeviceType, typename Func >
    void forAll( Func f ) const
    {
-      __ndarray_impl::ExecutorDispatcher< NDArrayView, Device2 > dispatch;
-      dispatch( *this, f );
+      __ndarray_impl::ExecutorDispatcher< PermutationType, Device2 > dispatch;  // dispatcher is parameterized by the permutation, not by the view type
+      dispatch( sizes, f );  // the view itself is not passed; f is called by the executor for every index tuple
    }
 
 protected:
diff --git a/src/TNL/Containers/ndarray/Operations.h b/src/TNL/Containers/ndarray/Operations.h
index 04c66a7f4e8a8e054469f099121921f703499a0d..705687c1d6a54bf8f2a2186eb2f0f35eae3f5b53 100644
--- a/src/TNL/Containers/ndarray/Operations.h
+++ b/src/TNL/Containers/ndarray/Operations.h
@@ -21,221 +21,227 @@ namespace Containers {
 
 namespace __ndarray_impl {
 
-template< typename Array,
+template< typename Permutation,  // get< N >( Permutation{} ) names the dimension that is looped over at level N
           typename LevelTag = IndexTag< 0 > >
 struct SequentialExecutor
 {
-   template< typename Func,
+   template< typename SizesHolder,  // must provide IndexType and getSize< dim >()
+             typename Func,
              typename... Indices >
    __cuda_callable__
-   void operator()( const Array& array, Func f, Indices&&... indices )
+   void operator()( const SizesHolder& sizes, Func f, Indices&&... indices )  // indices: values already fixed by the outer levels
    {
-      SequentialExecutor< Array, IndexTag< LevelTag::value + 1 > > exec;
-      const auto size = array.template getSize< get< LevelTag::value >( typename Array::PermutationType{} ) >();
-      for( typename Array::IndexType i = 0; i < size; i++ )
-         exec( array, f, std::forward< Indices >( indices )..., i );
+      SequentialExecutor< Permutation, IndexTag< LevelTag::value + 1 > > exec;  // executor for the next nesting level
+      const auto size = sizes.template getSize< get< LevelTag::value >( Permutation{} ) >();  // extent of this level's dimension
+      for( typename SizesHolder::IndexType i = 0; i < size; i++ )
+         exec( sizes, f, std::forward< Indices >( indices )..., i );  // append this level's index and descend
    }
 };
 
-template< typename Array >
-struct SequentialExecutor< Array, IndexTag< Array::getDimension() - 1 > >
+template< typename Permutation >
+struct SequentialExecutor< Permutation, IndexTag< Permutation::size() - 1 > >  // last level: calls f instead of descending further
 {
-   template< typename Func,
+   template< typename SizesHolder,  // must provide IndexType, getDimension() and getSize< dim >()
+             typename Func,
              typename... Indices >
    __cuda_callable__
-   void operator()( const Array& array, Func f, Indices&&... indices )
+   void operator()( const SizesHolder& sizes, Func f, Indices&&... indices )
    {
-      static_assert( sizeof...(indices) == Array::getDimension() - 1,
+      static_assert( sizeof...(indices) == SizesHolder::getDimension() - 1,  // every outer index must already be supplied
                      "invalid number of indices in the final step of the SequentialExecutor" );
 
-      const auto size = array.template getSize< get< Array::getDimension() - 1 >( typename Array::PermutationType{} ) >();
-      for( typename Array::IndexType i = 0; i < size; i++ )
-         call_with_permuted_arguments< typename Array::PermutationType >( f, std::forward< Indices >( indices )..., i );
+      const auto size = sizes.template getSize< get< SizesHolder::getDimension() - 1 >( Permutation{} ) >();  // extent of the innermost loop
+      for( typename SizesHolder::IndexType i = 0; i < size; i++ )
+         call_with_permuted_arguments< Permutation >( f, std::forward< Indices >( indices )..., i );  // helper is defined elsewhere; presumably reorders the arguments per Permutation - confirm
    }
 };
 
 
-template< typename Array,
-          typename LevelTag = IndexTag< Array::getDimension() - 1 > >
+template< typename Permutation,
+          typename LevelTag = IndexTag< Permutation::size() - 1 > >  // by default starts at the last level and counts down
 struct SequentialExecutorRTL
 {
-   template< typename Func,
+   template< typename SizesHolder,  // must provide IndexType and getSize< dim >()
+             typename Func,
              typename... Indices >
    __cuda_callable__
-   void operator()( const Array& array, Func f, Indices&&... indices )
+   void operator()( const SizesHolder& sizes, Func f, Indices&&... indices )  // indices: values already fixed for the higher levels
    {
-      SequentialExecutorRTL< Array, IndexTag< LevelTag::value - 1 > > exec;
-      const auto size = array.template getSize< get< LevelTag::value >( typename Array::PermutationType{} ) >();
-      for( typename Array::IndexType i = 0; i < size; i++ )
-         exec( array, f, i, std::forward< Indices >( indices )... );
+      SequentialExecutorRTL< Permutation, IndexTag< LevelTag::value - 1 > > exec;  // executor for the next lower level
+      const auto size = sizes.template getSize< get< LevelTag::value >( Permutation{} ) >();  // extent of this level's dimension
+      for( typename SizesHolder::IndexType i = 0; i < size; i++ )
+         exec( sizes, f, i, std::forward< Indices >( indices )... );  // prepend this level's index and descend
    }
 };
 
-template< typename Array >
-struct SequentialExecutorRTL< Array, IndexTag< 0 > >
+template< typename Permutation >
+struct SequentialExecutorRTL< Permutation, IndexTag< 0 > >  // level 0: ends the right-to-left descent and calls f
 {
-   template< typename Func,
+   template< typename SizesHolder,  // must provide IndexType, getDimension() and getSize< dim >()
+             typename Func,
              typename... Indices >
    __cuda_callable__
-   void operator()( const Array& array, Func f, Indices&&... indices )
+   void operator()( const SizesHolder& sizes, Func f, Indices&&... indices )
    {
-      static_assert( sizeof...(indices) == Array::getDimension() - 1,
+      static_assert( sizeof...(indices) == SizesHolder::getDimension() - 1,  // all indices except the one for level 0 must be supplied
                      "invalid number of indices in the final step of the SequentialExecutor" );
 
-      const auto size = array.template getSize< get< 0 >( typename Array::PermutationType{} ) >();
-      for( typename Array::IndexType i = 0; i < size; i++ )
-         call_with_permuted_arguments< typename Array::PermutationType >( f, i, std::forward< Indices >( indices )... );
+      const auto size = sizes.template getSize< get< 0 >( Permutation{} ) >();  // extent of level 0
+      for( typename SizesHolder::IndexType i = 0; i < size; i++ )
+         call_with_permuted_arguments< Permutation >( f, i, std::forward< Indices >( indices )... );  // level-0 index goes first, ahead of the indices collected so far
    }
 };
 
 
-template< typename Array,
-          typename Device = typename Array::DeviceType >
+template< typename Permutation,
+          typename Device >  // device used for the ParallelFor3D launch below
 struct ParallelExecutorDeviceDispatch
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )  // sizes and f are captured by copy in the kernel lambda
    {
-      using Index = typename Array::IndexType;
+      using Index = typename SizesHolder::IndexType;
 
       auto kernel = [=] ( Index i2, Index i1, Index i0 )
       {
-         SequentialExecutor< Array, IndexTag< 3 > > exec;
-         exec( array, f, i0, i1, i2 );
+         SequentialExecutor< Permutation, IndexTag< 3 > > exec;  // levels 3 and above are looped sequentially inside the kernel
+         exec( sizes, f, i0, i1, i2 );  // pass the parallel indices in level order 0, 1, 2
       };
 
-      const Index size0 = array.template getSize< get< 0 >( typename Array::PermutationType{} ) >();
-      const Index size1 = array.template getSize< get< 1 >( typename Array::PermutationType{} ) >();
-      const Index size2 = array.template getSize< get< 2 >( typename Array::PermutationType{} ) >();
+      const Index size0 = sizes.template getSize< get< 0 >( Permutation{} ) >();  // extents of levels 0-2 bound the parallel range
+      const Index size1 = sizes.template getSize< get< 1 >( Permutation{} ) >();
+      const Index size2 = sizes.template getSize< get< 2 >( Permutation{} ) >();
       ParallelFor3D< Device >::exec( (Index) 0, (Index) 0, (Index) 0, size2, size1, size0, kernel );
    }
 };
 
-template< typename Array >
-struct ParallelExecutorDeviceDispatch< Array, Devices::Cuda >
+template< typename Permutation >
+struct ParallelExecutorDeviceDispatch< Permutation, Devices::Cuda >  // CUDA variant: parallelizes the last three levels instead of the first three
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )  // sizes and f are captured by copy in the kernel lambda
    {
-      using Index = typename Array::IndexType;
+      using Index = typename SizesHolder::IndexType;
 
       auto kernel = [=] __cuda_callable__ ( Index i2, Index i1, Index i0 )
       {
-         SequentialExecutorRTL< Array, IndexTag< Array::getDimension() - 4 > > exec;
-         exec( array, f, i0, i1, i2 );
+         SequentialExecutorRTL< Permutation, IndexTag< SizesHolder::getDimension() - 4 > > exec;  // levels getDimension()-4 down to 0 are looped sequentially inside the kernel
+         exec( sizes, f, i0, i1, i2 );  // i0, i1, i2 belong to the last three levels; lower-level indices get prepended
       };
 
-      const Index size0 = array.template getSize< get< Array::getDimension() - 3 >( typename Array::PermutationType{} ) >();
-      const Index size1 = array.template getSize< get< Array::getDimension() - 2 >( typename Array::PermutationType{} ) >();
-      const Index size2 = array.template getSize< get< Array::getDimension() - 1 >( typename Array::PermutationType{} ) >();
+      const Index size0 = sizes.template getSize< get< SizesHolder::getDimension() - 3 >( Permutation{} ) >();  // extents of the last three levels bound the parallel range
+      const Index size1 = sizes.template getSize< get< SizesHolder::getDimension() - 2 >( Permutation{} ) >();
+      const Index size2 = sizes.template getSize< get< SizesHolder::getDimension() - 1 >( Permutation{} ) >();
       ParallelFor3D< Devices::Cuda >::exec( (Index) 0, (Index) 0, (Index) 0, size2, size1, size0, kernel );
    }
 };
 
-template< typename Array,
-          typename DimTag = IndexTag< Array::getDimension() > >
+template< typename Permutation,
+          typename Device,
+          typename DimTag = IndexTag< Permutation::size() > >  // tags 1, 2 and 3 have their own specializations; other values use this primary template
 struct ParallelExecutor
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )
    {
-      ParallelExecutorDeviceDispatch< Array > dispatch;
-      dispatch( array, f );
+      ParallelExecutorDeviceDispatch< Permutation, Device > dispatch;  // device-specific split into parallel and sequential levels
+      dispatch( sizes, f );
    }
 };
 
-template< typename Array >
-struct ParallelExecutor< Array, IndexTag< 3 > >
+template< typename Permutation,
+          typename Device >
+struct ParallelExecutor< Permutation, Device, IndexTag< 3 > >  // three levels: all of them are covered by ParallelFor3D
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )  // sizes is only read here to obtain the three extents
    {
-      using Device = typename Array::DeviceType;
-      using Index = typename Array::IndexType;
+      using Index = typename SizesHolder::IndexType;
 
       auto kernel = [=] __cuda_callable__ ( Index i2, Index i1, Index i0 )
       {
-         call_with_permuted_arguments< typename Array::PermutationType >( f, i0, i1, i2 );
+         call_with_permuted_arguments< Permutation >( f, i0, i1, i2 );  // kernel receives ( i2, i1, i0 ); pass them on in level order 0, 1, 2
       };
 
-      const Index size0 = array.template getSize< get< 0 >( typename Array::PermutationType{} ) >();
-      const Index size1 = array.template getSize< get< 1 >( typename Array::PermutationType{} ) >();
-      const Index size2 = array.template getSize< get< 2 >( typename Array::PermutationType{} ) >();
+      const Index size0 = sizes.template getSize< get< 0 >( Permutation{} ) >();  // extents of levels 0-2 in permuted order
+      const Index size1 = sizes.template getSize< get< 1 >( Permutation{} ) >();
+      const Index size2 = sizes.template getSize< get< 2 >( Permutation{} ) >();
       ParallelFor3D< Device >::exec( (Index) 0, (Index) 0, (Index) 0, size2, size1, size0, kernel );
    }
 };
 
-template< typename Array >
-struct ParallelExecutor< Array, IndexTag< 2 > >
+template< typename Permutation,
+          typename Device >
+struct ParallelExecutor< Permutation, Device, IndexTag< 2 > >  // two levels: both are covered by ParallelFor2D
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )  // sizes is only read here to obtain the two extents
    {
-      using Device = typename Array::DeviceType;
-      using Index = typename Array::IndexType;
+      using Index = typename SizesHolder::IndexType;
 
       auto kernel = [=] __cuda_callable__ ( Index i1, Index i0 )
       {
-         call_with_permuted_arguments< typename Array::PermutationType >( f, i0, i1 );
+         call_with_permuted_arguments< Permutation >( f, i0, i1 );  // kernel receives ( i1, i0 ); pass them on in level order 0, 1
       };
 
-      const Index size0 = array.template getSize< get< 0 >( typename Array::PermutationType{} ) >();
-      const Index size1 = array.template getSize< get< 1 >( typename Array::PermutationType{} ) >();
+      const Index size0 = sizes.template getSize< get< 0 >( Permutation{} ) >();  // extents of levels 0 and 1 in permuted order
+      const Index size1 = sizes.template getSize< get< 1 >( Permutation{} ) >();
       ParallelFor2D< Device >::exec( (Index) 0, (Index) 0, size1, size0, kernel );
    }
 };
 
-template< typename Array >
-struct ParallelExecutor< Array, IndexTag< 1 > >
+template< typename Permutation,
+          typename Device >
+struct ParallelExecutor< Permutation, Device, IndexTag< 1 > >  // single level: plain ParallelFor over one index
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )  // sizes is only read here to obtain the single extent
    {
-      using Device = typename Array::DeviceType;
-      using Index = typename Array::IndexType;
+      using Index = typename SizesHolder::IndexType;
 
       auto kernel = [=] __cuda_callable__ ( Index i )
       {
-         call_with_permuted_arguments< typename Array::PermutationType >( f, i );
+         call_with_permuted_arguments< Permutation >( f, i );  // one index only; still routed through the common helper
       };
 
-      const Index size = array.template getSize< get< 0 >( typename Array::PermutationType{} ) >();
+      const Index size = sizes.template getSize< get< 0 >( Permutation{} ) >();  // extent of the only level
       ParallelFor< Device >::exec( (Index) 0, size, kernel );
    }
 };
 
 
 // Device may be void which stands for StaticNDArray
-template< typename Array, typename Device = typename Array::DeviceType >
+template< typename Permutation,
+          typename Device >  // devices other than the Host and Cuda specializations end up in this primary template
 struct ExecutorDispatcher
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )
    {
-      SequentialExecutor< Array >()( array, f );
+      SequentialExecutor< Permutation >()( sizes, f );  // always sequential: nested loops starting at level 0
    }
 };
 
-template< typename Array >
-struct ExecutorDispatcher< Array, Devices::Host >
+template< typename Permutation >
+struct ExecutorDispatcher< Permutation, Devices::Host >  // host: picks parallel or sequential execution at run time
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )
    {
       if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 )
-         ParallelExecutor< Array >()( array, f );
+         ParallelExecutor< Permutation, Devices::Host >()( sizes, f );  // OpenMP is enabled and more than one thread is allowed
       else
-         SequentialExecutor< Array >()( array, f );
+         SequentialExecutor< Permutation >()( sizes, f );  // otherwise run the plain nested loops
    }
 };
 
-template< typename Array >
-struct ExecutorDispatcher< Array, Devices::Cuda >
+template< typename Permutation >
+struct ExecutorDispatcher< Permutation, Devices::Cuda >  // CUDA: no sequential fallback
 {
-   template< typename Func >
-   void operator()( const Array& array, Func f )
+   template< typename SizesHolder, typename Func >
+   void operator()( const SizesHolder& sizes, Func f )
    {
-      ParallelExecutor< Array >()( array, f );
+      ParallelExecutor< Permutation, Devices::Cuda >()( sizes, f );  // the device is now named explicitly instead of being read from the array type
    }
 };
 
@@ -256,9 +262,8 @@ void nd_map_view( Output output, Func f, const Input... input )
       output( indices... ) = f( input( indices... )... );
    };
 
-   // From here on, the output array is used only for getting the sizes,
-   // the writing of the result is done inside the wrapper.
-   ExecutorDispatcher< Output >()( output, wrapper );
+   ExecutorDispatcher< typename Output::PermutationType, typename Output::DeviceType > dispatch;
+   dispatch( output.getSizes(), wrapper );
 }
 
 #else
@@ -356,10 +361,8 @@ template< typename Output,
 void nd_map_view( Output output, Func f )
 {
    nvcc_map_helper_0< Output, Func > wrapper( output, f );
-
-   // From here on, the output array is used only for getting the sizes,
-   // the writing of the result is done inside the wrapper.
-   ExecutorDispatcher< Output >()( output, wrapper );
+   ExecutorDispatcher< typename Output::PermutationType, typename Output::DeviceType > dispatch;
+   dispatch( output.getSizes(), wrapper );
 }
 
 template< typename Output,
@@ -371,10 +374,8 @@ void nd_map_view( Output output, Func f, const Input1 input1 )
                   "all arrays must be of the same dimension" );
 
    nvcc_map_helper_1< Output, Func, Input1 > wrapper( output, f, input1 );
-
-   // From here on, the output array is used only for getting the sizes,
-   // the writing of the result is done inside the wrapper.
-   ExecutorDispatcher< Output >()( output, wrapper );
+   ExecutorDispatcher< typename Output::PermutationType, typename Output::DeviceType > dispatch;
+   dispatch( output.getSizes(), wrapper );
 }
 
 template< typename Output,
@@ -387,10 +388,8 @@ void nd_map_view( Output output, Func f, const Input1 input1, const Input2 input
                   "all arrays must be of the same dimension" );
 
    nvcc_map_helper_2< Output, Func, Input1, Input2 > wrapper( output, f, input1, input2 );
-
-   // From here on, the output array is used only for getting the sizes,
-   // the writing of the result is done inside the wrapper.
-   ExecutorDispatcher< Output >()( output, wrapper );
+   ExecutorDispatcher< typename Output::PermutationType, typename Output::DeviceType > dispatch;
+   dispatch( output.getSizes(), wrapper );
 }
 
 template< typename Output,
@@ -404,10 +403,8 @@ void nd_map_view( Output output, Func f, const Input1 input1, const Input2 input
                   "all arrays must be of the same dimension" );
 
    nvcc_map_helper_3< Output, Func, Input1, Input2, Input3 > wrapper( output, f, input1, input2, input3 );
-
-   // From here on, the output array is used only for getting the sizes,
-   // the writing of the result is done inside the wrapper.
-   ExecutorDispatcher< Output >()( output, wrapper );
+   ExecutorDispatcher< typename Output::PermutationType, typename Output::DeviceType > dispatch;
+   dispatch( output.getSizes(), wrapper );
 }
 
 #endif