Commit 1d0bcfa4 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Implemented Array::resize using the STL semantics almost exactly

- if ValueType is a fundamental type, the Array behaves the same as
  before (allocated elements are initialized to indeterminate values)
- otherwise, allocated elements are value-initialized using
  MemoryOperations::construct and destructed using
  MemoryOperations::destruct before their deallocation
parent 4bb7b41e
Loading
Loading
Loading
Loading
+52 −0
Original line number Diff line number Diff line
@@ -24,6 +24,25 @@ struct MemoryOperations;
template<>
struct MemoryOperations< Devices::Sequential >
{
   template< typename Element, typename Index >
   __cuda_callable__
   static void construct( Element* data,
                          const Index size );

   // note that args are passed by reference to the constructor, not via
   // std::forward since move-semantics does not apply for the construction of
   // multiple elements
   template< typename Element, typename Index, typename... Args >
   __cuda_callable__
   static void construct( Element* data,
                          const Index size,
                          const Args&... args );

   template< typename Element, typename Index >
   __cuda_callable__
   static void destruct( Element* data,
                         const Index size );

   template< typename Element >
   __cuda_callable__
   static void setElement( Element* data,
@@ -81,6 +100,22 @@ struct MemoryOperations< Devices::Sequential >
template<>
struct MemoryOperations< Devices::Host >
{
   template< typename Element, typename Index >
   static void construct( Element* data,
                          const Index size );

   // note that args are passed by reference to the constructor, not via
   // std::forward since move-semantics does not apply for the construction of
   // multiple elements
   template< typename Element, typename Index, typename... Args >
   static void construct( Element* data,
                          const Index size,
                          const Args&... args );

   template< typename Element, typename Index >
   static void destruct( Element* data,
                         const Index size );

   // this is __cuda_callable__ only to silence nvcc warnings
   TNL_NVCC_HD_WARNING_DISABLE
   template< typename Element >
@@ -137,6 +172,23 @@ struct MemoryOperations< Devices::Host >
template<>
struct MemoryOperations< Devices::Cuda >
{
   template< typename Element, typename Index >
   static void construct( Element* data,
                          const Index size );

   // note that although args are accepted by const reference here, they are
   // passed by value to the device lambda that invokes the constructor (not via
   // std::forward or by reference), since move-semantics does not apply for
   // the construction of multiple elements and pass-by-reference cannot be used
   // with CUDA kernels
   template< typename Element, typename Index, typename... Args >
   static void construct( Element* data,
                          const Index size,
                          const Args&... args );

   template< typename Element, typename Index >
   static void destruct( Element* data,
                         const Index size );

   template< typename Element >
   __cuda_callable__
   static void setElement( Element* data,
+51 −1
Original line number Diff line number Diff line
@@ -23,6 +23,56 @@
namespace TNL {
namespace Algorithms {

/**
 * Value-initializes `size` objects of type `Element` in the storage starting
 * at `data`. The work is submitted through ParallelFor< Devices::Cuda >, one
 * index per element.
 *
 * \param data Pointer to the beginning of the storage; must not be null
 *             (checked by an assertion).
 * \param size Number of elements to construct.
 */
template< typename Element, typename Index >
void
MemoryOperations< Devices::Cuda >::
construct( Element* data,
           const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );

   // Each invocation builds exactly one element in place (placement-new).
   auto constructOne = [data] __cuda_callable__ ( Index position )
   {
      ::new( (void*) (data + position) ) Element();
   };

   ParallelFor< Devices::Cuda >::exec( static_cast< Index >( 0 ), size, constructOne );
}

/**
 * Constructs `size` objects of type `Element` in the storage starting at
 * `data`, calling the constructor of each element with the given arguments.
 * The work is submitted through ParallelFor< Devices::Cuda >, one index per
 * element.
 *
 * \param data Pointer to the beginning of the storage; must not be null
 *             (checked by an assertion).
 * \param size Number of elements to construct.
 * \param args Arguments handed to the constructor of every element.
 */
template< typename Element, typename Index, typename... Args >
void
MemoryOperations< Devices::Cuda >::
construct( Element* data,
           const Index size,
           const Args&... args )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );

   // NOTE: nvcc does not allow __cuda_callable__ lambdas with a variadic
   // capture, so the constructor arguments travel as extra lambda parameters
   // (forwarded by ParallelFor::exec) instead of being captured.
   //
   // The lambda receives the arguments by value: std::forward and references
   // are deliberately avoided, because move-semantics does not apply when the
   // same arguments construct multiple elements and pass-by-reference cannot
   // be used with CUDA kernels.
   auto constructOne = [data] __cuda_callable__ ( Index position, Args... ctorArgs )
   {
      // placement-new
      ::new( (void*) (data + position) ) Element( ctorArgs... );
   };

   ParallelFor< Devices::Cuda >::exec( static_cast< Index >( 0 ), size, constructOne, args... );
}

/**
 * Calls the destructor of `size` objects of type `Element` stored
 * contiguously from `data`. The storage itself is not deallocated here. The
 * work is submitted through ParallelFor< Devices::Cuda >, one index per
 * element.
 *
 * \param data Pointer to the first element; must not be null (checked by an
 *             assertion).
 * \param size Number of elements to destroy.
 */
template< typename Element, typename Index >
void
MemoryOperations< Devices::Cuda >::
destruct( Element* data,
          const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." );

   // Explicit destructor call on a single element.
   auto destroyOne = [data] __cuda_callable__ ( Index position )
   {
      data[ position ].~Element();
   };

   ParallelFor< Devices::Cuda >::exec( static_cast< Index >( 0 ), size, destroyOne );
}

template< typename Element >
__cuda_callable__ void
MemoryOperations< Devices::Cuda >::
+48 −0
Original line number Diff line number Diff line
@@ -21,6 +21,54 @@
namespace TNL {
namespace Algorithms {

/**
 * Value-initializes `size` objects of type `Element` in the storage starting
 * at `data`. The work is submitted through ParallelFor< Devices::Host >, one
 * index per element.
 *
 * \param data Pointer to the beginning of the storage; must not be null
 *             (checked by an assertion).
 * \param size Number of elements to construct.
 */
template< typename Element, typename Index >
void
MemoryOperations< Devices::Host >::
construct( Element* data,
           const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );

   // Each invocation builds exactly one element in place (placement-new).
   auto constructOne = [data]( Index position )
   {
      ::new( (void*) (data + position) ) Element();
   };

   ParallelFor< Devices::Host >::exec( static_cast< Index >( 0 ), size, constructOne );
}

/**
 * Constructs `size` objects of type `Element` in the storage starting at
 * `data`, calling the constructor of each element with the given arguments.
 * The work is submitted through ParallelFor< Devices::Host >, one index per
 * element.
 *
 * \param data Pointer to the beginning of the storage; must not be null
 *             (checked by an assertion).
 * \param size Number of elements to construct.
 * \param args Arguments handed to the constructor of every element.
 */
template< typename Element, typename Index, typename... Args >
void
MemoryOperations< Devices::Host >::
construct( Element* data,
           const Index size,
           const Args&... args )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );

   // The arguments are captured by reference and handed to every constructor
   // call as they are; std::forward is deliberately not used, because
   // move-semantics does not apply when the same arguments construct multiple
   // elements.
   auto constructOne = [data, &args...]( Index position )
   {
      // placement-new
      ::new( (void*) (data + position) ) Element( args... );
   };

   ParallelFor< Devices::Host >::exec( static_cast< Index >( 0 ), size, constructOne );
}

/**
 * Calls the destructor of `size` objects of type `Element` stored
 * contiguously from `data`. The storage itself is not deallocated here. The
 * work is submitted through ParallelFor< Devices::Host >, one index per
 * element.
 *
 * \param data Pointer to the first element; must not be null (checked by an
 *             assertion).
 * \param size Number of elements to destroy.
 */
template< typename Element, typename Index >
void
MemoryOperations< Devices::Host >::
destruct( Element* data,
          const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." );

   // Explicit destructor call on a single element.
   auto destroyOne = [data]( Index position )
   {
      data[ position ].~Element();
   };

   ParallelFor< Devices::Host >::exec( static_cast< Index >( 0 ), size, destroyOne );
}

template< typename Element >
__cuda_callable__ // only to avoid nvcc warning
void
+42 −0
Original line number Diff line number Diff line
@@ -15,6 +15,48 @@
namespace TNL {
namespace Algorithms {

/**
 * Value-initializes `size` objects of type `Element` in the storage starting
 * at `data`, one after another in a plain sequential loop.
 *
 * \param data Pointer to the beginning of the storage; must not be null
 *             (checked by an assertion).
 * \param size Number of elements to construct.
 */
template< typename Element, typename Index >
__cuda_callable__
void
MemoryOperations< Devices::Sequential >::
construct( Element* data,
           const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );

   Index position = 0;
   while( position < size ) {
      // placement-new into the slot of the current element
      void* const slot = (void*) (data + position);
      ::new( slot ) Element();
      position++;
   }
}

/**
 * Constructs `size` objects of type `Element` in the storage starting at
 * `data`, calling the constructor of each element with the given arguments,
 * one after another in a plain sequential loop.
 *
 * \param data Pointer to the beginning of the storage; must not be null
 *             (checked by an assertion).
 * \param size Number of elements to construct.
 * \param args Arguments handed to the constructor of every element.
 */
template< typename Element, typename Index, typename... Args >
__cuda_callable__
void
MemoryOperations< Devices::Sequential >::
construct( Element* data,
           const Index size,
           const Args&... args )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );

   // The arguments reach every constructor call by reference; std::forward is
   // deliberately not used, because move-semantics does not apply when the
   // same arguments construct multiple elements.
   Index position = 0;
   while( position < size ) {
      // placement-new into the slot of the current element
      void* const slot = (void*) (data + position);
      ::new( slot ) Element( args... );
      position++;
   }
}

/**
 * Calls the destructor of `size` objects of type `Element` stored
 * contiguously from `data`, one after another in a plain sequential loop.
 * The storage itself is not deallocated here.
 *
 * \param data Pointer to the first element; must not be null (checked by an
 *             assertion).
 * \param size Number of elements to destroy.
 */
template< typename Element, typename Index >
__cuda_callable__
void
MemoryOperations< Devices::Sequential >::
destruct( Element* data,
          const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to destroy elements through a nullptr." );

   Index position = 0;
   while( position < size ) {
      // explicit destructor call on the current element
      data[ position ].~Element();
      position++;
   }
}

template< typename Element >
__cuda_callable__
void
+59 −8
Original line number Diff line number Diff line
@@ -41,9 +41,9 @@ template< int, typename > class StaticArray;
 *
 * Memory management handled by constructors and destructors according to the
 * [RAII](https://en.wikipedia.org/wiki/RAII) principle and by methods
 * \ref setSize, \ref setLike, \ref swap, and \ref reset. You can also use
 * methods \ref getSize and \ref empty to check the current array size and
 * \ref getData to access the raw pointer.
 * \ref resize, \ref setSize, \ref setLike, \ref swap, and \ref reset. You can
 * also use methods \ref getSize and \ref empty to check the current array size
 * and \ref getData to access the raw pointer.
 *
 * Methods annotated as \ref \_\_cuda_callable\_\_ can be called either from
 * host or from kernels executing on a device according to the \e Device
@@ -263,16 +263,60 @@ class Array
      virtual String getSerializationTypeVirtual() const;

      /**
       * \brief Method for setting the array size.
       * \brief Method for resizing the array.
       *
       * The method resizes the array to the given size:
       *
       * - If the current size is greater than \e size, the array is reduced to
       *   its first \e size elements.
       * - If the current size is less than \e size, additional elements are
       *   appended (see the note below on initialization).
       * - If the current size is equal to \e size, nothing happens.
       *
       * If the array size changes, the current data will be deallocated, thus
       * all pointers and views to the array elements will become invalid.
       *
       * Note that this method differs from \ref std::vector::resize with respect
       * to the initialization of array elements:
       *
       * - if \e ValueType is a [fundamental type](https://en.cppreference.com/w/cpp/types/is_fundamental),
       *   the elements are [default-initialized](https://en.cppreference.com/w/cpp/language/default_initialization)
       *   (i.e., the elements are initialized to indeterminate values).
       * - otherwise, the elements are [value-initialized](https://en.cppreference.com/w/cpp/language/value_initialization)
       *   (like in \ref std::vector::resize).
       *
       * \param size The new size of the array.
       */
      void resize( Index size );

      /**
       * \brief Method for resizing the array with an initial value.
       *
       * The method resizes the array to the given size:
       *
       * If the array shares data with other arrays, the data is unbound. If the
       * current data is not shared and the current size is the same as the new
       * one, nothing happens.
       * - If the current size is greater than \e size, the array is reduced to
       *   its first \e size elements.
       * - If the current size is less than \e size, additional copies of
       *   \e value are appended.
       * - If the current size is equal to \e size, nothing happens.
       *
       * If the array size changes, the current data will be deallocated, thus
       * all pointers and views to the array elements will become invalid.
       *
       * \param size The new size of the array.
       * \param value The value to initialize new elements with.
       */
      void resize( Index size, const ValueType& value );

      /**
       * \brief Method for setting the array size.
       *
       * This method behaves almost like \ref resize, but when the array size
       * is changed, old elements are not copied to the new memory location.
       * Hence, this is a shortcut for deallocating the array with \e resize(0)
       * followed by setting the new size with \e resize(size).
       *
       * \param size The new size of the array.
       */
      void setSize( Index size );

@@ -289,6 +333,8 @@ class Array
       * If the array size changes, the current data will be deallocated, thus
       * all pointers and views to the array elements will become invalid.
       *
       * Note that this method uses \ref setSize rather than \ref resize.
       *
       * \tparam ArrayT The type of the parameter can be any type which provides
       *         the method \ref getSize() with the same signature as \e Array.
       * \param array The array whose size is to be taken.
@@ -867,9 +913,14 @@ class Array

   protected:

      /** \brief Method for releasing (deallocating) array data. */
      /** \brief Internal method for releasing (deallocating) array data. */
      void releaseData();

      /** \brief Internal method for reallocating array elements. Used only
       * from the two overloads of \ref resize.
       */
      void reallocate( Index size );

      /** \brief Pointer to the data. */
      Value* data = nullptr;

Loading