From 3b19d7bc1e621367308e1bb70d5ae3f2d42a18ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 18 Mar 2021 17:02:07 +0100 Subject: [PATCH 1/4] Added missing assertions to the MemoryOperations specialization for Devices::Sequential --- .../Algorithms/MemoryOperationsSequential.hpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/TNL/Algorithms/MemoryOperationsSequential.hpp b/src/TNL/Algorithms/MemoryOperationsSequential.hpp index 9e5ad25b1..7891395e8 100644 --- a/src/TNL/Algorithms/MemoryOperationsSequential.hpp +++ b/src/TNL/Algorithms/MemoryOperationsSequential.hpp @@ -22,6 +22,7 @@ MemoryOperations< Devices::Sequential >:: setElement( Element* data, const Element& value ) { + TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); *data = value; } @@ -31,6 +32,7 @@ Element MemoryOperations< Devices::Sequential >:: getElement( const Element* data ) { + TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); return *data; } @@ -42,7 +44,9 @@ set( Element* data, const Element& value, const Index size ) { - for( Index i = 0; i < size; i ++ ) + if( size == 0 ) return; + TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); + for( Index i = 0; i < size; i++ ) data[ i ] = value; } @@ -56,7 +60,11 @@ copy( DestinationElement* destination, const SourceElement* source, const Index size ) { - for( Index i = 0; i < size; i ++ ) + if( size == 0 ) return; + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); + + for( Index i = 0; i < size; i++ ) destination[ i ] = source[ i ]; } @@ -87,6 +95,10 @@ compare( const Element1* destination, const Element2* source, const Index size ) { + if( size == 0 ) return true; + TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); + for( Index i = 0; i < size; i++ ) if( ! ( destination[ i ] == source[ i ] ) ) return false; -- GitLab From 4bb7b41e9490fac275f9ae8a896fadb815c2a2ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 18 Mar 2021 20:23:16 +0100 Subject: [PATCH 2/4] Moved tests for segments from Containers to Algorithms --- src/UnitTests/Algorithms/CMakeLists.txt | 2 ++ .../{Containers => Algorithms}/Segments/CMakeLists.txt | 0 .../{Containers => Algorithms}/Segments/SegmentsTest.hpp | 0 .../{Containers => Algorithms}/Segments/SegmentsTest_CSR.cpp | 0 .../{Containers => Algorithms}/Segments/SegmentsTest_CSR.cu | 0 .../{Containers => Algorithms}/Segments/SegmentsTest_CSR.h | 0 .../Segments/SegmentsTest_Ellpack.cpp | 0 .../{Containers => Algorithms}/Segments/SegmentsTest_Ellpack.cu | 0 .../{Containers => Algorithms}/Segments/SegmentsTest_Ellpack.h | 0 .../Segments/SegmentsTest_SlicedEllpack.cpp | 0 .../Segments/SegmentsTest_SlicedEllpack.cu | 0 .../Segments/SegmentsTest_SlicedEllpack.h | 0 src/UnitTests/Containers/CMakeLists.txt | 1 - 13 files changed, 2 insertions(+), 1 deletion(-) rename src/UnitTests/{Containers => Algorithms}/Segments/CMakeLists.txt (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest.hpp (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_CSR.cpp (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_CSR.cu (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_CSR.h (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_Ellpack.cpp (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_Ellpack.cu (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_Ellpack.h (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_SlicedEllpack.cpp (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_SlicedEllpack.cu (100%) rename src/UnitTests/{Containers => Algorithms}/Segments/SegmentsTest_SlicedEllpack.h (100%) diff --git a/src/UnitTests/Algorithms/CMakeLists.txt b/src/UnitTests/Algorithms/CMakeLists.txt index dd439fbb3..30ea96b4d 100644 --- a/src/UnitTests/Algorithms/CMakeLists.txt +++ b/src/UnitTests/Algorithms/CMakeLists.txt @@ -1,3 +1,5 @@ +ADD_SUBDIRECTORY( Segments ) + set( COMMON_TESTS MemoryOperationsTest MultireductionTest diff --git a/src/UnitTests/Containers/Segments/CMakeLists.txt b/src/UnitTests/Algorithms/Segments/CMakeLists.txt similarity index 100% rename from src/UnitTests/Containers/Segments/CMakeLists.txt rename to src/UnitTests/Algorithms/Segments/CMakeLists.txt diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Algorithms/Segments/SegmentsTest.hpp similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest.hpp rename to src/UnitTests/Algorithms/Segments/SegmentsTest.hpp diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp b/src/UnitTests/Algorithms/Segments/SegmentsTest_CSR.cpp similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp rename to src/UnitTests/Algorithms/Segments/SegmentsTest_CSR.cpp diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu b/src/UnitTests/Algorithms/Segments/SegmentsTest_CSR.cu similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu rename to src/UnitTests/Algorithms/Segments/SegmentsTest_CSR.cu diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Algorithms/Segments/SegmentsTest_CSR.h similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_CSR.h rename to src/UnitTests/Algorithms/Segments/SegmentsTest_CSR.h diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp b/src/UnitTests/Algorithms/Segments/SegmentsTest_Ellpack.cpp similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp rename to src/UnitTests/Algorithms/Segments/SegmentsTest_Ellpack.cpp diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu b/src/UnitTests/Algorithms/Segments/SegmentsTest_Ellpack.cu similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu rename to src/UnitTests/Algorithms/Segments/SegmentsTest_Ellpack.cu diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Algorithms/Segments/SegmentsTest_Ellpack.h similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h rename to src/UnitTests/Algorithms/Segments/SegmentsTest_Ellpack.h diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp b/src/UnitTests/Algorithms/Segments/SegmentsTest_SlicedEllpack.cpp similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp rename to src/UnitTests/Algorithms/Segments/SegmentsTest_SlicedEllpack.cpp diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu b/src/UnitTests/Algorithms/Segments/SegmentsTest_SlicedEllpack.cu similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu rename to src/UnitTests/Algorithms/Segments/SegmentsTest_SlicedEllpack.cu diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h b/src/UnitTests/Algorithms/Segments/SegmentsTest_SlicedEllpack.h similarity index 100% rename from src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h rename to src/UnitTests/Algorithms/Segments/SegmentsTest_SlicedEllpack.h diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index efba5e50d..9d9e41343 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -1,5 +1,4 @@ ADD_SUBDIRECTORY( ndarray ) -ADD_SUBDIRECTORY( Segments ) set( CPP_TESTS ArrayTest -- GitLab From 1d0bcfa4b8f38425760b485f4fef49b36224ba30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 18 Mar 2021 22:31:38 +0100 Subject: [PATCH 3/4] Implemented Array::resize using the STL semantics almost exactly - if ValueType is a fundamental type, the Array behaves the same as before (allocated elements are initialized to indeterminate values) - otherwise, allocated elements are value-initialized using MemoryOperations::construct and destructed using MemoryOperations::destruct before their deallocation --- src/TNL/Algorithms/MemoryOperations.h | 52 +++++++++++ src/TNL/Algorithms/MemoryOperationsCuda.hpp | 52 ++++++++++- src/TNL/Algorithms/MemoryOperationsHost.hpp | 48 +++++++++++ .../Algorithms/MemoryOperationsSequential.hpp | 42 +++++++++ src/TNL/Containers/Array.h | 67 +++++++++++++-- src/TNL/Containers/Array.hpp | 86 ++++++++++++++++++- src/UnitTests/Containers/ArrayTest.h | 67 ++++++++++++++- 7 files changed, 399 insertions(+), 15 deletions(-) diff --git a/src/TNL/Algorithms/MemoryOperations.h b/src/TNL/Algorithms/MemoryOperations.h index 68a3a8814..42c37f062 100644 --- a/src/TNL/Algorithms/MemoryOperations.h +++ b/src/TNL/Algorithms/MemoryOperations.h @@ -24,6 +24,25 @@ struct MemoryOperations; template<> struct MemoryOperations< Devices::Sequential > { + template< typename Element, typename Index > + __cuda_callable__ + static void construct( Element* data, + const Index size ); + + // note that args are passed by reference to the constructor, not via + // std::forward since move-semantics does not apply for the construction of + // multiple elements + template< typename Element, typename Index, typename... Args > + __cuda_callable__ + static void construct( Element* data, + const Index size, + const Args&... args ); + + template< typename Element, typename Index > + __cuda_callable__ + static void destruct( Element* data, + const Index size ); + template< typename Element > __cuda_callable__ static void setElement( Element* data, @@ -81,6 +100,22 @@ struct MemoryOperations< Devices::Sequential > template<> struct MemoryOperations< Devices::Host > { + template< typename Element, typename Index > + static void construct( Element* data, + const Index size ); + + // note that args are passed by reference to the constructor, not via + // std::forward since move-semantics does not apply for the construction of + // multiple elements + template< typename Element, typename Index, typename... Args > + static void construct( Element* data, + const Index size, + const Args&... args ); + + template< typename Element, typename Index > + static void destruct( Element* data, + const Index size ); + // this is __cuda_callable__ only to silence nvcc warnings TNL_NVCC_HD_WARNING_DISABLE template< typename Element > @@ -137,6 +172,23 @@ struct MemoryOperations< Devices::Host > template<> struct MemoryOperations< Devices::Cuda > { + template< typename Element, typename Index > + static void construct( Element* data, + const Index size ); + + // note that args are passed by value to the constructor, not via + // std::forward or even by reference, since move-semantics does not apply for + // the construction of multiple elements and pass-by-reference cannot be used + // with CUDA kernels + template< typename Element, typename Index, typename... Args > + static void construct( Element* data, + const Index size, + const Args&... args ); + + template< typename Element, typename Index > + static void destruct( Element* data, + const Index size ); + template< typename Element > __cuda_callable__ static void setElement( Element* data, diff --git a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp index 5351b6962..8c104469b 100644 --- a/src/TNL/Algorithms/MemoryOperationsCuda.hpp +++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp @@ -23,6 +23,56 @@ namespace TNL { namespace Algorithms { +template< typename Element, typename Index > +void +MemoryOperations< Devices::Cuda >:: +construct( Element* data, + const Index size ) +{ + TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); + auto kernel = [data] __cuda_callable__ ( Index i ) + { + // placement-new + ::new( (void*) (data + i) ) Element(); + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); +} + +template< typename Element, typename Index, typename... Args > +void +MemoryOperations< Devices::Cuda >:: +construct( Element* data, + const Index size, + const Args&... args ) +{ + TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); + // NOTE: nvcc does not allow __cuda_callable__ lambdas with a variadic capture + auto kernel = [data] __cuda_callable__ ( Index i, Args... args ) + { + // placement-new + // (note that args are passed by value to the constructor, not via + // std::forward or even by reference, since move-semantics does not apply for + // the construction of multiple elements and pass-by-reference cannot be used + // with CUDA kernels) + ::new( (void*) (data + i) ) Element( args... ); + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel, args... ); +} + +template< typename Element, typename Index > +void +MemoryOperations< Devices::Cuda >:: +destruct( Element* data, + const Index size ) +{ + TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." ); + auto kernel = [data] __cuda_callable__ ( Index i ) + { + (data + i)->~Element(); + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); +} + template< typename Element > __cuda_callable__ void MemoryOperations< Devices::Cuda >:: @@ -57,7 +107,7 @@ getElement( const Element* data ) return *data; #else // TODO: For some reason the following does not work after adding - // #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement + // #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement // Probably it might be a problem with lambda function 'kernel' which // nvcc probably does not handle properly. //MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 ); diff --git a/src/TNL/Algorithms/MemoryOperationsHost.hpp b/src/TNL/Algorithms/MemoryOperationsHost.hpp index 92b44f8cf..0034b8302 100644 --- a/src/TNL/Algorithms/MemoryOperationsHost.hpp +++ b/src/TNL/Algorithms/MemoryOperationsHost.hpp @@ -21,6 +21,54 @@ namespace TNL { namespace Algorithms { +template< typename Element, typename Index > +void +MemoryOperations< Devices::Host >:: +construct( Element* data, + const Index size ) +{ + TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); + auto kernel = [data]( Index i ) + { + // placement-new + ::new( (void*) (data + i) ) Element(); + }; + ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); +} + +template< typename Element, typename Index, typename... Args > +void +MemoryOperations< Devices::Host >:: +construct( Element* data, + const Index size, + const Args&... args ) +{ + TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); + auto kernel = [data, &args...]( Index i ) + { + // placement-new + // (note that args are passed by reference to the constructor, not via + // std::forward since move-semantics does not apply for the construction + // of multiple elements) + ::new( (void*) (data + i) ) Element( args... ); + }; + ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); +} + +template< typename Element, typename Index > +void +MemoryOperations< Devices::Host >:: +destruct( Element* data, + const Index size ) +{ + TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." ); + auto kernel = [data]( Index i ) + { + (data + i)->~Element(); + }; + ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); +} + template< typename Element > __cuda_callable__ // only to avoid nvcc warning void diff --git a/src/TNL/Algorithms/MemoryOperationsSequential.hpp b/src/TNL/Algorithms/MemoryOperationsSequential.hpp index 7891395e8..2ea21d0ac 100644 --- a/src/TNL/Algorithms/MemoryOperationsSequential.hpp +++ b/src/TNL/Algorithms/MemoryOperationsSequential.hpp @@ -15,6 +15,48 @@ namespace TNL { namespace Algorithms { +template< typename Element, typename Index > +__cuda_callable__ +void +MemoryOperations< Devices::Sequential >:: +construct( Element* data, + const Index size ) +{ + TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); + for( Index i = 0; i < size; i++ ) + // placement-new + ::new( (void*) (data + i) ) Element(); +} + +template< typename Element, typename Index, typename... Args > +__cuda_callable__ +void +MemoryOperations< Devices::Sequential >:: +construct( Element* data, + const Index size, + const Args&... args ) +{ + TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); + for( Index i = 0; i < size; i++ ) + // placement-new + // (note that args are passed by reference to the constructor, not via + // std::forward since move-semantics does not apply for the construction + // of multiple elements) + ::new( (void*) (data + i) ) Element( args... ); +} + +template< typename Element, typename Index > +__cuda_callable__ +void +MemoryOperations< Devices::Sequential >:: +destruct( Element* data, + const Index size ) +{ + TNL_ASSERT_TRUE( data, "Attempted to destroy elements through a nullptr." ); + for( Index i = 0; i < size; i++ ) + (data + i)->~Element(); +} + template< typename Element > __cuda_callable__ void diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index 92a976e72..9c02ac9d4 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -41,9 +41,9 @@ template< int, typename > class StaticArray; * * Memory management handled by constructors and destructors according to the * [RAII](https://en.wikipedia.org/wiki/RAII) principle and by methods - * \ref setSize, \ref setLike, \ref swap, and \ref reset. You can also use - * methods \ref getSize and \ref empty to check the current array size and - * \ref getData to access the raw pointer. + * \ref resize \ref setSize, \ref setLike, \ref swap, and \ref reset. You can + * also use methods \ref getSize and \ref empty to check the current array size + * and \ref getData to access the raw pointer. * * Methods annotated as \ref \_\_cuda_callable\_\_ can be called either from * host or from kernels executing on a device according to the \e Device @@ -263,16 +263,60 @@ class Array virtual String getSerializationTypeVirtual() const; /** - * \brief Method for setting the array size. + * \brief Method for resizing the array. + * + * The method resizes the array to the given size: + * + * - If the current size is greater than count, the array is reduced to + * its first \e size elements. + * - If the current size is less than \e size, additional elements are + * appended (see the note below on initialization). + * - If the current size is equal to \e size, nothing happens. + * + * If the array size changes, the current data will be deallocated, thus + * all pointers and views to the array alements will become invalid. + * + * Note that this method differs from \ref std::vector::resize with respect + * to the initialization of array elements: + * + * - if \e ValueType is a [fundamental type](https://en.cppreference.com/w/cpp/types/is_fundamental), + * the elements are [default-initialized](https://en.cppreference.com/w/cpp/language/default_initialization) + * (i.e., the elements are initialized to indeterminate values). + * - otherwise, the elements are [value-initialized](https://en.cppreference.com/w/cpp/language/value_initialization) + * (like in \ref std::vector::resize). + * + * \param size The new size of the array. + */ + void resize( Index size ); + + /** + * \brief Method for resizing the array with an initial value. + * + * The method resizes the array to the given size: * - * If the array shares data with other arrays, the data is unbound. If the - * current data is not shared and the current size is the same as the new - * one, nothing happens. + * - If the current size is greater than count, the array is reduced to + * its first \e size elements. + * - If the current size is less than \e size, additional copies of + * \e value are appended. + * - If the current size is equal to \e size, nothing happens. * * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array alements will become invalid. * * \param size The new size of the array. + * \param value The value to initialize new elements with. + */ + void resize( Index size, const ValueType& value ); + + /** + * \brief Method for setting the array size. + * + * This method behaves almost like \ref resize, but when the array size + * is changed, old elements are not copied to the new memory location. + * Hence, this is a shortcut for deallocating the array with \e resize(0) + * followed by setting the new size with \e resize(size) + * + * \param size The new size of the array. */ void setSize( Index size ); @@ -289,6 +333,8 @@ class Array * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array alements will become invalid. * + * Note that this method uses \ref setSize rather than \ref resize. + * * \tparam ArrayT The type of the parameter can be any type which provides * the method \ref getSize() with the same signature as \e Array. * \param array The array whose size is to be taken. @@ -867,9 +913,14 @@ class Array protected: - /** \brief Method for releasing (deallocating) array data. */ + /** \brief Internal method for releasing (deallocating) array data. */ void releaseData(); + /** \brief Internal method for reallocating array elements. Used only + * from the two overloads of \ref resize. + */ + void reallocate( Index size ); + /** \brief Pointer to the data. */ Value* data = nullptr; diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 6b8d1014c..402168d11 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -228,8 +228,12 @@ void Array< Value, Device, Index, Allocator >:: releaseData() { - if( this->data ) + if( this->data ) { + if( ! std::is_fundamental< ValueType >::value ) + // call the destructor of each element + Algorithms::MemoryOperations< Device >::destruct( this->data, this->size ); allocator.deallocate( this->data, this->size ); + } this->data = nullptr; this->size = 0; } @@ -240,22 +244,96 @@ template< typename Value, typename Allocator > void Array< Value, Device, Index, Allocator >:: -setSize( Index size ) +reallocate( Index size ) { TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); if( this->size == size ) return; - this->releaseData(); // Allocating zero bytes is useless. Moreover, the allocators don't behave the same way: // "operator new" returns some non-zero address, the latter returns a null pointer. - if( size > 0 ) { + if( size == 0 ) { + this->releaseData(); + return; + } + + // handle initial allocations + if( this->size == 0 ) { this->data = allocator.allocate( size ); + if( ! std::is_fundamental< ValueType >::value ) + // call the constructor of each element + Algorithms::MemoryOperations< Device >::construct( this->data, size ); + this->size = size; TNL_ASSERT_TRUE( this->data, "This should never happen - allocator did not throw on an error." ); + return; } + + // allocate an array with the correct size + Array aux( size ); + + // copy the old elements into aux + Algorithms::MemoryOperations< Device >:: + copy( aux.getData(), this->getData(), TNL::min( this->size, size ) ); + + // swap *this with aux, old data will be released + this->swap( aux ); +} + +template< typename Value, + typename Device, + typename Index, + typename Allocator > +void +Array< Value, Device, Index, Allocator >:: +resize( Index size ) +{ + // remember the old size and reallocate the array + const Index old_size = this->size; + reallocate( size ); + + if( old_size < size ) + if( ! std::is_fundamental< ValueType >::value ) + // initialize the appended elements + Algorithms::MemoryOperations< Device >::construct( this->data + old_size, size - old_size ); +} + +template< typename Value, + typename Device, + typename Index, + typename Allocator > +void +Array< Value, Device, Index, Allocator >:: +resize( Index size, const ValueType& value ) +{ + // remember the old size and reallocate the array + const Index old_size = this->size; + reallocate( size ); + + if( old_size < size ) + // copy value into the appended elements + Algorithms::MemoryOperations< Device >::construct( this->data + old_size, size - old_size, value ); +} + +template< typename Value, + typename Device, + typename Index, + typename Allocator > +void +Array< Value, Device, Index, Allocator >:: +setSize( Index size ) +{ + TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); + + if( this->size == size ) + return; + + // release data to avoid copying the elements to the new memory location + this->releaseData(); + // resize from size 0 does not copy anything, initialization is done as intended + this->resize( size ); } template< typename Value, diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index 4b5809747..c40c0a8c7 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -222,6 +222,69 @@ TYPED_TEST( ArrayTest, constructorsWithAllocators ) EXPECT_EQ( a3.getElement( 1 ), 8 ); EXPECT_EQ( a3.getElement( 2 ), 9 ); EXPECT_EQ( a3.getAllocator(), allocator ); + + // test value-initialization of non-fundamental types + if( ! std::is_fundamental< typename ArrayType::ValueType >::value ) + { + const typename ArrayType::ValueType init{}; + ArrayType a( 42 ); + ASSERT_EQ( a.getSize(), 42 ); + for( int i = 0; i < a.getSize(); i++ ) + EXPECT_EQ( a.getElement( i ), init ) << "i = " << i; + } +} + +TYPED_TEST( ArrayTest, resize ) +{ + using ArrayType = typename TestFixture::ArrayType; + + ArrayType u( 42 ); + ASSERT_EQ( u.getSize(), 42 ); + for( int i = 0; i < u.getSize(); i++ ) + u.setElement( i, i ); + + // no change test + const typename ArrayType::ValueType* old_data = u.getData(); + u.resize( u.getSize() ); + EXPECT_EQ( u.getData(), old_data ); + + // shrink test + u.resize( 20 ); + ASSERT_EQ( u.getSize(), 20 ); + EXPECT_NE( u.getData(), old_data ); + for( int i = 0; i < u.getSize(); i++ ) + EXPECT_EQ( u.getElement( i ), i ); + + // expand test + const typename ArrayType::IndexType old_size = u.getSize(); + old_data = u.getData(); + u.resize( old_size * 2 ); + ASSERT_EQ( u.getSize(), old_size * 2 ); + EXPECT_NE( u.getData(), old_data ); + for( int i = 0; i < old_size; i++ ) + EXPECT_EQ( u.getElement( i ), i ); + + // expand test with initial value + const typename ArrayType::ValueType init = 3; + ArrayType v( 10 ); + v.setValue( 0 ); + v.resize( 42, init ); + ASSERT_EQ( v.getSize(), 42 ); + for( int i = 0; i < 10; i++ ) + EXPECT_EQ( v.getElement( i ), 0 ) << "i = " << i; + for( int i = 10; i < v.getSize(); i++ ) + EXPECT_EQ( v.getElement( i ), init ) << "i = " << i; + + // test value-initialization of non-fundamental types + if( ! std::is_fundamental< typename ArrayType::ValueType >::value ) + { + const typename ArrayType::ValueType init{}; + ArrayType w; + w.resize( 42 ); + ASSERT_EQ( w.getSize(), 42 ); + for( int i = 0; i < w.getSize(); i++ ) + EXPECT_EQ( w.getElement( i ), init ) << "i = " << i; + } } TYPED_TEST( ArrayTest, setSize ) @@ -380,14 +443,14 @@ TYPED_TEST( ArrayTest, forElements ) using ValueType = typename ArrayType::ValueType; #if not defined HAVE_CUDA -// nvcc does not accept the following code with +// nvcc does not accept the following code with // error #3068-D: The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class ArrayType a( 10 ); a.forEachElement( [] __cuda_callable__ ( IndexType i, ValueType& v ) mutable { v = i; } ); for( int i = 0; i < 10; i++ ) EXPECT_EQ( a.getElement( i ), i ); -#endif +#endif } TYPED_TEST( ArrayTest, containsValue ) -- GitLab From f91cf5873f9ab2e16444dae58095464d6837ef08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 18 Mar 2021 22:37:26 +0100 Subject: [PATCH 4/4] Fixed ArrayTest::forElements and VectorTest::reduceElements for CUDA --- src/UnitTests/Containers/ArrayTest.h | 14 ++++++++------ src/UnitTests/Containers/VectorTest.h | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index c40c0a8c7..148d92aa6 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -436,21 +436,23 @@ TYPED_TEST( ArrayTest, setElement ) test_setElement< ArrayType >(); } -TYPED_TEST( ArrayTest, forElements ) +// test must be in a plain function because nvcc sucks (extended lambdas are +// not allowed to be defined in protected class member functions) +template< typename ArrayType > +void testArrayForEachElement() { - using ArrayType = typename TestFixture::ArrayType; using IndexType = typename ArrayType::IndexType; using ValueType = typename ArrayType::ValueType; -#if not defined HAVE_CUDA -// nvcc does not accept the following code with -// error #3068-D: The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class ArrayType a( 10 ); a.forEachElement( [] __cuda_callable__ ( IndexType i, ValueType& v ) mutable { v = i; } ); for( int i = 0; i < 10; i++ ) EXPECT_EQ( a.getElement( i ), i ); -#endif +} +TYPED_TEST( ArrayTest, forElements ) +{ + testArrayForEachElement< typename TestFixture::ArrayType >(); } TYPED_TEST( ArrayTest, containsValue ) diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index 136154fdc..fa99547ec 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -80,15 +80,14 @@ TYPED_TEST( VectorTest, constructors ) } -TYPED_TEST( VectorTest, reduceElements ) +// test must be in a plain function because nvcc sucks (extended lambdas are +// not allowed to be defined in protected class member functions) +template< typename VectorType > +void testVectorReduceElements() { - using VectorType = typename TestFixture::VectorType; using IndexType = typename VectorType::IndexType; using ValueType = typename VectorType::ValueType; -#if not defined HAVE_CUDA -// nvcc does not accept the following code with -// error #3068-D: The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class VectorType a( 10 ); a.forEachElement( [=] __cuda_callable__ ( IndexType i, ValueType& v ) mutable { v = 1; } ); auto fetch = [] __cuda_callable__ ( IndexType i, ValueType& v ) -> ValueType { return v; }; @@ -100,7 +99,10 @@ TYPED_TEST( VectorTest, reduceElements ) auto const_fetch = [] __cuda_callable__ ( IndexType i, const ValueType& v ) -> ValueType { return v; }; EXPECT_EQ( b.reduceEachElement( const_fetch, reduce, ( ValueType ) 0.0 ), b.getSize() ); -#endif +} +TYPED_TEST( VectorTest, reduceElements ) +{ + testVectorReduceElements< typename TestFixture::VectorType >(); } TEST( VectorSpecialCasesTest, defaultConstructors ) -- GitLab