Loading src/TNL/Algorithms/MemoryOperations.h +52 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,25 @@ struct MemoryOperations; template<> struct MemoryOperations< Devices::Sequential > { template< typename Element, typename Index > __cuda_callable__ static void construct( Element* data, const Index size ); // note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction of // multiple elements template< typename Element, typename Index, typename... Args > __cuda_callable__ static void construct( Element* data, const Index size, const Args&... args ); template< typename Element, typename Index > __cuda_callable__ static void destruct( Element* data, const Index size ); template< typename Element > __cuda_callable__ static void setElement( Element* data, Loading Loading @@ -81,6 +100,22 @@ struct MemoryOperations< Devices::Sequential > template<> struct MemoryOperations< Devices::Host > { template< typename Element, typename Index > static void construct( Element* data, const Index size ); // note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction of // multiple elements template< typename Element, typename Index, typename... Args > static void construct( Element* data, const Index size, const Args&... 
args ); template< typename Element, typename Index > static void destruct( Element* data, const Index size ); // this is __cuda_callable__ only to silence nvcc warnings TNL_NVCC_HD_WARNING_DISABLE template< typename Element > Loading Loading @@ -137,6 +172,23 @@ struct MemoryOperations< Devices::Host > template<> struct MemoryOperations< Devices::Cuda > { template< typename Element, typename Index > static void construct( Element* data, const Index size ); // note that args are passed by value to the constructor, not via // std::forward or even by reference, since move-semantics does not apply for // the construction of multiple elements and pass-by-reference cannot be used // with CUDA kernels template< typename Element, typename Index, typename... Args > static void construct( Element* data, const Index size, const Args&... args ); template< typename Element, typename Index > static void destruct( Element* data, const Index size ); template< typename Element > __cuda_callable__ static void setElement( Element* data, Loading src/TNL/Algorithms/MemoryOperationsCuda.hpp +51 −1 Original line number Diff line number Diff line Loading @@ -23,6 +23,56 @@ namespace TNL { namespace Algorithms { template< typename Element, typename Index > void MemoryOperations< Devices::Cuda >:: construct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); auto kernel = [data] __cuda_callable__ ( Index i ) { // placement-new ::new( (void*) (data + i) ) Element(); }; ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); } template< typename Element, typename Index, typename... Args > void MemoryOperations< Devices::Cuda >:: construct( Element* data, const Index size, const Args&... args ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); // NOTE: nvcc does not allow __cuda_callable__ lambdas with a variadic capture auto kernel = [data] __cuda_callable__ ( Index i, Args... 
args ) { // placement-new // (note that args are passed by value to the constructor, not via // std::forward or even by reference, since move-semantics does not apply for // the construction of multiple elements and pass-by-reference cannot be used // with CUDA kernels) ::new( (void*) (data + i) ) Element( args... ); }; ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel, args... ); } template< typename Element, typename Index > void MemoryOperations< Devices::Cuda >:: destruct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." ); auto kernel = [data] __cuda_callable__ ( Index i ) { (data + i)->~Element(); }; ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); } template< typename Element > __cuda_callable__ void MemoryOperations< Devices::Cuda >:: Loading src/TNL/Algorithms/MemoryOperationsHost.hpp +48 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,54 @@ namespace TNL { namespace Algorithms { template< typename Element, typename Index > void MemoryOperations< Devices::Host >:: construct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); auto kernel = [data]( Index i ) { // placement-new ::new( (void*) (data + i) ) Element(); }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } template< typename Element, typename Index, typename... Args > void MemoryOperations< Devices::Host >:: construct( Element* data, const Index size, const Args&... args ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); auto kernel = [data, &args...]( Index i ) { // placement-new // (note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction // of multiple elements) ::new( (void*) (data + i) ) Element( args... 
); }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } template< typename Element, typename Index > void MemoryOperations< Devices::Host >:: destruct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." ); auto kernel = [data]( Index i ) { (data + i)->~Element(); }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } template< typename Element > __cuda_callable__ // only to avoid nvcc warning void Loading src/TNL/Algorithms/MemoryOperationsSequential.hpp +56 −2 Original line number Diff line number Diff line Loading @@ -15,6 +15,48 @@ namespace TNL { namespace Algorithms { template< typename Element, typename Index > __cuda_callable__ void MemoryOperations< Devices::Sequential >:: construct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); for( Index i = 0; i < size; i++ ) // placement-new ::new( (void*) (data + i) ) Element(); } template< typename Element, typename Index, typename... Args > __cuda_callable__ void MemoryOperations< Devices::Sequential >:: construct( Element* data, const Index size, const Args&... args ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); for( Index i = 0; i < size; i++ ) // placement-new // (note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction // of multiple elements) ::new( (void*) (data + i) ) Element( args... ); } template< typename Element, typename Index > __cuda_callable__ void MemoryOperations< Devices::Sequential >:: destruct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to destroy elements through a nullptr." 
); for( Index i = 0; i < size; i++ ) (data + i)->~Element(); } template< typename Element > __cuda_callable__ void Loading @@ -22,6 +64,7 @@ MemoryOperations< Devices::Sequential >:: setElement( Element* data, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); *data = value; } Loading @@ -31,6 +74,7 @@ Element MemoryOperations< Devices::Sequential >:: getElement( const Element* data ) { TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); return *data; } Loading @@ -42,6 +86,8 @@ set( Element* data, const Element& value, const Index size ) { if( size == 0 ) return; TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); for( Index i = 0; i < size; i++ ) data[ i ] = value; } Loading @@ -56,6 +102,10 @@ copy( DestinationElement* destination, const SourceElement* source, const Index size ) { if( size == 0 ) return; TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); for( Index i = 0; i < size; i++ ) destination[ i ] = source[ i ]; } Loading Loading @@ -87,6 +137,10 @@ compare( const Element1* destination, const Element2* source, const Index size ) { if( size == 0 ) return true; TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); for( Index i = 0; i < size; i++ ) if( ! ( destination[ i ] == source[ i ] ) ) return false; Loading src/TNL/Containers/Array.h +59 −8 Original line number Diff line number Diff line Loading @@ -41,9 +41,9 @@ template< int, typename > class StaticArray; * * Memory management handled by constructors and destructors according to the * [RAII](https://en.wikipedia.org/wiki/RAII) principle and by methods * \ref setSize, \ref setLike, \ref swap, and \ref reset. You can also use * methods \ref getSize and \ref empty to check the current array size and * \ref getData to access the raw pointer. 
* \ref resize, \ref setSize, \ref setLike, \ref swap, and \ref reset. You can * also use methods \ref getSize and \ref empty to check the current array size * and \ref getData to access the raw pointer. * * Methods annotated as \ref \_\_cuda_callable\_\_ can be called either from * host or from kernels executing on a device according to the \e Device Loading Loading @@ -263,16 +263,60 @@ class Array virtual String getSerializationTypeVirtual() const; /** * \brief Method for setting the array size. * \brief Method for resizing the array. * * The method resizes the array to the given size: * * - If the current size is greater than \e size, the array is reduced to * its first \e size elements. * - If the current size is less than \e size, additional elements are * appended (see the note below on initialization). * - If the current size is equal to \e size, nothing happens. * * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array elements will become invalid. * * Note that this method differs from \ref std::vector::resize with respect * to the initialization of array elements: * * - if \e ValueType is a [fundamental type](https://en.cppreference.com/w/cpp/types/is_fundamental), * the elements are [default-initialized](https://en.cppreference.com/w/cpp/language/default_initialization) * (i.e., the elements are initialized to indeterminate values). * - otherwise, the elements are [value-initialized](https://en.cppreference.com/w/cpp/language/value_initialization) * (like in \ref std::vector::resize). * * \param size The new size of the array. */ void resize( Index size ); /** * \brief Method for resizing the array with an initial value. * * The method resizes the array to the given size: * * If the array shares data with other arrays, the data is unbound. If the * current data is not shared and the current size is the same as the new * one, nothing happens.
* - If the current size is greater than \e size, the array is reduced to * its first \e size elements. * - If the current size is less than \e size, additional copies of * \e value are appended. * - If the current size is equal to \e size, nothing happens. * * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array elements will become invalid. * * \param size The new size of the array. * \param value The value to initialize new elements with. */ void resize( Index size, const ValueType& value ); /** * \brief Method for setting the array size. * * This method behaves almost like \ref resize, but when the array size * is changed, old elements are not copied to the new memory location. * Hence, this is a shortcut for deallocating the array with \e resize(0) * followed by setting the new size with \e resize(size). * * \param size The new size of the array. */ void setSize( Index size ); Loading @@ -289,6 +333,8 @@ class Array * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array elements will become invalid. * * Note that this method uses \ref setSize rather than \ref resize. * * \tparam ArrayT The type of the parameter can be any type which provides * the method \ref getSize() with the same signature as \e Array. * \param array The array whose size is to be taken. Loading Loading @@ -867,9 +913,14 @@ class Array protected: /** \brief Method for releasing (deallocating) array data. */ /** \brief Internal method for releasing (deallocating) array data. */ void releaseData(); /** \brief Internal method for reallocating array elements. Used only * from the two overloads of \ref resize. */ void reallocate( Index size ); /** \brief Pointer to the data. */ Value* data = nullptr; Loading Loading
src/TNL/Algorithms/MemoryOperations.h +52 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,25 @@ struct MemoryOperations; template<> struct MemoryOperations< Devices::Sequential > { template< typename Element, typename Index > __cuda_callable__ static void construct( Element* data, const Index size ); // note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction of // multiple elements template< typename Element, typename Index, typename... Args > __cuda_callable__ static void construct( Element* data, const Index size, const Args&... args ); template< typename Element, typename Index > __cuda_callable__ static void destruct( Element* data, const Index size ); template< typename Element > __cuda_callable__ static void setElement( Element* data, Loading Loading @@ -81,6 +100,22 @@ struct MemoryOperations< Devices::Sequential > template<> struct MemoryOperations< Devices::Host > { template< typename Element, typename Index > static void construct( Element* data, const Index size ); // note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction of // multiple elements template< typename Element, typename Index, typename... Args > static void construct( Element* data, const Index size, const Args&... 
args ); template< typename Element, typename Index > static void destruct( Element* data, const Index size ); // this is __cuda_callable__ only to silence nvcc warnings TNL_NVCC_HD_WARNING_DISABLE template< typename Element > Loading Loading @@ -137,6 +172,23 @@ struct MemoryOperations< Devices::Host > template<> struct MemoryOperations< Devices::Cuda > { template< typename Element, typename Index > static void construct( Element* data, const Index size ); // note that args are passed by value to the constructor, not via // std::forward or even by reference, since move-semantics does not apply for // the construction of multiple elements and pass-by-reference cannot be used // with CUDA kernels template< typename Element, typename Index, typename... Args > static void construct( Element* data, const Index size, const Args&... args ); template< typename Element, typename Index > static void destruct( Element* data, const Index size ); template< typename Element > __cuda_callable__ static void setElement( Element* data, Loading
src/TNL/Algorithms/MemoryOperationsCuda.hpp +51 −1 Original line number Diff line number Diff line Loading @@ -23,6 +23,56 @@ namespace TNL { namespace Algorithms { template< typename Element, typename Index > void MemoryOperations< Devices::Cuda >:: construct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); auto kernel = [data] __cuda_callable__ ( Index i ) { // placement-new ::new( (void*) (data + i) ) Element(); }; ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); } template< typename Element, typename Index, typename... Args > void MemoryOperations< Devices::Cuda >:: construct( Element* data, const Index size, const Args&... args ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); // NOTE: nvcc does not allow __cuda_callable__ lambdas with a variadic capture auto kernel = [data] __cuda_callable__ ( Index i, Args... args ) { // placement-new // (note that args are passed by value to the constructor, not via // std::forward or even by reference, since move-semantics does not apply for // the construction of multiple elements and pass-by-reference cannot be used // with CUDA kernels) ::new( (void*) (data + i) ) Element( args... ); }; ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel, args... ); } template< typename Element, typename Index > void MemoryOperations< Devices::Cuda >:: destruct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." ); auto kernel = [data] __cuda_callable__ ( Index i ) { (data + i)->~Element(); }; ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); } template< typename Element > __cuda_callable__ void MemoryOperations< Devices::Cuda >:: Loading
src/TNL/Algorithms/MemoryOperationsHost.hpp +48 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,54 @@ namespace TNL { namespace Algorithms { template< typename Element, typename Index > void MemoryOperations< Devices::Host >:: construct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); auto kernel = [data]( Index i ) { // placement-new ::new( (void*) (data + i) ) Element(); }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } template< typename Element, typename Index, typename... Args > void MemoryOperations< Devices::Host >:: construct( Element* data, const Index size, const Args&... args ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); auto kernel = [data, &args...]( Index i ) { // placement-new // (note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction // of multiple elements) ::new( (void*) (data + i) ) Element( args... ); }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } template< typename Element, typename Index > void MemoryOperations< Devices::Host >:: destruct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." ); auto kernel = [data]( Index i ) { (data + i)->~Element(); }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } template< typename Element > __cuda_callable__ // only to avoid nvcc warning void Loading
src/TNL/Algorithms/MemoryOperationsSequential.hpp +56 −2 Original line number Diff line number Diff line Loading @@ -15,6 +15,48 @@ namespace TNL { namespace Algorithms { template< typename Element, typename Index > __cuda_callable__ void MemoryOperations< Devices::Sequential >:: construct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); for( Index i = 0; i < size; i++ ) // placement-new ::new( (void*) (data + i) ) Element(); } template< typename Element, typename Index, typename... Args > __cuda_callable__ void MemoryOperations< Devices::Sequential >:: construct( Element* data, const Index size, const Args&... args ) { TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." ); for( Index i = 0; i < size; i++ ) // placement-new // (note that args are passed by reference to the constructor, not via // std::forward since move-semantics does not apply for the construction // of multiple elements) ::new( (void*) (data + i) ) Element( args... ); } template< typename Element, typename Index > __cuda_callable__ void MemoryOperations< Devices::Sequential >:: destruct( Element* data, const Index size ) { TNL_ASSERT_TRUE( data, "Attempted to destroy elements through a nullptr." ); for( Index i = 0; i < size; i++ ) (data + i)->~Element(); } template< typename Element > __cuda_callable__ void Loading @@ -22,6 +64,7 @@ MemoryOperations< Devices::Sequential >:: setElement( Element* data, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); *data = value; } Loading @@ -31,6 +74,7 @@ Element MemoryOperations< Devices::Sequential >:: getElement( const Element* data ) { TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); return *data; } Loading @@ -42,6 +86,8 @@ set( Element* data, const Element& value, const Index size ) { if( size == 0 ) return; TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." 
); for( Index i = 0; i < size; i++ ) data[ i ] = value; } Loading @@ -56,6 +102,10 @@ copy( DestinationElement* destination, const SourceElement* source, const Index size ) { if( size == 0 ) return; TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); for( Index i = 0; i < size; i++ ) destination[ i ] = source[ i ]; } Loading Loading @@ -87,6 +137,10 @@ compare( const Element1* destination, const Element2* source, const Index size ) { if( size == 0 ) return true; TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); for( Index i = 0; i < size; i++ ) if( ! ( destination[ i ] == source[ i ] ) ) return false; Loading
src/TNL/Containers/Array.h +59 −8 Original line number Diff line number Diff line Loading @@ -41,9 +41,9 @@ template< int, typename > class StaticArray; * * Memory management handled by constructors and destructors according to the * [RAII](https://en.wikipedia.org/wiki/RAII) principle and by methods * \ref setSize, \ref setLike, \ref swap, and \ref reset. You can also use * methods \ref getSize and \ref empty to check the current array size and * \ref getData to access the raw pointer. * \ref resize, \ref setSize, \ref setLike, \ref swap, and \ref reset. You can * also use methods \ref getSize and \ref empty to check the current array size * and \ref getData to access the raw pointer. * * Methods annotated as \ref \_\_cuda_callable\_\_ can be called either from * host or from kernels executing on a device according to the \e Device Loading Loading @@ -263,16 +263,60 @@ class Array virtual String getSerializationTypeVirtual() const; /** * \brief Method for setting the array size. * \brief Method for resizing the array. * * The method resizes the array to the given size: * * - If the current size is greater than \e size, the array is reduced to * its first \e size elements. * - If the current size is less than \e size, additional elements are * appended (see the note below on initialization). * - If the current size is equal to \e size, nothing happens. * * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array elements will become invalid. * * Note that this method differs from \ref std::vector::resize with respect * to the initialization of array elements: * * - if \e ValueType is a [fundamental type](https://en.cppreference.com/w/cpp/types/is_fundamental), * the elements are [default-initialized](https://en.cppreference.com/w/cpp/language/default_initialization) * (i.e., the elements are initialized to indeterminate values).
* - otherwise, the elements are [value-initialized](https://en.cppreference.com/w/cpp/language/value_initialization) * (like in \ref std::vector::resize). * * \param size The new size of the array. */ void resize( Index size ); /** * \brief Method for resizing the array with an initial value. * * The method resizes the array to the given size: * * If the array shares data with other arrays, the data is unbound. If the * current data is not shared and the current size is the same as the new * one, nothing happens. * - If the current size is greater than \e size, the array is reduced to * its first \e size elements. * - If the current size is less than \e size, additional copies of * \e value are appended. * - If the current size is equal to \e size, nothing happens. * * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array elements will become invalid. * * \param size The new size of the array. * \param value The value to initialize new elements with. */ void resize( Index size, const ValueType& value ); /** * \brief Method for setting the array size. * * This method behaves almost like \ref resize, but when the array size * is changed, old elements are not copied to the new memory location. * Hence, this is a shortcut for deallocating the array with \e resize(0) * followed by setting the new size with \e resize(size). * * \param size The new size of the array. */ void setSize( Index size ); Loading @@ -289,6 +333,8 @@ class Array * If the array size changes, the current data will be deallocated, thus * all pointers and views to the array elements will become invalid. * * Note that this method uses \ref setSize rather than \ref resize. * * \tparam ArrayT The type of the parameter can be any type which provides * the method \ref getSize() with the same signature as \e Array. * \param array The array whose size is to be taken.
Loading Loading @@ -867,9 +913,14 @@ class Array protected: /** \brief Method for releasing (deallocating) array data. */ /** \brief Internal method for releasing (deallocating) array data. */ void releaseData(); /** \brief Internal method for reallocating array elements. Used only * from the two overloads of \ref resize. */ void reallocate( Index size ); /** \brief Pointer to the data. */ Value* data = nullptr; Loading