Loading src/TNL/Containers/Algorithms/ArrayOperations.h +158 −138 Original line number Diff line number Diff line Loading @@ -66,6 +66,12 @@ class ArrayOperations< Devices::Host > static bool compareMemory( const Element1* destination, const Element2* source, const Index size ); template< typename Element, typename Index > static bool checkValue( const Element* data, const Index size, const Element& value ); }; template<> Loading @@ -77,9 +83,6 @@ class ArrayOperations< Devices::Cuda > static void allocateMemory( Element*& data, const Index size ); template< typename Element > static void freeMemory( Element* data ); template< typename Element > __cuda_callable__ static void setMemoryElement( Element* data, Loading @@ -87,6 +90,9 @@ class ArrayOperations< Devices::Cuda > template< typename Element > __cuda_callable__ static Element getMemoryElement( const Element* data ); template< typename Element > static Element getMemoryElement( const Element* data ); // TODO: does not make sense for CUDA - remove? Loading Loading @@ -114,6 +120,13 @@ class ArrayOperations< Devices::Cuda > static bool compareMemory( const Element1* destination, const Element2* source, const Index size ); template< typename Element, typename Index > static bool checkValue( const Element* data, const Index size, const Element& value ); }; template<> Loading Loading @@ -200,6 +213,13 @@ class ArrayOperations< Devices::MIC > static bool compareMemory( const Element1* destination, const Element2* source, const Index size ); template< typename Element, typename Index > static bool checkValue( const Element* data, const Index size, const Element& value ); }; template<> Loading src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +27 −0 Original line number Diff line number Diff line Loading @@ -208,13 +208,40 @@ compareMemory( const Element1* destination, { TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); #ifdef HAVE_CUDA //TODO: The parallel reduction on the CUDA device with different element types is needed. bool result = false; Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities; Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result ); return result; #else throw Exceptions::CudaSupportMissing(); #endif } template< typename Element, typename Index > bool ArrayOperations< Devices::Cuda >:: checkValue( const Element* data, const Index size, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); #ifdef HAVE_CUDA if( size == 0 ) return false; bool result = false; Algorithms::ParallelReductionCheckPresence< Element > reductionCheckPresence; reductionCheckPresence.setValue( value ); Reduction< Devices::Cuda >::reduce( reductionCheckPresence, size, data, 0, result ); return result; #else throw Exceptions::CudaSupportMissing(); #endif } /**** * Operations CUDA -> Host */ Loading src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h +30 −12 Original line number Diff line number Diff line Loading @@ -15,6 +15,8 @@ #include <TNL/tnlConfig.h> #include <TNL/Containers/Algorithms/ArrayOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Containers/Algorithms/ReductionOperations.h> namespace TNL { namespace Containers { Loading Loading @@ -128,20 +130,36 @@ compareMemory( const DestinationElement* destination, const SourceElement* source, const Index size ) { if( std::is_same< DestinationElement, SourceElement >::value && ( std::is_fundamental< DestinationElement >::value || std::is_pointer< DestinationElement >::value ) ) { if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 ) return false; TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); //TODO: The parallel reduction on the CUDA device with different element types is needed. bool result = false; Algorithms::ParallelReductionEqualities< DestinationElement, SourceElement > reductionEqualities; Reduction< Devices::Host >::reduce( reductionEqualities, size, destination, source, result ); return result; } else for( Index i = 0; i < size; i ++ ) if( ! ( destination[ i ] == source[ i ] ) ) return false; return true; template< typename Element, typename Index > bool ArrayOperations< Devices::Host >:: checkValue( const Element* data, const Index size, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); if( size == 0 ) return false; bool result = false; Algorithms::ParallelReductionCheckPresence< Element > reductionCheckPresence; reductionCheckPresence.setValue( value ); Reduction< Devices::Host >::reduce( reductionCheckPresence, size, data, 0, result ); return result; } #ifdef TEMPLATE_EXPLICIT_INSTANTIATION extern template bool ArrayOperations< Devices::Host >::allocateMemory< char, int >( char*& data, const int size ); Loading src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h +19 −0 Original line number Diff line number Diff line Loading @@ -215,6 +215,25 @@ compareMemory( const Element1* destination, #endif } template< typename Element, typename Index > bool ArrayOperations< Devices::MIC >:: checkValue( const Element* data, const Index size, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); #ifdef HAVE_MIC TNL_ASSERT( false, ); return false; #else throw Exceptions::MICSupportMissing(); #endif } /**** * Operations MIC -> Host */ Loading src/TNL/Containers/Algorithms/ReductionOperations.h +30 −0 Original line number Diff line number Diff line Loading @@ -579,6 +579,36 @@ protected: PType p; }; template< typename Data, typename Result = bool > class ParallelReductionCheckPresence : public ParallelReductionLogicalOr< Result > { public: using DataType1 = Data; using DataType2 = Data; using ResultType = Result; using LaterReductionOperation = ParallelReductionLogicalOr< Result >; template< typename Index > __cuda_callable__ void firstReduction( ResultType& result, const Index& index, const DataType1* data1, const DataType2* data2 ) { result = result || ( data1[ index ] == value ); } void setValue( const Data& v ) { this->value = v; } protected: Data value; }; } // namespace Algorithms } // namespace Containers } // namespace TNL Loading
src/TNL/Containers/Algorithms/ArrayOperations.h +158 −138 Original line number Diff line number Diff line Loading @@ -66,6 +66,12 @@ class ArrayOperations< Devices::Host > static bool compareMemory( const Element1* destination, const Element2* source, const Index size ); template< typename Element, typename Index > static bool checkValue( const Element* data, const Index size, const Element& value ); }; template<> Loading @@ -77,9 +83,6 @@ class ArrayOperations< Devices::Cuda > static void allocateMemory( Element*& data, const Index size ); template< typename Element > static void freeMemory( Element* data ); template< typename Element > __cuda_callable__ static void setMemoryElement( Element* data, Loading @@ -87,6 +90,9 @@ class ArrayOperations< Devices::Cuda > template< typename Element > __cuda_callable__ static Element getMemoryElement( const Element* data ); template< typename Element > static Element getMemoryElement( const Element* data ); // TODO: does not make sense for CUDA - remove? Loading Loading @@ -114,6 +120,13 @@ class ArrayOperations< Devices::Cuda > static bool compareMemory( const Element1* destination, const Element2* source, const Index size ); template< typename Element, typename Index > static bool checkValue( const Element* data, const Index size, const Element& value ); }; template<> Loading Loading @@ -200,6 +213,13 @@ class ArrayOperations< Devices::MIC > static bool compareMemory( const Element1* destination, const Element2* source, const Index size ); template< typename Element, typename Index > static bool checkValue( const Element* data, const Index size, const Element& value ); }; template<> Loading
src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +27 −0 Original line number Diff line number Diff line Loading @@ -208,13 +208,40 @@ compareMemory( const Element1* destination, { TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); #ifdef HAVE_CUDA //TODO: The parallel reduction on the CUDA device with different element types is needed. bool result = false; Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities; Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result ); return result; #else throw Exceptions::CudaSupportMissing(); #endif } template< typename Element, typename Index > bool ArrayOperations< Devices::Cuda >:: checkValue( const Element* data, const Index size, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); #ifdef HAVE_CUDA if( size == 0 ) return false; bool result = false; Algorithms::ParallelReductionCheckPresence< Element > reductionCheckPresence; reductionCheckPresence.setValue( value ); Reduction< Devices::Cuda >::reduce( reductionCheckPresence, size, data, 0, result ); return result; #else throw Exceptions::CudaSupportMissing(); #endif } /**** * Operations CUDA -> Host */ Loading
src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h +30 −12 Original line number Diff line number Diff line Loading @@ -15,6 +15,8 @@ #include <TNL/tnlConfig.h> #include <TNL/Containers/Algorithms/ArrayOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Containers/Algorithms/ReductionOperations.h> namespace TNL { namespace Containers { Loading Loading @@ -128,20 +130,36 @@ compareMemory( const DestinationElement* destination, const SourceElement* source, const Index size ) { if( std::is_same< DestinationElement, SourceElement >::value && ( std::is_fundamental< DestinationElement >::value || std::is_pointer< DestinationElement >::value ) ) { if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 ) return false; TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); //TODO: The parallel reduction on the CUDA device with different element types is needed. bool result = false; Algorithms::ParallelReductionEqualities< DestinationElement, SourceElement > reductionEqualities; Reduction< Devices::Host >::reduce( reductionEqualities, size, destination, source, result ); return result; } else for( Index i = 0; i < size; i ++ ) if( ! ( destination[ i ] == source[ i ] ) ) return false; return true; template< typename Element, typename Index > bool ArrayOperations< Devices::Host >:: checkValue( const Element* data, const Index size, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); if( size == 0 ) return false; bool result = false; Algorithms::ParallelReductionCheckPresence< Element > reductionCheckPresence; reductionCheckPresence.setValue( value ); Reduction< Devices::Host >::reduce( reductionCheckPresence, size, data, 0, result ); return result; } #ifdef TEMPLATE_EXPLICIT_INSTANTIATION extern template bool ArrayOperations< Devices::Host >::allocateMemory< char, int >( char*& data, const int size ); Loading
src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h +19 −0 Original line number Diff line number Diff line Loading @@ -215,6 +215,25 @@ compareMemory( const Element1* destination, #endif } template< typename Element, typename Index > bool ArrayOperations< Devices::MIC >:: checkValue( const Element* data, const Index size, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); #ifdef HAVE_MIC TNL_ASSERT( false, ); return false; #else throw Exceptions::MICSupportMissing(); #endif } /**** * Operations MIC -> Host */ Loading
src/TNL/Containers/Algorithms/ReductionOperations.h +30 −0 Original line number Diff line number Diff line Loading @@ -579,6 +579,36 @@ protected: PType p; }; template< typename Data, typename Result = bool > class ParallelReductionCheckPresence : public ParallelReductionLogicalOr< Result > { public: using DataType1 = Data; using DataType2 = Data; using ResultType = Result; using LaterReductionOperation = ParallelReductionLogicalOr< Result >; template< typename Index > __cuda_callable__ void firstReduction( ResultType& result, const Index& index, const DataType1* data1, const DataType2* data2 ) { result = result || ( data1[ index ] == value ); } void setValue( const Data& v ) { this->value = v; } protected: Data value; }; } // namespace Algorithms } // namespace Containers } // namespace TNL