Commit 84fce19a authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Merge branch 'develop' into mpi-explosive

parents 239905cd 1ceb523b
Loading
Loading
Loading
Loading
+158 −138
Original line number Diff line number Diff line
@@ -66,6 +66,12 @@ class ArrayOperations< Devices::Host >
      static bool compareMemory( const Element1* destination,
                                 const Element2* source,
                                 const Index size );

      template< typename Element,
                typename Index >
      static bool checkValue( const Element* data,
                              const Index size,
                              const Element& value );
};

template<>
@@ -77,9 +83,6 @@ class ArrayOperations< Devices::Cuda >
      static void allocateMemory( Element*& data,
                                  const Index size );

   template< typename Element >
   static void freeMemory( Element* data );

   template< typename Element >
   __cuda_callable__
   static void setMemoryElement( Element* data,
@@ -87,6 +90,9 @@ class ArrayOperations< Devices::Cuda >

   template< typename Element >
   __cuda_callable__
   static Element getMemoryElement( const Element* data );

      template< typename Element >
      static Element getMemoryElement( const Element* data );

      // TODO: does not make sense for CUDA - remove?
@@ -114,6 +120,13 @@ class ArrayOperations< Devices::Cuda >
      static bool compareMemory( const Element1* destination,
                                 const Element2* source,
                                 const Index size );

      template< typename Element,
                typename Index >
      static bool checkValue( const Element* data,
                              const Index size,
                              const Element& value );

};

template<>
@@ -200,6 +213,13 @@ class ArrayOperations< Devices::MIC >
      static bool compareMemory( const Element1* destination,
                                 const Element2* source,
                                 const Index size );

      template< typename Element,
                typename Index >
      static bool checkValue( const Element* data,
                              const Index size,
                              const Element& value );
      
};

template<>
+27 −0
Original line number Diff line number Diff line
@@ -208,13 +208,40 @@ compareMemory( const Element1* destination,
{
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
#ifdef HAVE_CUDA   
   //TODO: The parallel reduction on the CUDA device with different element types is needed.
   bool result = false;
   Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
   Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result );
   return result;
#else
   throw Exceptions::CudaSupportMissing();
#endif   
}

/****
 * Tests whether \e value occurs in the array \e data holding \e size elements.
 * The answer is obtained from Reduction< Devices::Cuda >::reduce driven by
 * Algorithms::ParallelReductionCheckPresence.
 *
 * \param data   array to be searched; asserted to be non-null
 * \param size   number of elements in \e data; asserted to be >= 0
 * \param value  the element being looked for
 * \return the result produced by the reduction; \e false when \e size is zero
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA
 */
template< typename Element,
          typename Index >
bool
ArrayOperations< Devices::Cuda >::
checkValue( const Element* data,
            const Index size,
            const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );
#ifndef HAVE_CUDA
   throw Exceptions::CudaSupportMissing();
#else
   bool found = false;
   // An empty array cannot contain the value, so the reduction is skipped.
   if( size != 0 )
   {
      Algorithms::ParallelReductionCheckPresence< Element > presenceCheck;
      presenceCheck.setValue( value );
      // No second input array is supplied; 0 is passed in its place.
      Reduction< Devices::Cuda >::reduce( presenceCheck, size, data, 0, found );
   }
   return found;
#endif
}


/****
 * Operations CUDA -> Host
 */
+30 −12
Original line number Diff line number Diff line
@@ -15,6 +15,8 @@

#include <TNL/tnlConfig.h>
#include <TNL/Containers/Algorithms/ArrayOperations.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Containers/Algorithms/ReductionOperations.h>

namespace TNL {
namespace Containers {   
@@ -128,20 +130,36 @@ compareMemory( const DestinationElement* destination,
               const SourceElement* source,
               const Index size )
{
   if( std::is_same< DestinationElement, SourceElement >::value &&
       ( std::is_fundamental< DestinationElement >::value ||
         std::is_pointer< DestinationElement >::value ) )
   {
      if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 )
         return false;
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );

   //TODO: The parallel reduction on the CUDA device with different element types is needed.
   bool result = false;
   Algorithms::ParallelReductionEqualities< DestinationElement, SourceElement > reductionEqualities;
   Reduction< Devices::Host >::reduce( reductionEqualities, size, destination, source, result );
   return result;
}
   else
      for( Index i = 0; i < size; i ++ )
         if( ! ( destination[ i ] == source[ i ] ) )
            return false;
   return true;

/****
 * Tests whether \e value occurs in the array \e data holding \e size elements.
 * The answer is obtained from Reduction< Devices::Host >::reduce driven by
 * Algorithms::ParallelReductionCheckPresence.
 *
 * \param data   array to be searched; asserted to be non-null
 * \param size   number of elements in \e data; asserted to be >= 0
 * \param value  the element being looked for
 * \return the result produced by the reduction; \e false when \e size is zero
 */
template< typename Element,
          typename Index >
bool
ArrayOperations< Devices::Host >::
checkValue( const Element* data,
            const Index size,
            const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );

   bool found = false;
   // An empty array cannot contain the value, so the reduction is skipped.
   if( size != 0 )
   {
      Algorithms::ParallelReductionCheckPresence< Element > presenceCheck;
      presenceCheck.setValue( value );
      // No second input array is supplied; 0 is passed in its place.
      Reduction< Devices::Host >::reduce( presenceCheck, size, data, 0, found );
   }
   return found;
}


#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

extern template bool ArrayOperations< Devices::Host >::allocateMemory< char,        int >( char*& data, const int size );
+19 −0
Original line number Diff line number Diff line
@@ -215,6 +215,25 @@ compareMemory( const Element1* destination,
#endif
}

/****
 * MIC counterpart of checkValue. No search is performed here: when compiled
 * with HAVE_MIC the function trips an unconditional assertion and then
 * returns \e false, regardless of the array content.
 *
 * \param data   array that would be searched; asserted to be non-null
 * \param size   number of elements in \e data; asserted to be >= 0
 * \param value  the element being looked for (not examined)
 * \return always \e false when compiled with HAVE_MIC
 * \throws Exceptions::MICSupportMissing when compiled without HAVE_MIC
 */
template< typename Element,
          typename Index >
bool
ArrayOperations< Devices::MIC >::
checkValue( const Element* data,
            const Index size,
            const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );
#ifndef HAVE_MIC
   throw Exceptions::MICSupportMissing();
#else
   // Unconditional assertion failure, followed by a fixed negative answer.
   TNL_ASSERT( false, );
   return false;
#endif
}


/****
 * Operations MIC -> Host
 */
+30 −0
Original line number Diff line number Diff line
@@ -579,6 +579,36 @@ protected:
   PType p;
};

/****
 * Reduction operation used to find out whether a given value is present
 * in an array. It extends ParallelReductionLogicalOr< Result > and replaces
 * the first reduction step by a comparison of each element with the value
 * stored by setValue(); ParallelReductionLogicalOr< Result > is named as
 * the operation for the later reduction.
 *
 * \tparam Data   type of the array elements and of the value searched for
 * \tparam Result type of the reduction result, \e bool by default
 */
template< typename Data, typename Result = bool >
class ParallelReductionCheckPresence : public ParallelReductionLogicalOr< Result >
{
   public:
      using DataType1 = Data;
      using DataType2 = Data;
      using ResultType = Result;
      using LaterReductionOperation = ParallelReductionLogicalOr< Result >;

      /****
       * Stores the value that firstReduction() compares the elements against.
       * It has to be called before the operation is used, the value is
       * not initialized otherwise.
       */
      void setValue( const Data& wanted )
      {
         value = wanted;
      }

      /****
       * Folds one array element into \e result: \e result stays true once it
       * is true, otherwise it becomes true if input[ position ] equals the
       * stored value. The second input array is not read.
       */
      template< typename Index >
      __cuda_callable__ void
      firstReduction( ResultType& result,
                      const Index& position,
                      const DataType1* input,
                      const DataType2* /* not read */ )
      {
         result = ( result || input[ position ] == value );
      }

   protected:
      // The value being searched for, set by setValue().
      Data value;
};



} // namespace Algorithms
} // namespace Containers
} // namespace TNL
Loading