Commit b357ff4e authored by Jakub Klinkovský
Browse files

Fixes in ArrayOperations

parent 8aa006fa
Loading
Loading
Loading
Loading
+7 −7
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ allocateMemory( Element*& data,
#ifdef HAVE_CUDA
   TNL_CHECK_CUDA_DEVICE;
   if( cudaMalloc( ( void** ) &data,
                   ( size_t ) size * sizeof( Element ) ) != cudaSuccess )
                   ( std::size_t ) size * sizeof( Element ) ) != cudaSuccess )
   {
      data = 0;
      throw Exceptions::CudaBadAlloc();
@@ -182,12 +182,12 @@ copySTLList( DestinationElement* destination,
   const std::size_t copy_buffer_size = std::min( Devices::Cuda::TransferBufferSize / (std::size_t) sizeof( DestinationElement ), ( std::size_t ) size );
   using BaseType = typename std::remove_cv< DestinationElement >::type;
   std::unique_ptr< BaseType[] > copy_buffer{ new BaseType[ copy_buffer_size ] };
   size_t copiedElements = 0;
   std::size_t copiedElements = 0;
   auto it = source.begin();
   while( copiedElements < size )
   {
      const auto copySize = std::min( size - copiedElements, copy_buffer_size );
      for( size_t i = 0; i < copySize; i++ )
      for( std::size_t i = 0; i < copySize; i++ )
         copy_buffer[ i ] = static_cast< DestinationElement >( * it ++ );
      ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory( &destination[ copiedElements ], &copy_buffer[ 0 ], copySize );
      copiedElements += copySize;
@@ -220,7 +220,7 @@ containsValue( const Element* data,
               const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );
   TNL_ASSERT_GE( size, (Index) 0, "" );

   if( size == 0 ) return false;
   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return  ( data[ i ] == value ); };
@@ -315,7 +315,7 @@ compareMemory( const Element1* destination,
    */
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
#ifdef HAVE_CUDA
   std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] };
   Index compared( 0 );
@@ -352,7 +352,7 @@ copyMemory( DestinationElement* destination,
{
   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
#ifdef HAVE_CUDA
   if( std::is_same< DestinationElement, SourceElement >::value )
   {
@@ -400,7 +400,7 @@ compareMemory( const Element1* hostData,
{
   TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
   TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." );
   return ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory( deviceData, hostData, size );
}

+18 −10
Original line number Diff line number Diff line
@@ -21,6 +21,8 @@ namespace TNL {
namespace Containers {
namespace Algorithms {

// Array-size threshold for OpenMP parallelization of the host array operations:
// the element-wise loops in setMemory and copyMemory run as an OpenMP
// `parallel for` only when OpenMP is enabled on the host device and the array
// size is strictly greater than this value.
static constexpr int OpenMPArrayOperationsThreshold = 512; // TODO: check this threshold

template< typename Element, typename Index >
void
ArrayOperations< Devices::Host >::
@@ -67,6 +69,9 @@ setMemory( Element* data,
           const Element& value,
           const Index size )
{
   #ifdef HAVE_OPENMP
   #pragma omp parallel for if( Devices::Host::isOMPEnabled() && size > OpenMPArrayOperationsThreshold )
   #endif
   for( Index i = 0; i < size; i++ )
      data[ i ] = value;
}
@@ -86,7 +91,7 @@ copyMemory( DestinationElement* destination,
   {
      // GCC 8.1 complains that we bypass a non-trivial copy-constructor
      // (in C++17 we could use constexpr if to avoid compiling this branch in that case)
      #if defined(__GNUC__) && ( __GNUC__ > 8 || ( __GNUC__ == 8 && __GNUC_MINOR__ > 0 ) ) && !defined(__clang__) && !defined(__NVCC__)
      #if defined(__GNUC__) && ( __GNUC__ > 8 || ( __GNUC__ == 8 && __GNUC_MINOR__ > 0 ) ) && !defined(__clang__)
         #pragma GCC diagnostic push
         #pragma GCC diagnostic ignored "-Wclass-memaccess"
      #endif
@@ -96,8 +101,11 @@ copyMemory( DestinationElement* destination,
      #endif
   }
   else
      #ifdef HAVE_OPENMP
      #pragma omp parallel for if( Devices::Host::isOMPEnabled() && size > OpenMPArrayOperationsThreshold )
      #endif
      for( Index i = 0; i < size; i++ )
         destination[ i ] = ( DestinationElement ) source[ i ];
         destination[ i ] = source[ i ];
}

template< typename DestinationElement,
@@ -107,9 +115,9 @@ ArrayOperations< Devices::Host >::
copySTLList( DestinationElement* destination,
             const std::list< SourceElement >& source )
{
   size_t i = 0;
   std::size_t i = 0;
   for( const SourceElement& e : source )
      destination[ i ++ ] = static_cast< DestinationElement >( e );
      destination[ i++ ] = e;
}


+2 −2
Original line number Diff line number Diff line
@@ -17,8 +17,8 @@ namespace TNL {
namespace Containers {
namespace Algorithms {

static const int OpenMPVectorOperationsThreshold = 512; // TODO: check this threshold
static const int PrefetchDistance = 128;
static constexpr int OpenMPVectorOperationsThreshold = 512; // TODO: check this threshold
static constexpr int PrefetchDistance = 128;

template< typename Vector >
void