Commit 57db358c authored by Jakub Klinkovský

Split ArrayOperations into MemoryOperations and MultiDeviceMemoryOperations

This will be necessary to avoid code bloat with more than 2 devices
(execution types).
parent 986e25fc
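
The point of the split: the old ArrayOperations< DestinationDevice, SourceDevice > template needed a specialization for every ordered pair of devices, so each new device (execution type) grows the code quadratically. After this commit, per-device operations live in MemoryOperations< Execution > (one specialization per device) and only the genuinely cross-device copy/compare pairs remain in MultiDeviceMemoryOperations. A minimal compilable sketch of the layout, with a hypothetical Host tag instead of TNL's real device types and a plain loop standing in for the real transfer logic:

// Hypothetical stand-in tag; TNL's real Devices::Host/Devices::Cuda are not used here.
#include <algorithm>

struct Host {};

// One specialization per device/execution type: N devices -> N specializations.
template< typename Execution >
struct MemoryOperations;

template<>
struct MemoryOperations< Host >
{
   template< typename Element, typename Index >
   static void set( Element* data, const Element& value, const Index size )
   {
      std::fill( data, data + size, value );
   }
};

// Cross-device transfers are isolated here: a generic primary template plus a
// few targeted specializations replaces the old quadratic family of
// ArrayOperations< Dest, Src > specializations.
template< typename DestinationDevice, typename SourceDevice = DestinationDevice >
struct MultiDeviceMemoryOperations
{
   template< typename DestinationElement, typename SourceElement, typename Index >
   static void copy( DestinationElement* destination, const SourceElement* source, const Index size )
   {
      // host-accessible fallback for the sketch only
      for( Index i = 0; i < size; i++ )
         destination[ i ] = source[ i ];
   }
};

int main()
{
   double a[ 4 ], b[ 4 ];
   MemoryOperations< Host >::set( a, 3.14, 4 );
   MultiDeviceMemoryOperations< Host >::copy( b, a, 4 );
}
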
+4 −3
@@ -11,7 +11,8 @@
#pragma once

#include <TNL/TypeTraits.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Containers/Algorithms/MemoryOperations.h>
+#include <TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h>

namespace TNL {
namespace Containers {
@@ -39,7 +40,7 @@ struct ArrayAssignment< Array, T, true >
   {
      TNL_ASSERT_EQ( a.getSize(), t.getSize(), "The sizes of the arrays must be equal." );
      if( t.getSize() > 0 ) // we allow even assignment of empty arrays
-         ArrayOperations< typename Array::DeviceType, typename T::DeviceType >::template
+         MultiDeviceMemoryOperations< typename Array::DeviceType, typename T::DeviceType >::template
            copy< typename Array::ValueType, typename T::ValueType, typename Array::IndexType >
            ( a.getArrayData(), t.getArrayData(), t.getSize() );
   }
@@ -60,7 +61,7 @@ struct ArrayAssignment< Array, T, false >
   static void assign( Array& a, const T& t )
   {
      TNL_ASSERT_FALSE( a.empty(), "Cannot assign value to empty array." );
-      ArrayOperations< typename Array::DeviceType >::template
+      MemoryOperations< typename Array::DeviceType >::template
         set< typename Array::ValueType, typename Array::IndexType >
         ( a.getArrayData(), ( typename Array::ValueType ) t, a.getSize() );
   }
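
Both branches above are reached through Array's assignment operators. A hedged usage sketch (TNL's public Array API; not a tested snippet from this commit):

#include <TNL/Containers/Array.h>

using namespace TNL;

int main()
{
   Containers::Array< double, Devices::Host > host( 10 );
   host = 1.0;     // value branch: MemoryOperations< Devices::Host >::set
   Containers::Array< double, Devices::Cuda > device;
   device = host;  // array branch: MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy
}
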
+4 −4
@@ -17,7 +17,7 @@
#include <TNL/Cuda/DeviceInfo.h>
#include <TNL/Cuda/SharedMemory.h>
#include <TNL/Containers/Algorithms/CudaReductionBuffer.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
+#include <TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h>
#include <TNL/Exceptions/CudaSupportMissing.h>

namespace TNL {
@@ -352,7 +352,7 @@ struct CudaReductionKernelLauncher

      // Copy result on CPU
      Result result;
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 );
+      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 );
      return result;
   }

@@ -385,8 +385,8 @@ struct CudaReductionKernelLauncher
      ////
      // Copy result on CPU
      std::pair< Index, Result > result;
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
-      ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 );
+      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
+      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 );
      return result;
   }

+11 −47
/***************************************************************************
-                          ArrayOperations.h  -  description
+                          MemoryOperations.h  -  description
                             -------------------
    begin                : Jul 15, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
@@ -12,18 +12,18 @@

#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Cuda/CudaCallable.h>

namespace TNL {
namespace Containers {
namespace Algorithms {

-template< typename DestinationDevice,
-          typename SourceDevice = DestinationDevice >
-struct ArrayOperations;
+template< typename DestinationExecution >
+struct MemoryOperations;

// TODO: establish the concept of a "void device" for static computations in the whole TNL
// TODO: change "void" to "Execution::Sequential"
template<>
-struct ArrayOperations< void >
+struct MemoryOperations< void >
{
   template< typename Element >
   __cuda_callable__
@@ -80,7 +80,7 @@ struct ArrayOperations< void >
};

template<>
-struct ArrayOperations< Devices::Host >
+struct MemoryOperations< Devices::Host >
{
   template< typename Element >
   static void setElement( Element* data,
@@ -130,7 +130,7 @@ struct ArrayOperations< Devices::Host >
};

template<>
-struct ArrayOperations< Devices::Cuda >
+struct MemoryOperations< Devices::Cuda >
{
   template< typename Element >
   static void setElement( Element* data,
@@ -179,46 +179,10 @@ struct ArrayOperations< Devices::Cuda >
                                  const Element& value );
};

-template<>
-struct ArrayOperations< Devices::Cuda, Devices::Host >
-{
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static bool compare( const DestinationElement* destination,
-                        const SourceElement* source,
-                        const Index size );
-};
-
-template<>
-struct ArrayOperations< Devices::Host, Devices::Cuda >
-{
-   template< typename DestinationElement,
-             typename SourceElement,
-             typename Index >
-   static void copy( DestinationElement* destination,
-                     const SourceElement* source,
-                     const Index size );
-
-   template< typename Element1,
-             typename Element2,
-             typename Index >
-   static bool compare( const Element1* destination,
-                        const Element2* source,
-                        const Index size );
-};

} // namespace Algorithms
} // namespace Containers
} // namespace TNL

-#include <TNL/Containers/Algorithms/ArrayOperationsStatic.hpp>
-#include <TNL/Containers/Algorithms/ArrayOperationsHost.hpp>
-#include <TNL/Containers/Algorithms/ArrayOperationsCuda.hpp>
+#include <TNL/Containers/Algorithms/MemoryOperationsSequential.hpp>
+#include <TNL/Containers/Algorithms/MemoryOperationsHost.hpp>
+#include <TNL/Containers/Algorithms/MemoryOperationsCuda.hpp>
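
The two deleted cross-device specializations are not dropped: they move, renamed, into the new MultiDeviceMemoryOperations.h header included above, which this view does not show. The following interface is a reconstruction inferred from the deleted declarations and from this commit's call sites, so the actual header may differ in detail:

// Reconstruction (not the committed file): interface of the new
// MultiDeviceMemoryOperations.h, inferred from the declarations deleted above
// and from call sites such as MultiDeviceMemoryOperations< void, Devices::Cuda >.
namespace TNL {
namespace Containers {
namespace Algorithms {

template< typename DestinationDevice,
          typename SourceDevice = DestinationDevice >
struct MultiDeviceMemoryOperations
{
   template< typename DestinationElement,
             typename SourceElement,
             typename Index >
   static void copy( DestinationElement* destination,
                     const SourceElement* source,
                     const Index size );

   template< typename DestinationElement,
             typename SourceElement,
             typename Index >
   static bool compare( const DestinationElement* destination,
                        const SourceElement* source,
                        const Index size );
};

} // namespace Algorithms
} // namespace Containers
} // namespace TNL
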
+160 −0
/***************************************************************************
                          MemoryOperationsCuda.hpp  -  description
                             -------------------
    begin                : Jul 16, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <iostream>
#include <memory>  // std::unique_ptr
#include <stdexcept>

#include <TNL/Containers/Algorithms/MemoryOperations.h>
#include <TNL/Containers/Algorithms/MultiDeviceMemoryOperations.h>
#include <TNL/ParallelFor.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Exceptions/CudaSupportMissing.h>

namespace TNL {
namespace Containers {
namespace Algorithms {

template< typename Element >
void
MemoryOperations< Devices::Cuda >::
setElement( Element* data,
            const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
   MemoryOperations< Devices::Cuda >::set( data, value, 1 );
}

template< typename Element >
Element
MemoryOperations< Devices::Cuda >::
getElement( const Element* data )
{
   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
   Element result;
   MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
   return result;
}

template< typename Element, typename Index >
void
MemoryOperations< Devices::Cuda >::
set( Element* data,
     const Element& value,
     const Index size )
{
   if( size == 0 ) return;
   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
   auto kernel = [data, value] __cuda_callable__ ( Index i )
   {
      data[ i ] = value;
   };
   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
}

template< typename DestinationElement,
          typename SourceElement,
          typename Index >
void
MemoryOperations< Devices::Cuda >::
copy( DestinationElement* destination,
      const SourceElement* source,
      const Index size )
{
   if( size == 0 ) return;
   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );

   // our ParallelFor kernel is faster than cudaMemcpy
   auto kernel = [destination, source] __cuda_callable__ ( Index i )
   {
      destination[ i ] = source[ i ];
   };
   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
}
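
Beyond the speed remark in the comment, the kernel has a semantic edge over cudaMemcpy: DestinationElement and SourceElement may be different types, and the element-wise assignment converts on the fly, which a raw byte copy cannot.
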

template< typename DestinationElement,
          typename Index,
          typename SourceIterator >
void
MemoryOperations< Devices::Cuda >::
copyFromIterator( DestinationElement* destination,
                  Index destinationSize,
                  SourceIterator first,
                  SourceIterator last )
{
   using BaseType = typename std::remove_cv< DestinationElement >::type;
   std::unique_ptr< BaseType[] > buffer{ new BaseType[ Cuda::getTransferBufferSize() ] };
   Index copiedElements = 0;
   while( copiedElements < destinationSize && first != last ) {
      Index i = 0;
      while( i < Cuda::getTransferBufferSize() && first != last )
         buffer[ i++ ] = *first++;
      MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
      copiedElements += i;
   }
   if( first != last )
      throw std::length_error( "Source iterator is larger than the destination array." );
}
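
Since the source iterators are host-side, copyFromIterator stages the transfer through a host buffer of Cuda::getTransferBufferSize() elements: each pass fills the buffer sequentially, ships it to the device with one MultiDeviceMemoryOperations< Devices::Cuda, void >::copy, and repeats until the destination is full or the range is exhausted; leftover source elements then indicate an oversized range and raise the std::length_error.
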

template< typename Element1,
          typename Element2,
          typename Index >
bool
MemoryOperations< Devices::Cuda >::
compare( const Element1* destination,
         const Element2* source,
         const Index size )
{
   if( size == 0 ) return true;
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
}

template< typename Element,
          typename Index >
bool
MemoryOperations< Devices::Cuda >::
containsValue( const Element* data,
               const Index size,
               const Element& value )
{
   if( size == 0 ) return false;
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, (Index) 0, "" );

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
   return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false );
}

template< typename Element,
          typename Index >
bool
MemoryOperations< Devices::Cuda >::
containsOnlyValue( const Element* data,
                   const Index size,
                   const Element& value )
{
   if( size == 0 ) return false;
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );

   auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
   return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
}

} // namespace Algorithms
} // namespace Containers
} // namespace TNL
+13 −14
/***************************************************************************
-                          ArrayOperationsHost.hpp  -  description
+                          MemoryOperationsHost.hpp  -  description
                             -------------------
    begin                : Jul 16, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
@@ -14,8 +14,8 @@
#include <stdexcept>
#include <algorithm>  // std::copy, std::equal

+#include <TNL/Containers/Algorithms/MemoryOperations.h>
#include <TNL/ParallelFor.h>
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
#include <TNL/Containers/Algorithms/Reduction.h>

namespace TNL {
@@ -24,7 +24,7 @@ namespace Algorithms {

template< typename Element >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
setElement( Element* data,
            const Element& value )
{
@@ -34,7 +34,7 @@ setElement( Element* data,

template< typename Element >
Element
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
getElement( const Element* data )
{
   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
@@ -43,7 +43,7 @@ getElement( const Element* data )

template< typename Element, typename Index >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
set( Element* data,
     const Element& value,
     const Index size )
@@ -61,7 +61,7 @@ template< typename DestinationElement,
          typename SourceElement,
          typename Index >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
copy( DestinationElement* destination,
      const SourceElement* source,
      const Index size )
@@ -88,21 +88,20 @@ template< typename DestinationElement,
          typename Index,
          typename SourceIterator >
void
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
copyFromIterator( DestinationElement* destination,
                  Index destinationSize,
                  SourceIterator first,
                  SourceIterator last )
{
-   ArrayOperations< void >::copyFromIterator( destination, destinationSize, first, last );
+   MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last );
}


template< typename DestinationElement,
          typename SourceElement,
          typename Index >
bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
compare( const DestinationElement* destination,
         const SourceElement* source,
         const Index size )
@@ -124,7 +123,7 @@ compare( const DestinationElement* destination,
template< typename Element,
          typename Index >
bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
containsValue( const Element* data,
               const Index size,
               const Element& value )
@@ -139,14 +138,14 @@ containsValue( const Element* data,
   }
   else {
      // sequential algorithm can return as soon as it finds a match
-      return ArrayOperations< void >::containsValue( data, size, value );
+      return MemoryOperations< void >::containsValue( data, size, value );
   }
}

template< typename Element,
          typename Index >
bool
-ArrayOperations< Devices::Host >::
+MemoryOperations< Devices::Host >::
containsOnlyValue( const Element* data,
                   const Index size,
                   const Element& value )
@@ -161,7 +160,7 @@ containsOnlyValue( const Element* data,
   }
   else {
      // sequential algorithm can return as soon as it finds a mismatch
-      return ArrayOperations< void >::containsOnlyValue( data, size, value );
+      return MemoryOperations< void >::containsOnlyValue( data, size, value );
   }
}
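
The hunk context hides the parallel branch of both functions, but the pattern matches the includes above: large arrays are presumably scanned with Reduction< Devices::Host >, while small arrays fall back to the sequential MemoryOperations< void > variant, which can return at the first match (or mismatch) instead of paying the parallel-reduction overhead.
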
