Commit 7756e2d0 authored by Jakub Klinkovský
Browse files

Added Devices::Sequential and corresponding specializations in TNL::Algorithms

parent dbfa5d11
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -351,7 +351,7 @@ struct CudaReductionKernelLauncher

      // Copy result on CPU
      Result result;
      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 );
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, output, 1 );
      return result;
   }

@@ -384,8 +384,8 @@ struct CudaReductionKernelLauncher
      ////
      // Copy result on CPU
      std::pair< Index, Result > result;
      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
      MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 );
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.first, idxOutput, 1 );
      MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.second, output, 1 );
      return result;
   }

+3 −3
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@

#pragma once

#include <TNL/Devices/Sequential.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Cuda/CudaCallable.h>
@@ -17,12 +18,11 @@
namespace TNL {
namespace Algorithms {

template< typename DestinationExecution >
template< typename DestinationDevice >
struct MemoryOperations;

// TODO: change "void" to "Execution::Sequential"
template<>
struct MemoryOperations< void >
struct MemoryOperations< Devices::Sequential >
{
   template< typename Element >
   __cuda_callable__
+3 −3
Original line number Diff line number Diff line
@@ -93,7 +93,7 @@ copyFromIterator( DestinationElement* destination,
                  SourceIterator first,
                  SourceIterator last )
{
   MemoryOperations< void >::copyFromIterator( destination, destinationSize, first, last );
   MemoryOperations< Devices::Sequential >::copyFromIterator( destination, destinationSize, first, last );
}

template< typename DestinationElement,
@@ -137,7 +137,7 @@ containsValue( const Element* data,
   }
   else {
      // sequential algorithm can return as soon as it finds a match
      return MemoryOperations< void >::containsValue( data, size, value );
      return MemoryOperations< Devices::Sequential >::containsValue( data, size, value );
   }
}

@@ -159,7 +159,7 @@ containsOnlyValue( const Element* data,
   }
   else {
      // sequential algorithm can return as soon as it finds a mismatch
      return MemoryOperations< void >::containsOnlyValue( data, size, value );
      return MemoryOperations< Devices::Sequential >::containsOnlyValue( data, size, value );
   }
}

+8 −8
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ namespace Algorithms {
template< typename Element >
__cuda_callable__
void
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
setElement( Element* data,
            const Element& value )
{
@@ -28,7 +28,7 @@ setElement( Element* data,
template< typename Element >
__cuda_callable__
Element
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
getElement( const Element* data )
{
   return *data;
@@ -37,7 +37,7 @@ getElement( const Element* data )
template< typename Element, typename Index >
__cuda_callable__
void
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
set( Element* data,
     const Element& value,
     const Index size )
@@ -51,7 +51,7 @@ template< typename DestinationElement,
          typename Index >
__cuda_callable__
void
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
copy( DestinationElement* destination,
      const SourceElement* source,
      const Index size )
@@ -64,7 +64,7 @@ template< typename DestinationElement,
          typename Index,
          typename SourceIterator >
void
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
copyFromIterator( DestinationElement* destination,
                  Index destinationSize,
                  SourceIterator first,
@@ -82,7 +82,7 @@ template< typename Element1,
          typename Index >
__cuda_callable__
bool
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
compare( const Element1* destination,
         const Element2* source,
         const Index size )
@@ -97,7 +97,7 @@ template< typename Element,
          typename Index >
__cuda_callable__
bool
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
containsValue( const Element* data,
               const Index size,
               const Element& value )
@@ -116,7 +116,7 @@ template< typename Element,
          typename Index >
__cuda_callable__
bool
MemoryOperations< void >::
MemoryOperations< Devices::Sequential >::
containsOnlyValue( const Element* data,
                   const Index size,
                   const Element& value )
+30 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@

#include <functional>  // reduction functions like std::plus, std::logical_and, std::logical_or etc.

#include <TNL/Devices/Sequential.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>

@@ -23,6 +24,35 @@ namespace Algorithms {
// Primary template, declared only; it is specialized per device type
// (e.g. Devices::Sequential, Devices::Host).
template< typename Device >
struct Multireduction;

// Specialization of Multireduction for the Devices::Sequential device type.
template<>
struct Multireduction< Devices::Sequential >
{
   /**
    * Reduces n datasets of equal size in a single call. Only the
    * declaration is given here.
    *
    * Parameters:
    *    zero: the value each reduction starts from
    *    dataFetcher: callable object; `dataFetcher( i, j )` yields the i-th
    *                 value to be reduced from the j-th dataset
    *                 (i = 0,...,size-1; j = 0,...,n-1)
    *    reduction: callable object implementing the reduction operation,
    *               e.g. an instance of std::plus, std::logical_and or
    *               std::logical_or
    *    size: number of values in each dataset
    *    n: number of datasets to reduce
    *    result: output array holding n elements
    */
   template< typename Result, typename DataFetcher, typename Reduction, typename Index >
   static constexpr void reduce( const Result zero,
                                 DataFetcher dataFetcher,
                                 const Reduction reduction,
                                 const Index size,
                                 const int n,
                                 Result* result );
};

template<>
struct Multireduction< Devices::Host >
{
Loading