Commit 19e9b4e5 authored by Jakub Klinkovský
Browse files

Moved scan tests from Containers to Algorithms

parent e347b486
Loading
Loading
Loading
Loading
+30 −2
Original line number Diff line number Diff line
@@ -10,8 +10,13 @@ set( COMMON_TESTS
         unrolledForTest
)

set( CPP_TESTS SegmentedScanTest )
set( CUDA_TESTS )
set( CPP_TESTS
         ScanTest
         SegmentedScanTest
)
set( CUDA_TESTS
         ScanTestCuda
)
if( BUILD_CUDA )
   set( CUDA_TESTS  ${CUDA_TESTS} ${COMMON_TESTS} )
else()
@@ -32,3 +37,26 @@ if( BUILD_CUDA )
      add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} )
   endforeach()
endif()


# Distributed scan tests. They are built and registered only when BUILD_MPI is
# enabled; the CUDA variant is added on top of that when BUILD_CUDA is enabled.
#
# Every binary is registered with CTest twice:
#   <name>          - launched through mpirun on 4 ranks on localhost,
#   <name>_nodistr  - the same binary started directly, without mpirun.
#
# Note: the condition is written as `if( BUILD_MPI )` (no `${}`), so that
# if() evaluates the variable itself instead of its pre-expanded value,
# which matches the `if( BUILD_CUDA )` checks used in this file.
if( BUILD_MPI )
   # Host build of the test and its two CTest entries.
   add_executable( DistributedScanTest DistributedScanTest.cpp )
   target_compile_options( DistributedScanTest PRIVATE ${CXX_TESTS_FLAGS} )
   target_link_libraries( DistributedScanTest ${GTEST_BOTH_LIBRARIES} )

   set( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTest${CMAKE_EXECUTABLE_SUFFIX}" )
   add_test( NAME DistributedScanTest COMMAND "mpirun" ${mpi_test_parameters} )
   add_test( NAME DistributedScanTest_nodistr COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTest${CMAKE_EXECUTABLE_SUFFIX}" )

   # CUDA build of the same test, kept together with its CTest entries.
   if( BUILD_CUDA )
      cuda_add_executable( DistributedScanTestCuda DistributedScanTestCuda.cu
                           OPTIONS ${CXX_TESTS_FLAGS} )
      target_link_libraries( DistributedScanTestCuda ${GTEST_BOTH_LIBRARIES} )

      # mpi_test_parameters is intentionally overwritten with the CUDA binary path.
      set( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTestCuda${CMAKE_EXECUTABLE_SUFFIX}" )
      add_test( NAME DistributedScanTestCuda COMMAND "mpirun" ${mpi_test_parameters} )
      add_test( NAME DistributedScanTestCuda_nodistr COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTestCuda${CMAKE_EXECUTABLE_SUFFIX}" )
   endif()
endif()
+1 −0
Original line number Diff line number Diff line
#include "DistributedScanTest.h"
+100 −108
Original line number Diff line number Diff line
/***************************************************************************
                          DistributedScanTest.h  -  description
                             -------------------
    begin                : Sep 6, 2018
    copyright            : (C) 2018 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/
#pragma once

#ifdef HAVE_GTEST
#include <limits>

#include <gtest/gtest.h>

#include <TNL/Containers/DistributedVector.h>
#include <TNL/Containers/DistributedVectorView.h>
#include <TNL/Containers/DistributedArray.h>
#include <TNL/Containers/DistributedArrayView.h>
#include <TNL/Containers/Partitioner.h>
#include <TNL/Algorithms/DistributedScan.h>

#define DISTRIBUTED_VECTOR
#include "VectorHelperFunctions.h"
#include "../Containers/VectorHelperFunctions.h"

using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;
using namespace TNL::MPI;

/*
 * Light check of DistributedVector.
 * Light check of DistributedScan applied to DistributedArray.
 *
 * - Number of processes is not limited.
 * - Global size is hardcoded as 97 to force non-uniform distribution.
 * - Communication group is hardcoded as AllGroup -- it may be changed as needed.
 */
template< typename DistributedVector >
class DistributedVectorTest
template< typename DistributedArray >
class DistributedScanTest
: public ::testing::Test
{
protected:
   using RealType = typename DistributedVector::RealType;
   using DeviceType = typename DistributedVector::DeviceType;
   using IndexType = typename DistributedVector::IndexType;
   using DistributedVectorType = DistributedVector;
   using VectorViewType = typename DistributedVectorType::LocalViewType;
   using DistributedVectorView = Containers::DistributedVectorView< RealType, DeviceType, IndexType >;
   using HostDistributedVectorType = typename DistributedVectorType::template Self< RealType, Devices::Sequential >;
   using ValueType = typename DistributedArray::ValueType;
   using DeviceType = typename DistributedArray::DeviceType;
   using IndexType = typename DistributedArray::IndexType;
   using DistributedArrayType = DistributedArray;
   using VectorViewType = typename DistributedArrayType::LocalViewType;
   using DistributedArrayView = Containers::DistributedArrayView< ValueType, DeviceType, IndexType >;
   using HostDistributedArrayType = typename DistributedArrayType::template Self< ValueType, Devices::Sequential >;

   const MPI_Comm group = AllGroup();

   DistributedVectorType v;
   DistributedVectorView v_view;
   HostDistributedVectorType v_host;
   DistributedArrayType v;
   DistributedArrayView v_view;
   HostDistributedArrayType v_host;

   const int rank = GetRank(group);
   const int nproc = GetSize(group);
@@ -58,9 +54,9 @@ protected:
   // some arbitrary value (but must be 0 if not distributed)
   const int ghosts = (nproc > 1) ? 4 : 0;

   DistributedVectorTest()
   DistributedScanTest()
   {
      using LocalRangeType = typename DistributedVector::LocalRangeType;
      using LocalRangeType = typename DistributedArray::LocalRangeType;
      const LocalRangeType localRange = Partitioner< IndexType >::splitRange( globalSize, group );
      v.setDistribution( localRange, ghosts, globalSize, group );

@@ -75,74 +71,70 @@ protected:
   }
};

// types for which DistributedVectorTest is instantiated
using DistributedVectorTypes = ::testing::Types<
   DistributedVector< double, Devices::Sequential, int >,
   DistributedVector< double, Devices::Host, int >
// types for which DistributedScanTest is instantiated
using DistributedArrayTypes = ::testing::Types<
   DistributedArray< double, Devices::Sequential, int >,
   DistributedArray< double, Devices::Host, int >
#ifdef HAVE_CUDA
   ,
   DistributedVector< double, Devices::Cuda, int >
   DistributedArray< double, Devices::Cuda, int >
#endif
>;

TYPED_TEST_SUITE( DistributedVectorTest, DistributedVectorTypes );
TYPED_TEST_SUITE( DistributedScanTest, DistributedArrayTypes );

// TODO: test that horizontal operations are computed for ghost values without synchronization

TYPED_TEST( DistributedVectorTest, scan )
TYPED_TEST( DistributedScanTest, inclusiveScan )
{
   using RealType = typename TestFixture::DistributedVectorType::RealType;
   using DeviceType = typename TestFixture::DistributedVectorType::DeviceType;
   using IndexType = typename TestFixture::DistributedVectorType::IndexType;
   using ValueType = typename TestFixture::DistributedArrayType::ValueType;
   using DeviceType = typename TestFixture::DistributedArrayType::DeviceType;
   using IndexType = typename TestFixture::DistributedArrayType::IndexType;

   auto& v = this->v;
   auto& v_view = this->v_view;
   auto& v_host = this->v_host;
   const auto localRange = v.getLocalRange();

   // FIXME: tests should work in all cases
   if( std::is_same< RealType, float >::value )
      return;

   setConstantSequence( v, 0 );
   v_host = -1;
   v.scan();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;

   setConstantSequence( v, 1 );
   v_host = -1;
   v.scan();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v_view;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;

   setLinearSequence( v );
   v_host = -1;
   v.scan();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;

   // test views
   setConstantSequence( v, 0 );
   v_host = -1;
   v_view.scan();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;

   setConstantSequence( v, 1 );
   v_host = -1;
   v_view.scan();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v_view;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;

   setLinearSequence( v );
   v_host = -1;
   v_view.scan();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
@@ -152,67 +144,67 @@ TYPED_TEST( DistributedVectorTest, scan )
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
#ifdef HAVE_CUDA
      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::maxGridSize() = 3;
      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::maxGridSize() = 3;

      setConstantSequence( v, 0 );
      v_host = -1;
      v.scan();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], 0 );

      setConstantSequence( v, 1 );
      v_host = -1;
      v.scan();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v_view;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], i + 1 );

      setLinearSequence( v );
      v_host = -1;
      v.scan();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;

      // test views
      setConstantSequence( v, 0 );
      v_host = -1;
      v_view.scan();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], 0 );

      setConstantSequence( v, 1 );
      v_host = -1;
      v_view.scan();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v_view;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], i + 1 );

      setLinearSequence( v );
      v_host = -1;
      v_view.scan();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;

      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::resetMaxGridSize();
      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::resetMaxGridSize();
#endif
   }
}

TYPED_TEST( DistributedVectorTest, exclusiveScan )
TYPED_TEST( DistributedScanTest, exclusiveScan )
{
   using RealType = typename TestFixture::DistributedVectorType::RealType;
   using DeviceType = typename TestFixture::DistributedVectorType::DeviceType;
   using IndexType = typename TestFixture::DistributedVectorType::IndexType;
   using ValueType = typename TestFixture::DistributedArrayType::ValueType;
   using DeviceType = typename TestFixture::DistributedArrayType::DeviceType;
   using IndexType = typename TestFixture::DistributedArrayType::IndexType;

   auto& v = this->v;
   auto& v_view = this->v_view;
@@ -220,48 +212,48 @@ TYPED_TEST( DistributedVectorTest, exclusiveScan )
   const auto localRange = v.getLocalRange();

   // FIXME: tests should work in all cases
   if( std::is_same< RealType, float >::value )
   if( std::is_same< ValueType, float >::value )
      return;

   setConstantSequence( v, 0 );
   v_host = -1;
   v.template scan< Algorithms::ScanType::Exclusive >();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;

   setConstantSequence( v, 1 );
   v_host = -1;
   v.template scan< Algorithms::ScanType::Exclusive >();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v_view;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], i ) << "i = " << i;

   setLinearSequence( v );
   v_host = -1;
   v.template scan< Algorithms::ScanType::Exclusive >();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;

   // test views
   setConstantSequence( v, 0 );
   v_host = -1;
   v_view.template scan< Algorithms::ScanType::Exclusive >();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;

   setConstantSequence( v, 1 );
   v_host = -1;
   v_view.template scan< Algorithms::ScanType::Exclusive >();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v_view;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], i ) << "i = " << i;

   setLinearSequence( v );
   v_host = -1;
   v_view.template scan< Algorithms::ScanType::Exclusive >();
   v_host.setValue( -1 );
   DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
   v_host = v;
   for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
      EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;
@@ -271,58 +263,58 @@ TYPED_TEST( DistributedVectorTest, exclusiveScan )
   if( std::is_same< DeviceType, Devices::Cuda >::value )
   {
#ifdef HAVE_CUDA
      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::maxGridSize() = 3;
      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::maxGridSize() = 3;

      setConstantSequence( v, 0 );
      v_host = -1;
      v.template scan< Algorithms::ScanType::Exclusive >();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], 0 );

      setConstantSequence( v, 1 );
      v_host = -1;
      v.template scan< Algorithms::ScanType::Exclusive >();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v_view;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], i );

      setLinearSequence( v );
      v_host = -1;
      v.template scan< Algorithms::ScanType::Exclusive >();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;

      // test views
      setConstantSequence( v, 0 );
      v_host = -1;
      v_view.template scan< Algorithms::ScanType::Exclusive >();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], 0 );

      setConstantSequence( v, 1 );
      v_host = -1;
      v_view.template scan< Algorithms::ScanType::Exclusive >();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v_view;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], i );

      setLinearSequence( v );
      v_host = -1;
      v_view.template scan< Algorithms::ScanType::Exclusive >();
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1  );
      v_host.setValue( -1 );
      DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 );
      EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1  );
      v_host = v;
      for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ )
         EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;

      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::resetMaxGridSize();
      Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::resetMaxGridSize();
#endif
   }
}
+1 −0
Original line number Diff line number Diff line
#include "DistributedScanTest.h"
+1 −0
Original line number Diff line number Diff line
#include "ScanTest.h"
Loading