/*************************************************************************** VectorTest-2.h - description ------------------- begin : Oct 25, 2010 copyright : (C) 2010 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #ifdef HAVE_GTEST #include "VectorTestSetup.h" // should be small enough to have fast tests, but larger than minGPUReductionDataSize // and large enough to require multiple CUDA blocks for reduction constexpr int VECTOR_TEST_SIZE = 10000; TYPED_TEST( VectorTest, prefixSum ) { using VectorType = typename TestFixture::VectorType; using VectorOperations = typename TestFixture::VectorOperations; using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; const int size = VECTOR_TEST_SIZE; if( std::is_same< RealType, float >::value || std::is_same< IndexType, short >::value ) return; VectorType v( size ); ViewType v_view( v ); typename VectorType::HostType v_host( size ); v = 0; v_host = -1; v.prefixSum(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v.prefixSum(); v_host = v; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i ); setConstantSequence( v, 1 ); v_host = -1; v_view.prefixSum(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); v = 0; v_host = -1; v_view.prefixSum(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v_view.prefixSum(); v_host = v_view; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i ); //// // With CUDA, perform tests with multiple CUDA grids. if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA Algorithms::CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::setMaxGridSize( 3 ); v = 0; v_host = -1; v.prefixSum(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v.prefixSum(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i ); setConstantSequence( v, 1 ); v_host = -1; v_view.prefixSum(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); v = 0; v_host = -1; v_view.prefixSum(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v_view.prefixSum(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v_view; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i ); CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Inclusive, RealType, IndexType >::resetMaxGridSize(); #endif } } TYPED_TEST( VectorTest, exclusivePrefixSum ) { using VectorType = typename TestFixture::VectorType; using VectorOperations = typename TestFixture::VectorOperations; using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; const int size = VECTOR_TEST_SIZE; if( std::is_same< RealType, float >::value || std::is_same< IndexType, short >::value ) return; VectorType v; v.setSize( size ); ViewType v_view( v ); typename VectorType::HostType v_host( size ); setConstantSequence( v, 1 ); v_host = -1; v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ); v.setValue( 0 ); v_host = -1; v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); v_host = v; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 ); setConstantSequence( v, 1 ); v_host = -1; v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ); v.setValue( 0 ); v_host = -1; v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); v_host = v_view; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 ); //// // With CUDA, perform tests with multiple CUDA grids. if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::setMaxGridSize( 3 ); setConstantSequence( v, 1 ); v_host = -1; v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ); v.setValue( 0 ); v_host = -1; v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 ); setConstantSequence( v, 1 ); v_host = -1; v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ); v.setValue( 0 ); v_host = -1; v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ); setLinearSequence( v ); v_host = -1; v_view.template prefixSum< Algorithms::PrefixSumType::Exclusive >(); EXPECT_GT( ( CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::gridsCount ), 1 ); v_host = v_view; for( int i = 1; i < size; i++ ) EXPECT_EQ( v_host[ i ] - v_host[ i - 1 ], i - 1 ); CudaPrefixSumKernelLauncher< Algorithms::PrefixSumType::Exclusive, RealType, IndexType >::resetMaxGridSize(); #endif } } // TODO: test prefix sum with custom begin and end parameters template< typename FlagsView > void setupFlags( FlagsView& f ) { auto f1 = [] __cuda_callable__ ( typename FlagsView::IndexType i ) { return ( i % 5 ) == 0; }; f.evaluate( f1 ); } /* TYPED_TEST( VectorTest, segmentedPrefixSum ) { using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; using FlagsArrayType = Array< bool, DeviceType, IndexType >; using FlagsViewType = ArrayView< bool, DeviceType, IndexType >; const int size = VECTOR_TEST_SIZE; VectorType v( size ); ViewType v_view( v ); FlagsArrayType flags( size ), flags_copy( size ); FlagsViewType flags_view( flags ); //auto f1 = [] __cuda_callable__ ( IndexType i ) { return ( i % 5 ) == 0; }; //flags_view.evaluate( f1 ); setupFlags( flags_view ); flags_copy = flags_view; v = 0; v.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), 0 ); flags_view = flags_copy; v = 1; v.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v.getElement( i ), ( i % 5 ) + 1 ); flags_view = flags_copy; setLinearSequence( v ); v.computeSegmentedPrefixSum( flags_view ); for( int i = 1; i < size; i++ ) { if( flags.getElement( i ) ) EXPECT_EQ( v.getElement( i ), i ); else EXPECT_EQ( v.getElement( i ) - v.getElement( i - 1 ), i ); } flags_view = flags_copy; v_view = 0; v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v_view.getElement( i ), 0 ); flags_view = flags_copy; v_view = 1; v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 0; i < size; i++ ) EXPECT_EQ( v_view.getElement( i ), ( i % 5 ) + 1 ); flags_view = flags_copy; //v_view.evaluate( [] __cuda_callable__ ( IndexType i ) { return i; } ); setLinearSequence( v ); v_view.computeSegmentedPrefixSum( flags_view ); for( int i = 1; i < size; i++ ) { if( flags.getElement( i ) ) EXPECT_EQ( v_view.getElement( i ), i ); else EXPECT_EQ( v_view.getElement( i ) - v_view.getElement( i - 1 ), i ); } } */ #endif // HAVE_GTEST #include "../main.h"