Loading src/UnitTests/Algorithms/CMakeLists.txt +30 −2 Original line number Diff line number Diff line Loading @@ -10,8 +10,13 @@ set( COMMON_TESTS unrolledForTest ) set( CPP_TESTS SegmentedScanTest ) set( CUDA_TESTS ) set( CPP_TESTS ScanTest SegmentedScanTest ) set( CUDA_TESTS ScanTestCuda ) if( BUILD_CUDA ) set( CUDA_TESTS ${CUDA_TESTS} ${COMMON_TESTS} ) else() Loading @@ -32,3 +37,26 @@ if( BUILD_CUDA ) add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} ) endforeach() endif() if( ${BUILD_MPI} ) ADD_EXECUTABLE( DistributedScanTest DistributedScanTest.cpp ) TARGET_COMPILE_OPTIONS( DistributedScanTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DistributedScanTest ${GTEST_BOTH_LIBRARIES} ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( DistributedScanTestCuda DistributedScanTestCuda.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DistributedScanTestCuda ${GTEST_BOTH_LIBRARIES} ) endif() SET( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTest${CMAKE_EXECUTABLE_SUFFIX}" ) ADD_TEST( NAME DistributedScanTest COMMAND "mpirun" ${mpi_test_parameters}) ADD_TEST( NAME DistributedScanTest_nodistr COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTest${CMAKE_EXECUTABLE_SUFFIX}" ) if( BUILD_CUDA ) SET( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTestCuda${CMAKE_EXECUTABLE_SUFFIX}" ) ADD_TEST( NAME DistributedScanTestCuda COMMAND "mpirun" ${mpi_test_parameters}) ADD_TEST( NAME DistributedScanTestCuda_nodistr COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTestCuda${CMAKE_EXECUTABLE_SUFFIX}" ) endif() endif() src/UnitTests/Algorithms/DistributedScanTest.cpp 0 → 100644 +1 −0 Original line number Diff line number Diff line #include "DistributedScanTest.h" src/UnitTests/Containers/DistributedVectorTest.h→src/UnitTests/Algorithms/DistributedScanTest.h +100 −108 Original line number Diff line number Diff line /*************************************************************************** DistributedVectorTest.h - description ------------------- begin : Sep 6, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ #pragma once #ifdef HAVE_GTEST #include <limits> #include <gtest/gtest.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Containers/DistributedVectorView.h> #include <TNL/Containers/DistributedArray.h> #include <TNL/Containers/DistributedArrayView.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Algorithms/DistributedScan.h> #define DISTRIBUTED_VECTOR #include "VectorHelperFunctions.h" #include "../Containers/VectorHelperFunctions.h" using namespace TNL; using namespace TNL::Containers; using namespace TNL::Algorithms; using namespace TNL::MPI; /* * Light check of DistributedVector. * Light check of DistributedArray. * * - Number of processes is not limited. * - Global size is hardcoded as 97 to force non-uniform distribution. * - Communication group is hardcoded as AllGroup -- it may be changed as needed. */ template< typename DistributedVector > class DistributedVectorTest template< typename DistributedArray > class DistributedScanTest : public ::testing::Test { protected: using RealType = typename DistributedVector::RealType; using DeviceType = typename DistributedVector::DeviceType; using IndexType = typename DistributedVector::IndexType; using DistributedVectorType = DistributedVector; using VectorViewType = typename DistributedVectorType::LocalViewType; using DistributedVectorView = Containers::DistributedVectorView< RealType, DeviceType, IndexType >; using HostDistributedVectorType = typename DistributedVectorType::template Self< RealType, Devices::Sequential >; using ValueType = typename DistributedArray::ValueType; using DeviceType = typename DistributedArray::DeviceType; using IndexType = typename DistributedArray::IndexType; using DistributedArrayType = DistributedArray; using VectorViewType = typename DistributedArrayType::LocalViewType; using DistributedArrayView = Containers::DistributedArrayView< ValueType, DeviceType, IndexType >; using HostDistributedArrayType = typename DistributedArrayType::template Self< ValueType, Devices::Sequential >; const MPI_Comm group = AllGroup(); DistributedVectorType v; DistributedVectorView v_view; HostDistributedVectorType v_host; DistributedArrayType v; DistributedArrayView v_view; HostDistributedArrayType v_host; const int rank = GetRank(group); const int nproc = GetSize(group); Loading @@ -58,9 +54,9 @@ protected: // some arbitrary value (but must be 0 if not distributed) const int ghosts = (nproc > 1) ? 4 : 0; DistributedVectorTest() DistributedScanTest() { using LocalRangeType = typename DistributedVector::LocalRangeType; using LocalRangeType = typename DistributedArray::LocalRangeType; const LocalRangeType localRange = Partitioner< IndexType >::splitRange( globalSize, group ); v.setDistribution( localRange, ghosts, globalSize, group ); Loading @@ -75,74 +71,70 @@ protected: } }; // types for which DistributedVectorTest is instantiated using DistributedVectorTypes = ::testing::Types< DistributedVector< double, Devices::Sequential, int >, DistributedVector< double, Devices::Host, int > // types for which DistributedScanTest is instantiated using DistributedArrayTypes = ::testing::Types< DistributedArray< double, Devices::Sequential, int >, DistributedArray< double, Devices::Host, int > #ifdef HAVE_CUDA , DistributedVector< double, Devices::Cuda, int > DistributedArray< double, Devices::Cuda, int > #endif >; TYPED_TEST_SUITE( DistributedVectorTest, DistributedVectorTypes ); TYPED_TEST_SUITE( DistributedScanTest, DistributedArrayTypes ); // TODO: test that horizontal operations are computed for ghost values without synchronization TYPED_TEST( DistributedVectorTest, scan ) TYPED_TEST( DistributedScanTest, inclusiveScan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; using IndexType = typename TestFixture::DistributedVectorType::IndexType; using ValueType = typename TestFixture::DistributedArrayType::ValueType; using DeviceType = typename TestFixture::DistributedArrayType::DeviceType; using IndexType = typename TestFixture::DistributedArrayType::IndexType; auto& v = this->v; auto& v_view = this->v_view; auto& v_host = this->v_host; const auto localRange = v.getLocalRange(); // FIXME: tests should work in all cases if( std::is_same< RealType, float >::value ) return; setConstantSequence( v, 0 ); v_host = -1; v.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; v.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v_view.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; v_view.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; Loading @@ -152,67 +144,67 @@ TYPED_TEST( DistributedVectorTest, scan ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::maxGridSize() = 3; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; v.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); setLinearSequence( v ); v_host = -1; v.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v_view.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); setLinearSequence( v ); v_host = -1; v_view.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::resetMaxGridSize(); Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::resetMaxGridSize(); #endif } } TYPED_TEST( DistributedVectorTest, exclusiveScan ) TYPED_TEST( DistributedScanTest, exclusiveScan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; using IndexType = typename TestFixture::DistributedVectorType::IndexType; using ValueType = typename TestFixture::DistributedArrayType::ValueType; using DeviceType = typename TestFixture::DistributedArrayType::DeviceType; using IndexType = typename TestFixture::DistributedArrayType::IndexType; auto& v = this->v; auto& v_view = this->v_view; Loading @@ -220,48 +212,48 @@ TYPED_TEST( DistributedVectorTest, exclusiveScan ) const auto localRange = v.getLocalRange(); // FIXME: tests should work in all cases if( std::is_same< RealType, float >::value ) if( std::is_same< ValueType, float >::value ) return; setConstantSequence( v, 0 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; Loading @@ -271,58 +263,58 @@ TYPED_TEST( DistributedVectorTest, exclusiveScan ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::maxGridSize() = 3; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ); setLinearSequence( v ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ); setLinearSequence( v ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::resetMaxGridSize(); Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::resetMaxGridSize(); #endif } } Loading src/UnitTests/Algorithms/DistributedScanTestCuda.cu 0 → 100644 +1 −0 Original line number Diff line number Diff line #include "DistributedScanTest.h" src/UnitTests/Algorithms/ScanTest.cpp 0 → 100644 +1 −0 Original line number Diff line number Diff line #include "ScanTest.h" Loading
src/UnitTests/Algorithms/CMakeLists.txt +30 −2 Original line number Diff line number Diff line Loading @@ -10,8 +10,13 @@ set( COMMON_TESTS unrolledForTest ) set( CPP_TESTS SegmentedScanTest ) set( CUDA_TESTS ) set( CPP_TESTS ScanTest SegmentedScanTest ) set( CUDA_TESTS ScanTestCuda ) if( BUILD_CUDA ) set( CUDA_TESTS ${CUDA_TESTS} ${COMMON_TESTS} ) else() Loading @@ -32,3 +37,26 @@ if( BUILD_CUDA ) add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} ) endforeach() endif() if( ${BUILD_MPI} ) ADD_EXECUTABLE( DistributedScanTest DistributedScanTest.cpp ) TARGET_COMPILE_OPTIONS( DistributedScanTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DistributedScanTest ${GTEST_BOTH_LIBRARIES} ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( DistributedScanTestCuda DistributedScanTestCuda.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DistributedScanTestCuda ${GTEST_BOTH_LIBRARIES} ) endif() SET( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTest${CMAKE_EXECUTABLE_SUFFIX}" ) ADD_TEST( NAME DistributedScanTest COMMAND "mpirun" ${mpi_test_parameters}) ADD_TEST( NAME DistributedScanTest_nodistr COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTest${CMAKE_EXECUTABLE_SUFFIX}" ) if( BUILD_CUDA ) SET( mpi_test_parameters -np 4 -H localhost:4 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTestCuda${CMAKE_EXECUTABLE_SUFFIX}" ) ADD_TEST( NAME DistributedScanTestCuda COMMAND "mpirun" ${mpi_test_parameters}) ADD_TEST( NAME DistributedScanTestCuda_nodistr COMMAND "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DistributedScanTestCuda${CMAKE_EXECUTABLE_SUFFIX}" ) endif() endif()
src/UnitTests/Algorithms/DistributedScanTest.cpp 0 → 100644 +1 −0 Original line number Diff line number Diff line #include "DistributedScanTest.h"
src/UnitTests/Containers/DistributedVectorTest.h→src/UnitTests/Algorithms/DistributedScanTest.h +100 −108 Original line number Diff line number Diff line /*************************************************************************** DistributedVectorTest.h - description ------------------- begin : Sep 6, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ #pragma once #ifdef HAVE_GTEST #include <limits> #include <gtest/gtest.h> #include <TNL/Containers/DistributedVector.h> #include <TNL/Containers/DistributedVectorView.h> #include <TNL/Containers/DistributedArray.h> #include <TNL/Containers/DistributedArrayView.h> #include <TNL/Containers/Partitioner.h> #include <TNL/Algorithms/DistributedScan.h> #define DISTRIBUTED_VECTOR #include "VectorHelperFunctions.h" #include "../Containers/VectorHelperFunctions.h" using namespace TNL; using namespace TNL::Containers; using namespace TNL::Algorithms; using namespace TNL::MPI; /* * Light check of DistributedVector. * Light check of DistributedArray. * * - Number of processes is not limited. * - Global size is hardcoded as 97 to force non-uniform distribution. * - Communication group is hardcoded as AllGroup -- it may be changed as needed. */ template< typename DistributedVector > class DistributedVectorTest template< typename DistributedArray > class DistributedScanTest : public ::testing::Test { protected: using RealType = typename DistributedVector::RealType; using DeviceType = typename DistributedVector::DeviceType; using IndexType = typename DistributedVector::IndexType; using DistributedVectorType = DistributedVector; using VectorViewType = typename DistributedVectorType::LocalViewType; using DistributedVectorView = Containers::DistributedVectorView< RealType, DeviceType, IndexType >; using HostDistributedVectorType = typename DistributedVectorType::template Self< RealType, Devices::Sequential >; using ValueType = typename DistributedArray::ValueType; using DeviceType = typename DistributedArray::DeviceType; using IndexType = typename DistributedArray::IndexType; using DistributedArrayType = DistributedArray; using VectorViewType = typename DistributedArrayType::LocalViewType; using DistributedArrayView = Containers::DistributedArrayView< ValueType, DeviceType, IndexType >; using HostDistributedArrayType = typename DistributedArrayType::template Self< ValueType, Devices::Sequential >; const MPI_Comm group = AllGroup(); DistributedVectorType v; DistributedVectorView v_view; HostDistributedVectorType v_host; DistributedArrayType v; DistributedArrayView v_view; HostDistributedArrayType v_host; const int rank = GetRank(group); const int nproc = GetSize(group); Loading @@ -58,9 +54,9 @@ protected: // some arbitrary value (but must be 0 if not distributed) const int ghosts = (nproc > 1) ? 4 : 0; DistributedVectorTest() DistributedScanTest() { using LocalRangeType = typename DistributedVector::LocalRangeType; using LocalRangeType = typename DistributedArray::LocalRangeType; const LocalRangeType localRange = Partitioner< IndexType >::splitRange( globalSize, group ); v.setDistribution( localRange, ghosts, globalSize, group ); Loading @@ -75,74 +71,70 @@ protected: } }; // types for which DistributedVectorTest is instantiated using DistributedVectorTypes = ::testing::Types< DistributedVector< double, Devices::Sequential, int >, DistributedVector< double, Devices::Host, int > // types for which DistributedScanTest is instantiated using DistributedArrayTypes = ::testing::Types< DistributedArray< double, Devices::Sequential, int >, DistributedArray< double, Devices::Host, int > #ifdef HAVE_CUDA , DistributedVector< double, Devices::Cuda, int > DistributedArray< double, Devices::Cuda, int > #endif >; TYPED_TEST_SUITE( DistributedVectorTest, DistributedVectorTypes ); TYPED_TEST_SUITE( DistributedScanTest, DistributedArrayTypes ); // TODO: test that horizontal operations are computed for ghost values without synchronization TYPED_TEST( DistributedVectorTest, scan ) TYPED_TEST( DistributedScanTest, inclusiveScan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; using IndexType = typename TestFixture::DistributedVectorType::IndexType; using ValueType = typename TestFixture::DistributedArrayType::ValueType; using DeviceType = typename TestFixture::DistributedArrayType::DeviceType; using IndexType = typename TestFixture::DistributedArrayType::IndexType; auto& v = this->v; auto& v_view = this->v_view; auto& v_host = this->v_host; const auto localRange = v.getLocalRange(); // FIXME: tests should work in all cases if( std::is_same< RealType, float >::value ) return; setConstantSequence( v, 0 ); v_host = -1; v.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; v.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v_view.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; v_view.scan(); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; Loading @@ -152,67 +144,67 @@ TYPED_TEST( DistributedVectorTest, scan ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::maxGridSize() = 3; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; v.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); setLinearSequence( v ); v_host = -1; v.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v_view.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ); setLinearSequence( v ); v_host = -1; v_view.scan(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Inclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::resetMaxGridSize(); Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, ValueType, IndexType >::resetMaxGridSize(); #endif } } TYPED_TEST( DistributedVectorTest, exclusiveScan ) TYPED_TEST( DistributedScanTest, exclusiveScan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; using IndexType = typename TestFixture::DistributedVectorType::IndexType; using ValueType = typename TestFixture::DistributedArrayType::ValueType; using DeviceType = typename TestFixture::DistributedArrayType::DeviceType; using IndexType = typename TestFixture::DistributedArrayType::IndexType; auto& v = this->v; auto& v_view = this->v_view; Loading @@ -220,48 +212,48 @@ TYPED_TEST( DistributedVectorTest, exclusiveScan ) const auto localRange = v.getLocalRange(); // FIXME: tests should work in all cases if( std::is_same< RealType, float >::value ) if( std::is_same< ValueType, float >::value ) return; setConstantSequence( v, 0 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; Loading @@ -271,58 +263,58 @@ TYPED_TEST( DistributedVectorTest, exclusiveScan ) if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef HAVE_CUDA Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::maxGridSize() = 3; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::maxGridSize() = 3; setConstantSequence( v, 0 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ); setLinearSequence( v ); v_host = -1; v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; // test views setConstantSequence( v, 0 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ); setConstantSequence( v, 1 ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ); setLinearSequence( v ); v_host = -1; v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host.setValue( -1 ); DistributedScan< ScanType::Exclusive >::perform( v_view, 0, this->globalSize, std::plus<>{}, (ValueType) 0 ); EXPECT_GT( ( Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::resetMaxGridSize(); Algorithms::detail::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, ValueType, IndexType >::resetMaxGridSize(); #endif } } Loading
src/UnitTests/Algorithms/DistributedScanTestCuda.cu 0 → 100644 +1 −0 Original line number Diff line number Diff line #include "DistributedScanTest.h"
src/UnitTests/Algorithms/ScanTest.cpp 0 → 100644 +1 −0 Original line number Diff line number Diff line #include "ScanTest.h"