Loading src/UnitTests/Algorithms/CMakeLists.txt +14 −0 Original line number Diff line number Diff line Loading @@ -30,3 +30,17 @@ if( BUILD_CUDA ) add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} ) endforeach() endif() IF( BUILD_HIP ) HIP_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp ) TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) HIP_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp ) TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) HIP_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp ) TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_HIP ) src/UnitTests/Algorithms/MemoryOperationsTest.h +226 −1 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ #ifdef HAVE_GTEST #include <TNL/Allocators/Host.h> #include <TNL/Allocators/Cuda.h> #include <TNL/Allocators/Hip.h> #include <TNL/Algorithms/MemoryOperations.h> #include <TNL/Algorithms/MultiDeviceMemoryOperations.h> Loading Loading @@ -182,7 +183,6 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_host ) allocator.deallocate( data, ARRAY_TEST_SIZE ); } #ifdef HAVE_CUDA TYPED_TEST( MemoryOperationsTest, allocateMemory_cuda ) { Loading Loading @@ -406,6 +406,231 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_cuda ) cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } #endif // HAVE_CUDA #ifdef HAVE_HIP TYPED_TEST( MemoryOperationsTest, allocateMemory_hip ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Hip< ValueType >; Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); ASSERT_NE( data, nullptr ); allocator.deallocate( data, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); } TYPED_TEST( MemoryOperationsTest, setElement_hip ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Hip< ValueType >; Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) MemoryOperations< Devices::Hip >::setElement( &data[ i ], (ValueType) i ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) { ValueType d; ASSERT_EQ( hipMemcpy( &d, &data[ i ], sizeof( ValueType ), hipMemcpyDeviceToHost ), hipSuccess ); EXPECT_EQ( d, i ); EXPECT_EQ( MemoryOperations< Devices::Hip >::getElement( &data[ i ] ), i ); } allocator.deallocate( data, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); } TYPED_TEST( MemoryOperationsTest, set_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) EXPECT_EQ( hostData[ i ], 13 ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, copy_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* hostData2 = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE ); EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hostAllocator.deallocate( hostData2, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, copyWithConversions_hip ) { using HostAllocator1 = Allocators::Host< int >; using HostAllocator2 = Allocators::Host< double >; using HipAllocator1 = Allocators::Hip< long >; using HipAllocator2 = Allocators::Hip< float >; HostAllocator1 hostAllocator1; HostAllocator2 hostAllocator2; HipAllocator1 hipAllocator1; HipAllocator2 hipAllocator2; int* hostData = hostAllocator1.allocate( ARRAY_TEST_SIZE ); double* hostData2 = hostAllocator2.allocate( ARRAY_TEST_SIZE ); long* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE ); float* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( hostData[ i ], hostData2[ i ] ); hostAllocator1.deallocate( hostData, ARRAY_TEST_SIZE ); hostAllocator2.deallocate( hostData2, ARRAY_TEST_SIZE ); hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, compare_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE ); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE ); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, compareWithConversions_hip ) { using HostAllocator = Allocators::Host< int >; using HipAllocator1 = Allocators::Hip< float >; using HipAllocator2 = Allocators::Hip< double >; HostAllocator hostAllocator; HipAllocator1 hipAllocator1; HipAllocator2 hipAllocator2; int* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); float* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE ); double* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData, (float) 8, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE ); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); MemoryOperations< Devices::Hip >::set( deviceData, (float) 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE ); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, containsValue_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 10; i++ ) EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 10; i < 20; i++ ) EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, containsOnlyValue_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 20; i++ ) EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = 10; MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } #endif // HAVE_HIP #endif // HAVE_GTEST Loading src/UnitTests/Algorithms/MultireductionTest.h +6 −0 Original line number Diff line number Diff line Loading @@ -90,6 +90,12 @@ using VectorTypes = ::testing::Types< Vector< int, Devices::Cuda >, Vector< float, Devices::Cuda > #endif #ifdef HAVE_HIP , Vector< int, Devices::Hip >, Vector< float, Devices::Hip > #endif >; TYPED_TEST_SUITE( MultireductionTest, VectorTypes ); Loading src/UnitTests/Algorithms/ParallelForTest.h +160 −9 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Devices/Hip.h> #include <TNL/Containers/Array.h> #include <TNL/Algorithms/ParallelFor.h> Loading Loading @@ -140,7 +141,7 @@ TEST( ParallelForTest, 3D_host ) } #ifdef HAVE_CUDA // nvcc does not allow __cuda_callable__ lambdas inside private regions // nvcc does not allow __device_callable__ lambdas inside private regions void test_1D_cuda() { using Array = Containers::Array< int, Devices::Cuda >; Loading @@ -156,7 +157,7 @@ void test_1D_cuda() a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel = [=] __cuda_callable__ (int i) mutable auto kernel = [=] __device_callable__ (int i) mutable { view[i] = i; }; Loading @@ -176,7 +177,7 @@ TEST( ParallelForTest, 1D_cuda ) test_1D_cuda(); } // nvcc does not allow __cuda_callable__ lambdas inside private regions // nvcc does not allow __device_callable__ lambdas inside private regions void test_2D_cuda() { using Array = Containers::Array< int, Devices::Cuda >; Loading @@ -192,7 +193,7 @@ void test_2D_cuda() a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __cuda_callable__ (int i, int j) mutable auto kernel1 = [=] __device_callable__ (int i, int j) mutable { view[i] = i; }; Loading @@ -206,7 +207,7 @@ void test_2D_cuda() } a.setValue( 0 ); auto kernel2 = [=] __cuda_callable__ (int i, int j) mutable auto kernel2 = [=] __device_callable__ (int i, int j) mutable { view[j] = j; }; Loading @@ -225,7 +226,7 @@ TEST( ParallelForTest, 2D_cuda ) test_2D_cuda(); } // nvcc does not allow __cuda_callable__ lambdas inside private regions // nvcc does not allow __device_callable__ lambdas inside private regions void test_3D_cuda() { using Array = Containers::Array< int, Devices::Cuda >; Loading @@ -241,7 +242,7 @@ void test_3D_cuda() a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __cuda_callable__ (int i, int j, int k) mutable auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable { view[i] = i; }; Loading @@ -255,7 +256,7 @@ void test_3D_cuda() } a.setValue( 0 ); auto kernel2 = [=] __cuda_callable__ (int i, int j, int k) mutable auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable { view[j] = j; }; Loading @@ -268,7 +269,7 @@ void test_3D_cuda() } a.setValue( 0 ); auto kernel3 = [=] __cuda_callable__ (int i, int j, int k) mutable auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable { view[k] = k; }; Loading @@ -286,7 +287,157 @@ TEST( ParallelForTest, 3D_cuda ) { test_3D_cuda(); } #endif // HAVE_CUDA #ifdef HAVE_HIP void test_1D_hip() { using Array = Containers::Array< int, Devices::Hip >; using ArrayHost = Containers::Array< int, Devices::Host >; Array a; for (int size = 100; size <= 100000000; size *= 100) { ArrayHost expected; expected.setSize( size ); for (int i = 0; i < size; i++) expected[ i ] = i; a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel = [=] __device_callable__ (int i) mutable { view[i] = i; }; Algorithms::ParallelFor< Devices::Hip >::exec( 0, size, kernel ); ArrayHost ah; ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } } } TEST( ParallelForTest, 1D_hip ) { test_1D_hip(); } // nvcc does not allow __device_callable__ lambdas inside private regions void test_2D_hip() { using Array = Containers::Array< int, Devices::Hip >; using ArrayHost = Containers::Array< int, Devices::Host >; Array a; for (int size = 100; size <= 100000000; size *= 100) { ArrayHost expected; expected.setSize( size ); for (int i = 0; i < size; i++) expected[ i ] = i; a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __device_callable__ (int i, int j) mutable { view[i] = i; }; Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, size, 1, kernel1 ); ArrayHost ah; ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } a.setValue( 0 ); auto kernel2 = [=] __device_callable__ (int i, int j) mutable { view[j] = j; }; Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, 1, size, kernel2 ); ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } } } TEST( ParallelForTest, 2D_hip ) { test_2D_hip(); } // nvcc does not allow __device_callable__ lambdas inside private regions void test_3D_hip() { using Array = Containers::Array< int, Devices::Hip >; using ArrayHost = Containers::Array< int, Devices::Host >; Array a; for (int size = 100; size <= 100000000; size *= 100) { ArrayHost expected; expected.setSize( size ); for (int i = 0; i < size; i++) expected[ i ] = i; a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable { view[i] = i; }; Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, size, 1, 1, kernel1 ); ArrayHost ah; ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } a.setValue( 0 ); auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable { view[j] = j; }; Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, size, 1, kernel2 ); ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } a.setValue( 0 ); auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable { view[k] = k; }; Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, 1, size, kernel3 ); ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } } } TEST( ParallelForTest, 3D_hip ) { test_3D_hip(); } #endif #endif #include "../main.h" src/UnitTests/Containers/VectorPrefixSumTest.h +16 −16 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
src/UnitTests/Algorithms/CMakeLists.txt +14 −0 Original line number Diff line number Diff line Loading @@ -30,3 +30,17 @@ if( BUILD_CUDA ) add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} ) endforeach() endif() IF( BUILD_HIP ) HIP_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp ) TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) HIP_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp ) TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) HIP_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp ) TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_HIP )
src/UnitTests/Algorithms/MemoryOperationsTest.h +226 −1 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ #ifdef HAVE_GTEST #include <TNL/Allocators/Host.h> #include <TNL/Allocators/Cuda.h> #include <TNL/Allocators/Hip.h> #include <TNL/Algorithms/MemoryOperations.h> #include <TNL/Algorithms/MultiDeviceMemoryOperations.h> Loading Loading @@ -182,7 +183,6 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_host ) allocator.deallocate( data, ARRAY_TEST_SIZE ); } #ifdef HAVE_CUDA TYPED_TEST( MemoryOperationsTest, allocateMemory_cuda ) { Loading Loading @@ -406,6 +406,231 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_cuda ) cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } #endif // HAVE_CUDA #ifdef HAVE_HIP TYPED_TEST( MemoryOperationsTest, allocateMemory_hip ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Hip< ValueType >; Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); ASSERT_NE( data, nullptr ); allocator.deallocate( data, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); } TYPED_TEST( MemoryOperationsTest, setElement_hip ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Hip< ValueType >; Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) MemoryOperations< Devices::Hip >::setElement( &data[ i ], (ValueType) i ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) { ValueType d; ASSERT_EQ( hipMemcpy( &d, &data[ i ], sizeof( ValueType ), hipMemcpyDeviceToHost ), hipSuccess ); EXPECT_EQ( d, i ); EXPECT_EQ( MemoryOperations< Devices::Hip >::getElement( &data[ i ] ), i ); } allocator.deallocate( data, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); } TYPED_TEST( MemoryOperationsTest, set_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) EXPECT_EQ( hostData[ i ], 13 ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, copy_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* hostData2 = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE ); EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hostAllocator.deallocate( hostData2, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, copyWithConversions_hip ) { using HostAllocator1 = Allocators::Host< int >; using HostAllocator2 = Allocators::Host< double >; using HipAllocator1 = Allocators::Hip< long >; using HipAllocator2 = Allocators::Hip< float >; HostAllocator1 hostAllocator1; HostAllocator2 hostAllocator2; HipAllocator1 hipAllocator1; HipAllocator2 hipAllocator2; int* hostData = hostAllocator1.allocate( ARRAY_TEST_SIZE ); double* hostData2 = hostAllocator2.allocate( ARRAY_TEST_SIZE ); long* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE ); float* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE ); MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( hostData[ i ], hostData2[ i ] ); hostAllocator1.deallocate( hostData, ARRAY_TEST_SIZE ); hostAllocator2.deallocate( hostData2, ARRAY_TEST_SIZE ); hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, compare_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE ); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE ); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, compareWithConversions_hip ) { using HostAllocator = Allocators::Host< int >; using HipAllocator1 = Allocators::Hip< float >; using HipAllocator2 = Allocators::Hip< double >; HostAllocator hostAllocator; HipAllocator1 hipAllocator1; HipAllocator2 hipAllocator2; int* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); float* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE ); double* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE ); MemoryOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData, (float) 8, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE ); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); MemoryOperations< Devices::Hip >::set( deviceData, (float) 7, ARRAY_TEST_SIZE ); MemoryOperations< Devices::Hip >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE ); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE ); hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, containsValue_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 10; i++ ) EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 10; i < 20; i++ ) EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } TYPED_TEST( MemoryOperationsTest, containsOnlyValue_hip ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; using HipAllocator = Allocators::Hip< ValueType >; HostAllocator hostAllocator; HipAllocator hipAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 20; i++ ) EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = 10; MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } #endif // HAVE_HIP #endif // HAVE_GTEST Loading
src/UnitTests/Algorithms/MultireductionTest.h +6 −0 Original line number Diff line number Diff line Loading @@ -90,6 +90,12 @@ using VectorTypes = ::testing::Types< Vector< int, Devices::Cuda >, Vector< float, Devices::Cuda > #endif #ifdef HAVE_HIP , Vector< int, Devices::Hip >, Vector< float, Devices::Hip > #endif >; TYPED_TEST_SUITE( MultireductionTest, VectorTypes ); Loading
src/UnitTests/Algorithms/ParallelForTest.h +160 −9 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Devices/Hip.h> #include <TNL/Containers/Array.h> #include <TNL/Algorithms/ParallelFor.h> Loading Loading @@ -140,7 +141,7 @@ TEST( ParallelForTest, 3D_host ) } #ifdef HAVE_CUDA // nvcc does not allow __cuda_callable__ lambdas inside private regions // nvcc does not allow __device_callable__ lambdas inside private regions void test_1D_cuda() { using Array = Containers::Array< int, Devices::Cuda >; Loading @@ -156,7 +157,7 @@ void test_1D_cuda() a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel = [=] __cuda_callable__ (int i) mutable auto kernel = [=] __device_callable__ (int i) mutable { view[i] = i; }; Loading @@ -176,7 +177,7 @@ TEST( ParallelForTest, 1D_cuda ) test_1D_cuda(); } // nvcc does not allow __cuda_callable__ lambdas inside private regions // nvcc does not allow __device_callable__ lambdas inside private regions void test_2D_cuda() { using Array = Containers::Array< int, Devices::Cuda >; Loading @@ -192,7 +193,7 @@ void test_2D_cuda() a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __cuda_callable__ (int i, int j) mutable auto kernel1 = [=] __device_callable__ (int i, int j) mutable { view[i] = i; }; Loading @@ -206,7 +207,7 @@ void test_2D_cuda() } a.setValue( 0 ); auto kernel2 = [=] __cuda_callable__ (int i, int j) mutable auto kernel2 = [=] __device_callable__ (int i, int j) mutable { view[j] = j; }; Loading @@ -225,7 +226,7 @@ TEST( ParallelForTest, 2D_cuda ) test_2D_cuda(); } // nvcc does not allow __cuda_callable__ lambdas inside private regions // nvcc does not allow __device_callable__ lambdas inside private regions void test_3D_cuda() { using Array = Containers::Array< int, Devices::Cuda >; Loading @@ -241,7 +242,7 @@ void test_3D_cuda() a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __cuda_callable__ (int i, int j, int k) mutable auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable { view[i] = i; }; Loading @@ -255,7 +256,7 @@ void test_3D_cuda() } a.setValue( 0 ); auto kernel2 = [=] __cuda_callable__ (int i, int j, int k) mutable auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable { view[j] = j; }; Loading @@ -268,7 +269,7 @@ void test_3D_cuda() } a.setValue( 0 ); auto kernel3 = [=] __cuda_callable__ (int i, int j, int k) mutable auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable { view[k] = k; }; Loading @@ -286,7 +287,157 @@ TEST( ParallelForTest, 3D_cuda ) { test_3D_cuda(); } #endif // HAVE_CUDA #ifdef HAVE_HIP void test_1D_hip() { using Array = Containers::Array< int, Devices::Hip >; using ArrayHost = Containers::Array< int, Devices::Host >; Array a; for (int size = 100; size <= 100000000; size *= 100) { ArrayHost expected; expected.setSize( size ); for (int i = 0; i < size; i++) expected[ i ] = i; a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel = [=] __device_callable__ (int i) mutable { view[i] = i; }; Algorithms::ParallelFor< Devices::Hip >::exec( 0, size, kernel ); ArrayHost ah; ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } } } TEST( ParallelForTest, 1D_hip ) { test_1D_hip(); } // nvcc does not allow __device_callable__ lambdas inside private regions void test_2D_hip() { using Array = Containers::Array< int, Devices::Hip >; using ArrayHost = Containers::Array< int, Devices::Host >; Array a; for (int size = 100; size <= 100000000; size *= 100) { ArrayHost expected; expected.setSize( size ); for (int i = 0; i < size; i++) expected[ i ] = i; a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __device_callable__ (int i, int j) mutable { view[i] = i; }; Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, size, 1, kernel1 ); ArrayHost ah; ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } a.setValue( 0 ); auto kernel2 = [=] __device_callable__ (int i, int j) mutable { view[j] = j; }; Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, 1, size, kernel2 ); ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } } } TEST( ParallelForTest, 2D_hip ) { test_2D_hip(); } // nvcc does not allow __device_callable__ lambdas inside private regions void test_3D_hip() { using Array = Containers::Array< int, Devices::Hip >; using ArrayHost = Containers::Array< int, Devices::Host >; Array a; for (int size = 100; size <= 100000000; size *= 100) { ArrayHost expected; expected.setSize( size ); for (int i = 0; i < size; i++) expected[ i ] = i; a.setSize( size ); a.setValue( 0 ); auto view = a.getView(); auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable { view[i] = i; }; Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, size, 1, 1, kernel1 ); ArrayHost ah; ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } a.setValue( 0 ); auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable { view[j] = j; }; Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, size, 1, kernel2 ); ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } a.setValue( 0 ); auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable { view[k] = k; }; Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, 1, size, kernel3 ); ah = a; if( ah != expected ) { for (int i = 0; i < size; i++) ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i; } } } TEST( ParallelForTest, 3D_hip ) { test_3D_hip(); } #endif #endif #include "../main.h"
src/UnitTests/Containers/VectorPrefixSumTest.h +16 −16 File changed.Preview size limit exceeded, changes collapsed. Show changes