Added tests of algorithms for HIP. (3cb394a0) · Commits · TNL / tnl-dev

src/UnitTests/Algorithms/CMakeLists.txt

+14 −0

Original line number	Diff line number	Diff line
		@@ -30,3 +30,17 @@ if( BUILD_CUDA )
		add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} )
		endforeach()
		endif()

		IF( BUILD_HIP )
		HIP_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp )
		TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} )
		TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} )

		HIP_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp )
		TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} )
		TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} )

		HIP_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp )
		TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} )
		TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} )
		ENDIF( BUILD_HIP )

src/UnitTests/Algorithms/MemoryOperationsTest.h

+226 −1

Original line number	Diff line number	Diff line
		@@ -13,6 +13,7 @@
		#ifdef HAVE_GTEST
		#include <TNL/Allocators/Host.h>
		#include <TNL/Allocators/Cuda.h>
		#include <TNL/Allocators/Hip.h>
		#include <TNL/Algorithms/MemoryOperations.h>
		#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>

		@@ -182,7 +183,6 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_host )
		allocator.deallocate( data, ARRAY_TEST_SIZE );
		}


		#ifdef HAVE_CUDA
		TYPED_TEST( MemoryOperationsTest, allocateMemory_cuda )
		{
		@@ -406,6 +406,231 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_cuda )
		cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
		}
		#endif // HAVE_CUDA

		#ifdef HAVE_HIP
		TYPED_TEST( MemoryOperationsTest, allocateMemory_hip )
		{
		using ValueType = typename TestFixture::ValueType;
		using Allocator = Allocators::Hip< ValueType >;

		Allocator allocator;
		ValueType* data = allocator.allocate( ARRAY_TEST_SIZE );
		ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
		ASSERT_NE( data, nullptr );

		allocator.deallocate( data, ARRAY_TEST_SIZE );
		ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
		}

		TYPED_TEST( MemoryOperationsTest, setElement_hip )
		{
		using ValueType = typename TestFixture::ValueType;
		using Allocator = Allocators::Hip< ValueType >;

		Allocator allocator;
		ValueType* data = allocator.allocate( ARRAY_TEST_SIZE );
		ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );

		for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
		MemoryOperations< Devices::Hip >::setElement( &data[ i ], (ValueType) i );

		for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
		{
		ValueType d;
		ASSERT_EQ( hipMemcpy( &d, &data[ i ], sizeof( ValueType ), hipMemcpyDeviceToHost ), hipSuccess );
		EXPECT_EQ( d, i );
		EXPECT_EQ( MemoryOperations< Devices::Hip >::getElement( &data[ i ] ), i );
		}

		allocator.deallocate( data, ARRAY_TEST_SIZE );
		ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
		}

		TYPED_TEST( MemoryOperationsTest, set_hip )
		{
		using ValueType = typename TestFixture::ValueType;
		using HostAllocator = Allocators::Host< ValueType >;
		using HipAllocator = Allocators::Hip< ValueType >;

		HostAllocator hostAllocator;
		HipAllocator hipAllocator;
		ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE );
		ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
		MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE );
		ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
		for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
		EXPECT_EQ( hostData[ i ], 13 );
		hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
		hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
		}

		TYPED_TEST( MemoryOperationsTest, copy_hip )
		{
		using ValueType = typename TestFixture::ValueType;
		using HostAllocator = Allocators::Host< ValueType >;
		using HipAllocator = Allocators::Hip< ValueType >;

		HostAllocator hostAllocator;
		HipAllocator hipAllocator;
		ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* hostData2 = hostAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE );
		MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE );
		MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE );
		EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) );
		hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
		hostAllocator.deallocate( hostData2, ARRAY_TEST_SIZE );
		hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
		hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE );
		}

		TYPED_TEST( MemoryOperationsTest, copyWithConversions_hip )
		{
		using HostAllocator1 = Allocators::Host< int >;
		using HostAllocator2 = Allocators::Host< double >;
		using HipAllocator1 = Allocators::Hip< long >;
		using HipAllocator2 = Allocators::Hip< float >;

		HostAllocator1 hostAllocator1;
		HostAllocator2 hostAllocator2;
		HipAllocator1 hipAllocator1;
		HipAllocator2 hipAllocator2;
		int* hostData = hostAllocator1.allocate( ARRAY_TEST_SIZE );
		double* hostData2 = hostAllocator2.allocate( ARRAY_TEST_SIZE );
		long* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE );
		float* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE );
		MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE );
		MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE );
		for( int i = 0; i < ARRAY_TEST_SIZE; i ++ )
		EXPECT_EQ( hostData[ i ], hostData2[ i ] );
		hostAllocator1.deallocate( hostData, ARRAY_TEST_SIZE );
		hostAllocator2.deallocate( hostData2, ARRAY_TEST_SIZE );
		hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE );
		hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE );
		}

		TYPED_TEST( MemoryOperationsTest, compare_hip )
		{
		using ValueType = typename TestFixture::ValueType;
		using HostAllocator = Allocators::Host< ValueType >;
		using HipAllocator = Allocators::Hip< ValueType >;

		HostAllocator hostAllocator;
		HipAllocator hipAllocator;
		ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE );

		MemoryOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE );
		EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
		EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
		EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

		MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE );
		EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
		EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
		EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

		hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
		hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
		hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE );
		}

		TYPED_TEST( MemoryOperationsTest, compareWithConversions_hip )
		{
		using HostAllocator = Allocators::Host< int >;
		using HipAllocator1 = Allocators::Hip< float >;
		using HipAllocator2 = Allocators::Hip< double >;

		HostAllocator hostAllocator;
		HipAllocator1 hipAllocator1;
		HipAllocator2 hipAllocator2;
		int* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
		float* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE );
		double* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE );

		MemoryOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::set( deviceData, (float) 8, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE );
		EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
		EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
		EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

		MemoryOperations< Devices::Hip >::set( deviceData, (float) 7, ARRAY_TEST_SIZE );
		MemoryOperations< Devices::Hip >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE );
		EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
		EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
		EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

		hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
		hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE );
		hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE );
		}

		TYPED_TEST( MemoryOperationsTest, containsValue_hip )
		{
		using ValueType = typename TestFixture::ValueType;
		using HostAllocator = Allocators::Host< ValueType >;
		using HipAllocator = Allocators::Hip< ValueType >;

		HostAllocator hostAllocator;
		HipAllocator hipAllocator;
		ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );

		for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
		hostData[ i ] = i % 10;
		MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );

		for( int i = 0; i < 10; i++ )
		EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
		for( int i = 10; i < 20; i++ )
		EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );

		hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
		hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
		}

		TYPED_TEST( MemoryOperationsTest, containsOnlyValue_hip )
		{
		using ValueType = typename TestFixture::ValueType;
		using HostAllocator = Allocators::Host< ValueType >;
		using HipAllocator = Allocators::Hip< ValueType >;

		HostAllocator hostAllocator;
		HipAllocator hipAllocator;
		ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
		ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );

		for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
		hostData[ i ] = i % 10;
		MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );

		for( int i = 0; i < 20; i++ )
		EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );

		for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
		hostData[ i ] = 10;
		MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );

		EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) );

		hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
		hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
		}
		#endif // HAVE_HIP

		#endif // HAVE_GTEST

src/UnitTests/Algorithms/MultireductionTest.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -90,6 +90,12 @@ using VectorTypes = ::testing::Types<
		Vector< int, Devices::Cuda >,
		Vector< float, Devices::Cuda >
		#endif
		#ifdef HAVE_HIP
		,
		Vector< int, Devices::Hip >,
		Vector< float, Devices::Hip >
		#endif

		>;

		TYPED_TEST_SUITE( MultireductionTest, VectorTypes );

src/UnitTests/Algorithms/ParallelForTest.h

+160 −9

Original line number	Diff line number	Diff line
		@@ -12,6 +12,7 @@

		#include <TNL/Devices/Host.h>
		#include <TNL/Devices/Cuda.h>
		#include <TNL/Devices/Hip.h>
		#include <TNL/Containers/Array.h>
		#include <TNL/Algorithms/ParallelFor.h>

		@@ -140,7 +141,7 @@ TEST( ParallelForTest, 3D_host )
		}

		#ifdef HAVE_CUDA
		// nvcc does not allow __cuda_callable__ lambdas inside private regions
		// nvcc does not allow __device_callable__ lambdas inside private regions
		void test_1D_cuda()
		{
		using Array = Containers::Array< int, Devices::Cuda >;
		@@ -156,7 +157,7 @@ void test_1D_cuda()
		a.setSize( size );
		a.setValue( 0 );
		auto view = a.getView();
		auto kernel = [=] __cuda_callable__ (int i) mutable
		auto kernel = [=] __device_callable__ (int i) mutable
		{
		view[i] = i;
		};
		@@ -176,7 +177,7 @@ TEST( ParallelForTest, 1D_cuda )
		test_1D_cuda();
		}

		// nvcc does not allow __cuda_callable__ lambdas inside private regions
		// nvcc does not allow __device_callable__ lambdas inside private regions
		void test_2D_cuda()
		{
		using Array = Containers::Array< int, Devices::Cuda >;
		@@ -192,7 +193,7 @@ void test_2D_cuda()
		a.setSize( size );
		a.setValue( 0 );
		auto view = a.getView();
		auto kernel1 = [=] __cuda_callable__ (int i, int j) mutable
		auto kernel1 = [=] __device_callable__ (int i, int j) mutable
		{
		view[i] = i;
		};
		@@ -206,7 +207,7 @@ void test_2D_cuda()
		}

		a.setValue( 0 );
		auto kernel2 = [=] __cuda_callable__ (int i, int j) mutable
		auto kernel2 = [=] __device_callable__ (int i, int j) mutable
		{
		view[j] = j;
		};
		@@ -225,7 +226,7 @@ TEST( ParallelForTest, 2D_cuda )
		test_2D_cuda();
		}

		// nvcc does not allow __cuda_callable__ lambdas inside private regions
		// nvcc does not allow __device_callable__ lambdas inside private regions
		void test_3D_cuda()
		{
		using Array = Containers::Array< int, Devices::Cuda >;
		@@ -241,7 +242,7 @@ void test_3D_cuda()
		a.setSize( size );
		a.setValue( 0 );
		auto view = a.getView();
		auto kernel1 = [=] __cuda_callable__ (int i, int j, int k) mutable
		auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable
		{
		view[i] = i;
		};
		@@ -255,7 +256,7 @@ void test_3D_cuda()
		}

		a.setValue( 0 );
		auto kernel2 = [=] __cuda_callable__ (int i, int j, int k) mutable
		auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable
		{
		view[j] = j;
		};
		@@ -268,7 +269,7 @@ void test_3D_cuda()
		}

		a.setValue( 0 );
		auto kernel3 = [=] __cuda_callable__ (int i, int j, int k) mutable
		auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable
		{
		view[k] = k;
		};
		@@ -286,7 +287,157 @@ TEST( ParallelForTest, 3D_cuda )
		{
		test_3D_cuda();
		}
		#endif // HAVE_CUDA

		#ifdef HAVE_HIP
		void test_1D_hip()
		{
		using Array = Containers::Array< int, Devices::Hip >;
		using ArrayHost = Containers::Array< int, Devices::Host >;
		Array a;
		for (int size = 100; size <= 100000000; size *= 100)
		{
		ArrayHost expected;
		expected.setSize( size );
		for (int i = 0; i < size; i++)
		expected[ i ] = i;

		a.setSize( size );
		a.setValue( 0 );
		auto view = a.getView();
		auto kernel = [=] __device_callable__ (int i) mutable
		{
		view[i] = i;
		};
		Algorithms::ParallelFor< Devices::Hip >::exec( 0, size, kernel );

		ArrayHost ah;
		ah = a;
		if( ah != expected ) {
		for (int i = 0; i < size; i++)
		ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
		}
		}
		}

		TEST( ParallelForTest, 1D_hip )
		{
		test_1D_hip();
		}

		// nvcc does not allow __device_callable__ lambdas inside private regions
		void test_2D_hip()
		{
		using Array = Containers::Array< int, Devices::Hip >;
		using ArrayHost = Containers::Array< int, Devices::Host >;
		Array a;
		for (int size = 100; size <= 100000000; size *= 100)
		{
		ArrayHost expected;
		expected.setSize( size );
		for (int i = 0; i < size; i++)
		expected[ i ] = i;

		a.setSize( size );
		a.setValue( 0 );
		auto view = a.getView();
		auto kernel1 = [=] __device_callable__ (int i, int j) mutable
		{
		view[i] = i;
		};
		Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, size, 1, kernel1 );

		ArrayHost ah;
		ah = a;
		if( ah != expected ) {
		for (int i = 0; i < size; i++)
		ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
		}

		a.setValue( 0 );
		auto kernel2 = [=] __device_callable__ (int i, int j) mutable
		{
		view[j] = j;
		};
		Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, 1, size, kernel2 );

		ah = a;
		if( ah != expected ) {
		for (int i = 0; i < size; i++)
		ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
		}
		}
		}

		TEST( ParallelForTest, 2D_hip )
		{
		test_2D_hip();
		}

		// nvcc does not allow __device_callable__ lambdas inside private regions
		void test_3D_hip()
		{
		using Array = Containers::Array< int, Devices::Hip >;
		using ArrayHost = Containers::Array< int, Devices::Host >;
		Array a;
		for (int size = 100; size <= 100000000; size *= 100)
		{
		ArrayHost expected;
		expected.setSize( size );
		for (int i = 0; i < size; i++)
		expected[ i ] = i;

		a.setSize( size );
		a.setValue( 0 );
		auto view = a.getView();
		auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable
		{
		view[i] = i;
		};
		Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, size, 1, 1, kernel1 );

		ArrayHost ah;
		ah = a;
		if( ah != expected ) {
		for (int i = 0; i < size; i++)
		ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
		}

		a.setValue( 0 );
		auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable
		{
		view[j] = j;
		};
		Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, size, 1, kernel2 );

		ah = a;
		if( ah != expected ) {
		for (int i = 0; i < size; i++)
		ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
		}

		a.setValue( 0 );
		auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable
		{
		view[k] = k;
		};
		Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, 1, size, kernel3 );

		ah = a;
		if( ah != expected ) {
		for (int i = 0; i < size; i++)
		ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
		}
		}
		}

		TEST( ParallelForTest, 3D_hip )
		{
		test_3D_hip();
		}
		#endif


		#endif

		#include "../main.h"

src/UnitTests/Containers/VectorPrefixSumTest.h

+16 −16

File changed.

Preview size limit exceeded, changes collapsed.