Commit 3cb394a0 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Jakub Klinkovský
Browse files

Added tests of algorithms for HIP.

parent ad4e179b
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -30,3 +30,17 @@ if( BUILD_CUDA )
      add_test( ${target} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${target}${CMAKE_EXECUTABLE_SUFFIX} )
   endforeach()
endif()

IF( BUILD_HIP )
   HIP_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp )
   TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} )
   TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} )

   HIP_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp )
   TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} )
   TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} )

   HIP_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp )
   TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} )
   TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} )
ENDIF( BUILD_HIP )
+226 −1
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#ifdef HAVE_GTEST
#include <TNL/Allocators/Host.h>
#include <TNL/Allocators/Cuda.h>
#include <TNL/Allocators/Hip.h>
#include <TNL/Algorithms/MemoryOperations.h>
#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>

@@ -182,7 +183,6 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_host )
   allocator.deallocate( data, ARRAY_TEST_SIZE );
}


#ifdef HAVE_CUDA
TYPED_TEST( MemoryOperationsTest, allocateMemory_cuda )
{
@@ -406,6 +406,231 @@ TYPED_TEST( MemoryOperationsTest, containsOnlyValue_cuda )
   cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
}
#endif // HAVE_CUDA

#ifdef HAVE_HIP
TYPED_TEST( MemoryOperationsTest, allocateMemory_hip )
{
   using ValueType = typename TestFixture::ValueType;
   using Allocator = Allocators::Hip< ValueType >;

   Allocator allocator;
   ValueType* data = allocator.allocate( ARRAY_TEST_SIZE );
   ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
   ASSERT_NE( data, nullptr );

   allocator.deallocate( data, ARRAY_TEST_SIZE );
   ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
}

TYPED_TEST( MemoryOperationsTest, setElement_hip )
{
   using ValueType = typename TestFixture::ValueType;
   using Allocator = Allocators::Hip< ValueType >;

   Allocator allocator;
   ValueType* data = allocator.allocate( ARRAY_TEST_SIZE );
   ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );

   for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
      MemoryOperations< Devices::Hip >::setElement( &data[ i ], (ValueType) i );

   for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
   {
      ValueType d;
      ASSERT_EQ( hipMemcpy( &d, &data[ i ], sizeof( ValueType ), hipMemcpyDeviceToHost ), hipSuccess );
      EXPECT_EQ( d, i );
      EXPECT_EQ( MemoryOperations< Devices::Hip >::getElement( &data[ i ] ), i );
   }

   allocator.deallocate( data, ARRAY_TEST_SIZE );
   ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
}

TYPED_TEST( MemoryOperationsTest, set_hip )
{
   using ValueType = typename TestFixture::ValueType;
   using HostAllocator = Allocators::Host< ValueType >;
   using HipAllocator = Allocators::Hip< ValueType >;

   HostAllocator hostAllocator;
   HipAllocator hipAllocator;
   ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE );
   ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
   MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE );
   ASSERT_NO_THROW( TNL_CHECK_HIP_DEVICE );
   for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
      EXPECT_EQ( hostData[ i ], 13 );
   hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
   hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
}

TYPED_TEST( MemoryOperationsTest, copy_hip )
{
   using ValueType = typename TestFixture::ValueType;
   using HostAllocator = Allocators::Host< ValueType >;
   using HipAllocator = Allocators::Hip< ValueType >;

   HostAllocator hostAllocator;
   HipAllocator hipAllocator;
   ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* hostData2 = hostAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE );
   MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE );
   MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE );
   EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) );
   hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
   hostAllocator.deallocate( hostData2, ARRAY_TEST_SIZE );
   hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
   hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE );
}

TYPED_TEST( MemoryOperationsTest, copyWithConversions_hip )
{
   using HostAllocator1 = Allocators::Host< int >;
   using HostAllocator2 = Allocators::Host< double >;
   using HipAllocator1 = Allocators::Hip< long >;
   using HipAllocator2 = Allocators::Hip< float >;

   HostAllocator1 hostAllocator1;
   HostAllocator2 hostAllocator2;
   HipAllocator1 hipAllocator1;
   HipAllocator2 hipAllocator2;
   int* hostData = hostAllocator1.allocate( ARRAY_TEST_SIZE );
   double* hostData2 = hostAllocator2.allocate( ARRAY_TEST_SIZE );
   long* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE );
   float* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE );
   MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE );
   MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE );
   for( int i = 0; i < ARRAY_TEST_SIZE; i ++ )
      EXPECT_EQ( hostData[ i ], hostData2[ i ] );
   hostAllocator1.deallocate( hostData, ARRAY_TEST_SIZE );
   hostAllocator2.deallocate( hostData2, ARRAY_TEST_SIZE );
   hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE );
   hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE );
}

TYPED_TEST( MemoryOperationsTest, compare_hip )
{
   using ValueType = typename TestFixture::ValueType;
   using HostAllocator = Allocators::Host< ValueType >;
   using HipAllocator = Allocators::Hip< ValueType >;

   HostAllocator hostAllocator;
   HipAllocator hipAllocator;
   ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* deviceData2 = hipAllocator.allocate( ARRAY_TEST_SIZE );

   MemoryOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE );
   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
   EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

   MemoryOperations< Devices::Hip >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE );
   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
   EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

   hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
   hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
   hipAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE );
}

TYPED_TEST( MemoryOperationsTest, compareWithConversions_hip )
{
   using HostAllocator = Allocators::Host< int >;
   using HipAllocator1 = Allocators::Hip< float >;
   using HipAllocator2 = Allocators::Hip< double >;

   HostAllocator hostAllocator;
   HipAllocator1 hipAllocator1;
   HipAllocator2 hipAllocator2;
   int* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
   float* deviceData = hipAllocator1.allocate( ARRAY_TEST_SIZE );
   double* deviceData2 = hipAllocator2.allocate( ARRAY_TEST_SIZE );

   MemoryOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::set( deviceData, (float) 8, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE );
   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
   EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
   EXPECT_FALSE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

   MemoryOperations< Devices::Hip >::set( deviceData, (float) 7, ARRAY_TEST_SIZE );
   MemoryOperations< Devices::Hip >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE );
   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Hip >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) ));
   EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) ));
   EXPECT_TRUE(( MemoryOperations< Devices::Hip >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) ));

   hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
   hipAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE );
   hipAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE );
}

TYPED_TEST( MemoryOperationsTest, containsValue_hip )
{
   using ValueType = typename TestFixture::ValueType;
   using HostAllocator = Allocators::Host< ValueType >;
   using HipAllocator = Allocators::Hip< ValueType >;

   HostAllocator hostAllocator;
   HipAllocator hipAllocator;
   ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );

   for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
      hostData[ i ] = i % 10;
   MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );

   for( int i = 0; i < 10; i++ )
      EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );
   for( int i = 10; i < 20; i++ )
      EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );

   hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
   hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
}

TYPED_TEST( MemoryOperationsTest, containsOnlyValue_hip )
{
   using ValueType = typename TestFixture::ValueType;
   using HostAllocator = Allocators::Host< ValueType >;
   using HipAllocator = Allocators::Hip< ValueType >;

   HostAllocator hostAllocator;
   HipAllocator hipAllocator;
   ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE );
   ValueType* deviceData = hipAllocator.allocate( ARRAY_TEST_SIZE );

   for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
      hostData[ i ] = i % 10;
   MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );

   for( int i = 0; i < 20; i++ )
      EXPECT_FALSE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) );

   for( int i = 0; i < ARRAY_TEST_SIZE; i++ )
      hostData[ i ] = 10;
   MultiDeviceMemoryOperations< Devices::Hip, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE );

   EXPECT_TRUE( ( MemoryOperations< Devices::Hip >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) );

   hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE );
   hipAllocator.deallocate( deviceData, ARRAY_TEST_SIZE );
}
#endif // HAVE_HIP

#endif // HAVE_GTEST


+6 −0
Original line number Diff line number Diff line
@@ -90,6 +90,12 @@ using VectorTypes = ::testing::Types<
   Vector< int,   Devices::Cuda >,
   Vector< float, Devices::Cuda >
#endif
#ifdef HAVE_HIP
   ,
   Vector< int,   Devices::Hip >,
   Vector< float, Devices::Hip >
#endif

>;

TYPED_TEST_SUITE( MultireductionTest, VectorTypes );
+160 −9
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@

#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Devices/Hip.h>
#include <TNL/Containers/Array.h>
#include <TNL/Algorithms/ParallelFor.h>

@@ -140,7 +141,7 @@ TEST( ParallelForTest, 3D_host )
}

#ifdef HAVE_CUDA
// nvcc does not allow __cuda_callable__ lambdas inside private regions
// nvcc does not allow __device_callable__ lambdas inside private regions
void test_1D_cuda()
{
   using Array = Containers::Array< int, Devices::Cuda >;
@@ -156,7 +157,7 @@ void test_1D_cuda()
      a.setSize( size );
      a.setValue( 0 );
      auto view = a.getView();
      auto kernel = [=] __cuda_callable__ (int i) mutable
      auto kernel = [=] __device_callable__ (int i) mutable
      {
         view[i] = i;
      };
@@ -176,7 +177,7 @@ TEST( ParallelForTest, 1D_cuda )
   test_1D_cuda();
}

// nvcc does not allow __cuda_callable__ lambdas inside private regions
// nvcc does not allow __device_callable__ lambdas inside private regions
void test_2D_cuda()
{
   using Array = Containers::Array< int, Devices::Cuda >;
@@ -192,7 +193,7 @@ void test_2D_cuda()
      a.setSize( size );
      a.setValue( 0 );
      auto view = a.getView();
      auto kernel1 = [=] __cuda_callable__ (int i, int j) mutable
      auto kernel1 = [=] __device_callable__ (int i, int j) mutable
      {
         view[i] = i;
      };
@@ -206,7 +207,7 @@ void test_2D_cuda()
      }

      a.setValue( 0 );
      auto kernel2 = [=] __cuda_callable__ (int i, int j) mutable
      auto kernel2 = [=] __device_callable__ (int i, int j) mutable
      {
         view[j] = j;
      };
@@ -225,7 +226,7 @@ TEST( ParallelForTest, 2D_cuda )
   test_2D_cuda();
}

// nvcc does not allow __cuda_callable__ lambdas inside private regions
// nvcc does not allow __device_callable__ lambdas inside private regions
void test_3D_cuda()
{
   using Array = Containers::Array< int, Devices::Cuda >;
@@ -241,7 +242,7 @@ void test_3D_cuda()
      a.setSize( size );
      a.setValue( 0 );
      auto view = a.getView();
      auto kernel1 = [=] __cuda_callable__ (int i, int j, int k) mutable
      auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable
      {
         view[i] = i;
      };
@@ -255,7 +256,7 @@ void test_3D_cuda()
      }

      a.setValue( 0 );
      auto kernel2 = [=] __cuda_callable__ (int i, int j, int k) mutable
      auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable
      {
         view[j] = j;
      };
@@ -268,7 +269,7 @@ void test_3D_cuda()
      }

      a.setValue( 0 );
      auto kernel3 = [=] __cuda_callable__ (int i, int j, int k) mutable
      auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable
      {
         view[k] = k;
      };
@@ -286,7 +287,157 @@ TEST( ParallelForTest, 3D_cuda )
{
   test_3D_cuda();
}
#endif // HAVE_CUDA

#ifdef HAVE_HIP
void test_1D_hip()
{
   using Array = Containers::Array< int, Devices::Hip >;
   using ArrayHost = Containers::Array< int, Devices::Host >;
   Array a;
   for (int size = 100; size <= 100000000; size *= 100)
   {
      ArrayHost expected;
      expected.setSize( size );
      for (int i = 0; i < size; i++)
         expected[ i ] = i;

      a.setSize( size );
      a.setValue( 0 );
      auto view = a.getView();
      auto kernel = [=] __device_callable__ (int i) mutable
      {
         view[i] = i;
      };
      Algorithms::ParallelFor< Devices::Hip >::exec( 0, size, kernel );

      ArrayHost ah;
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
      }
   }
}

TEST( ParallelForTest, 1D_hip )
{
   test_1D_hip();
}

// nvcc does not allow __device_callable__ lambdas inside private regions
void test_2D_hip()
{
   using Array = Containers::Array< int, Devices::Hip >;
   using ArrayHost = Containers::Array< int, Devices::Host >;
   Array a;
   for (int size = 100; size <= 100000000; size *= 100)
   {
      ArrayHost expected;
      expected.setSize( size );
      for (int i = 0; i < size; i++)
         expected[ i ] = i;

      a.setSize( size );
      a.setValue( 0 );
      auto view = a.getView();
      auto kernel1 = [=] __device_callable__ (int i, int j) mutable
      {
         view[i] = i;
      };
      Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, size, 1, kernel1 );

      ArrayHost ah;
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
      }

      a.setValue( 0 );
      auto kernel2 = [=] __device_callable__ (int i, int j) mutable
      {
         view[j] = j;
      };
      Algorithms::ParallelFor2D< Devices::Hip >::exec( 0, 0, 1, size, kernel2 );

      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
      }
   }
}

TEST( ParallelForTest, 2D_hip )
{
   test_2D_hip();
}

// nvcc does not allow __device_callable__ lambdas inside private regions
void test_3D_hip()
{
   using Array = Containers::Array< int, Devices::Hip >;
   using ArrayHost = Containers::Array< int, Devices::Host >;
   Array a;
   for (int size = 100; size <= 100000000; size *= 100)
   {
      ArrayHost expected;
      expected.setSize( size );
      for (int i = 0; i < size; i++)
         expected[ i ] = i;

      a.setSize( size );
      a.setValue( 0 );
      auto view = a.getView();
      auto kernel1 = [=] __device_callable__ (int i, int j, int k) mutable
      {
         view[i] = i;
      };
      Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, size, 1, 1, kernel1 );

      ArrayHost ah;
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
      }

      a.setValue( 0 );
      auto kernel2 = [=] __device_callable__ (int i, int j, int k) mutable
      {
         view[j] = j;
      };
      Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, size, 1, kernel2 );

      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
      }

      a.setValue( 0 );
      auto kernel3 = [=] __device_callable__ (int i, int j, int k) mutable
      {
         view[k] = k;
      };
      Algorithms::ParallelFor3D< Devices::Hip >::exec( 0, 0, 0, 1, 1, size, kernel3 );

      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
      }
   }
}

TEST( ParallelForTest, 3D_hip )
{
   test_3D_hip();
}
#endif


#endif

#include "../main.h"
+16 −16

File changed.

Preview size limit exceeded, changes collapsed.