Commit 410ed20c authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Improved ParallelForTest

parent a79560df
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
for( Index j = startY; j < endY; j++ )
   for( Index i = startX; i < endX; i++ )
      f( i, j, args... );
+5 −1
Original line number Diff line number Diff line
@@ -35,7 +35,11 @@ Performing for-loops in higher dimensions is similar. In the following example

Notice the parameters of the lambda function `sum`. The first parameter `i` changes more often than `j`, and therefore the index mapping has the form `j * xSize + i` to access the vector elements sequentially on CPU and to achieve coalesced memory accesses on GPU. The for-loop is executed by calling `ParallelFor2D` with the proper device. The first four parameters are `startX, startY, endX, endY`, and on CPU this is equivalent to the following nested for-loops:

\include ParallelFor2D-snippet.cpp
```cpp
for( Index j = startY; j < endY; j++ )
   for( Index i = startX; i < endX; i++ )
      f( i, j, args... );
```

where `args...` stands for additional arguments passed to the for-loop. After the parameters defining the loop bounds, the lambda function (`sum` in this case) is passed, followed by additional arguments. One of them, in our example, is `xSize` again, because it must be passed to the lambda function for the index mapping computation.

+8 −6
Original line number Diff line number Diff line
@@ -8,6 +8,8 @@

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Containers/Array.h>
@@ -164,7 +166,7 @@ void test_1D_cuda()
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
            ASSERT_EQ( ah[i], expected[i] ) << "First index at which the result is wrong is i = " << i;
      }
   }
}
@@ -200,7 +202,7 @@ void test_2D_cuda()
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
            ASSERT_EQ( ah[i], expected[i] ) << "First index at which the result is wrong is i = " << i;
      }

      a.setValue( 0 );
@@ -213,7 +215,7 @@ void test_2D_cuda()
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
            ASSERT_EQ( ah[i], expected[i] ) << "First index at which the result is wrong is i = " << i;
      }
   }
}
@@ -249,7 +251,7 @@ void test_3D_cuda()
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
            ASSERT_EQ( ah[i], expected[i] ) << "First index at which the result is wrong is i = " << i;
      }

      a.setValue( 0 );
@@ -262,7 +264,7 @@ void test_3D_cuda()
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
            ASSERT_EQ( ah[i], expected[i] ) << "First index at which the result is wrong is i = " << i;
      }

      a.setValue( 0 );
@@ -275,7 +277,7 @@ void test_3D_cuda()
      ah = a;
      if( ah != expected ) {
         for (int i = 0; i < size; i++)
            ASSERT_EQ( ah[i], i ) << "First index at which the result is wrong is i = " << i;
            ASSERT_EQ( ah[i], expected[i] ) << "First index at which the result is wrong is i = " << i;
      }
   }
}