Commit 3916fab8 authored by Tomáš Oberhuber, committed by Jakub Klinkovský

Added SequentialFor.

parent 7b225e46
1 merge request: !105 TO/matrices-adaptive-csr

CMakeLists.txt
ADD_SUBDIRECTORY( Segments )
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu)
ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )
CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu)
ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )
CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu)
ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )
CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu)
ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )
CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu)
ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
ELSE()
ADD_EXECUTABLE( SortingExample SortingExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )
ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp)
ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )
ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp)
ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )
ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )
ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
ENDIF()
ADD_EXECUTABLE(staticForExample staticForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND staticForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/staticForExample.out OUTPUT staticForExample.out )
ADD_EXECUTABLE(unrolledForExample unrolledForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out OUTPUT unrolledForExample.out )
ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
   SortingExample.out
   SortingExample2.out
   SortingExample3.out
   ParallelForExample.out
   reduceArrayExample.out
   reduceWithArgumentArrayExample.out
   staticForExample.out
   unrolledForExample.out )

set( COMMON_EXAMPLES
   SortingExample
   SortingExample2
   SortingExample3
   ParallelForExample
   SequentialForExample
)
set( HOST_EXAMPLES
   staticForExample
   unrolledForExample
)
if( BUILD_CUDA )
   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
   endforeach()
else()
   foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES} )
      add_executable( ${target} ${target}.cpp )
      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )
   endforeach()
endif()
IF( BUILD_CUDA )
ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
ELSE()
ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS ${HOST_OUTPUTS} )
ENDIF()

SequentialForExample.cpp
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/SequentialFor.h>
using namespace TNL;
using namespace TNL::Containers;
template< typename Device >
void printVector()
{
   const int size( 36 );
   TNL::Containers::Vector< float, Device > v( size, 1.0 );
   auto view = v.getView();
   auto print = [=] __cuda_callable__ ( int i ) mutable {
      printf( "v[ %d ] = %f \n", i, view[ i ] ); // we use printf because of compatibility with GPU kernels
   };

   std::cout << "Printing vector using parallel for: " << std::endl;
   Algorithms::ParallelFor< Device >::exec( 0, v.getSize(), print );

   std::cout << "Printing vector using sequential for: " << std::endl;
   Algorithms::SequentialFor< Device >::exec( 0, v.getSize(), print );
}
int main( int argc, char* argv[] )
{
   std::cout << "Example on the host:" << std::endl;
   printVector< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Example on CUDA GPU:" << std::endl;
   printVector< TNL::Devices::Cuda >();
#endif
   return EXIT_SUCCESS;
}
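
Note: the first pass above (ParallelFor) may print the elements out of order when the loop body runs concurrently, for example on the host with OpenMP or on a CUDA GPU. The second pass (SequentialFor) should print all 36 elements in index order on either device, which is exactly the behavior the new wrapper provides.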

SequentialFor.h
/***************************************************************************
                          SequentialFor.h  -  description
                             -------------------
    begin                : Apr 5, 2021
    copyright            : (C) 2021 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */
#pragma once
#include <TNL/Algorithms/ParallelFor.h>
namespace TNL {
namespace Algorithms {
/**
 * \brief Wrapper for ParallelFor which makes it run sequentially.
 *
 * It is helpful for debugging or for running sequential for-loops on GPUs.
 */
template< typename Device = Devices::Sequential >
struct SequentialFor
{
   /**
    * \brief Static method for execution of the loop.
    *
    * \tparam Index defines the type of indexes over which the loop iterates.
    * \tparam Function is the type of function to be called in each iteration.
    *
    * \param start the for-loop iterates over index interval [start, end).
    * \param end the for-loop iterates over index interval [start, end).
    * \param f is the function to be called in each iteration.
    *
    * \par Example
    * \include Algorithms/SequentialForExample.cpp
    * \par Output
    * \include SequentialForExample.out
    */
   template< typename Index,
             typename Function >
   static void exec( Index start, Index end, Function f )
   {
      // run each index as a one-element ParallelFor: the iterations are
      // serialized, but the loop body still executes on the given Device
      for( Index i = start; i < end; i++ )
         ParallelFor< Device >::exec( i, i + 1, f );
   }
};
} // namespace Algorithms
} // namespace TNL
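
To make the use case from the comment above more concrete, here is a minimal sketch of an order-dependent loop that is only correct when the iterations run one after another. It is not part of the commit: the runningSum helper and its prefix-sum body are illustrative assumptions, while the TNL types and calls (Containers::Vector, getView(), Algorithms::SequentialFor::exec, the __cuda_callable__ attribute) are the ones used in the files above.

#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/SequentialFor.h>

using namespace TNL;

// Hypothetical example, not part of the commit: a running (prefix) sum.
// Each iteration reads the element written by the previous one, so the
// loop must not be parallelized, but the body may still run on the device.
template< typename Device >
void runningSum()
{
   Containers::Vector< float, Device > v( 5, 1.0 );
   auto view = v.getView();
   auto sum = [=] __cuda_callable__ ( int i ) mutable {
      view[ i ] += view[ i - 1 ];   // order-dependent update
   };
   // SequentialFor serializes the iterations (each one is a one-element
   // ParallelFor, see exec() above), so the result is well defined.
   Algorithms::SequentialFor< Device >::exec( 1, v.getSize(), sum );
   std::cout << v << std::endl;   // expected contents: 1, 2, 3, 4, 5
}

int main( int argc, char* argv[] )
{
   runningSum< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   runningSum< TNL::Devices::Cuda >();
#endif
   return EXIT_SUCCESS;
}

Under ParallelFor the same lambda could read a neighbouring element before or after it has been updated, so the result would be unspecified; loops like this are what the sequential wrapper is intended for.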