Commit 3916fab8 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Jakub Klinkovský
Browse files

Added SequentialFor.

parent 7b225e46
Loading
Loading
Loading
Loading
+30 −51
Original line number Diff line number Diff line
ADD_SUBDIRECTORY( Segments )

IF( BUILD_CUDA )
   CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )

   CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )

   CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )

   CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
# Base names of the examples iterated by both the CUDA and the host loop below.
# Entries must not carry an extension: the loops append ".cu" or ".cpp" to get
# the source file and ".out" to get the name of the generated snippet.
set( COMMON_EXAMPLES
   SortingExample
   SortingExample2
   SortingExample3
   ParallelForExample
   SequentialForExample
)

   CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )
# Base names of the examples that are iterated only in the host (non-CUDA)
# branch below.
set( HOST_EXAMPLES
   staticForExample
   unrolledForExample
)

# For every example create the executable and a rule which runs it and stores
# its standard output as <name>.out in TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH.
# The names of the outputs are collected in CUDA_OUTPUTS or HOST_OUTPUTS.
if( BUILD_CUDA )
   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      list( APPEND CUDA_OUTPUTS ${target}.out )
   endforeach()
else()
   # Both lists must be expanded unquoted. A single quoted string would reach
   # foreach as one argument, and the space between the two expansions would
   # glue the last common example to the first host-only example.
   foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES} )
      add_executable( ${target} ${target}.cpp )
      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      list( APPEND HOST_OUTPUTS ${target}.out )
   endforeach()
endif()

   CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu)
   ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
IF( BUILD_CUDA )
   ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
ELSE()
   ADD_EXECUTABLE( SortingExample SortingExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )

   ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )

   ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp)
   ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )

   ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )

   ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )

   ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp)
   ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
   ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS ${HOST_OUTPUTS} )
ENDIF()
 No newline at end of file

ADD_EXECUTABLE(staticForExample staticForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND staticForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/staticForExample.out OUTPUT staticForExample.out )

ADD_EXECUTABLE(unrolledForExample unrolledForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out OUTPUT unrolledForExample.out )

ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
   SortingExample.out
   SortingExample2.out
   SortingExample3.out
   ParallelForExample.out
   reduceArrayExample.out
   reduceWithArgumentArrayExample.out
   unrolledForExample.out
   staticForExample.out
)
+37 −0
Original line number Diff line number Diff line
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/SequentialFor.h>

using namespace TNL;
using namespace TNL::Containers;

template< typename Device >
void printVector()
{
   const int size( 36 );
   TNL::Containers::Vector< float, Device > v( size, 1.0 );
   auto view = v.getView();
   auto print = [=] __cuda_callable__  ( int i ) mutable {
      printf( "v[ %d ] = %f \n", i, view[ i ] );  // we use printf because of compatibility with GPU kernels
   };
   std::cout << "Printing vector using parallel for: " << std::endl;
   Algorithms::ParallelFor< Device >::exec( 0, v.getSize(), print );

   std::cout << "Printing vector using sequential for: " << std::endl;
   Algorithms::SequentialFor< Device >::exec( 0, v.getSize(), print );
}

int main( int argc, char* argv[] )
{
   std::cout << "Example on the host:" << std::endl;
   printVector< TNL::Devices::Host >();

#ifdef HAVE_CUDA
   std::cout << "Example on CUDA GPU:" << std::endl;
   printVector< TNL::Devices::Cuda >();
#endif
   return EXIT_SUCCESS;
}
+1 −0
Original line number Diff line number Diff line
SequentialForExample.cpp
 No newline at end of file
+54 −0
Original line number Diff line number Diff line
/***************************************************************************
                          SequentialFor.h  -  description
                             -------------------
    begin                : Apr 5, 2021
    copyright            : (C) 2021 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <TNL/Algorithms/ParallelFor.h>


namespace TNL {
   namespace Algorithms {

/**
 * \brief Wrapper to ParallelFor which makes it run sequentially.
 *
 *  It is helpful for debugging or for plain sequential for-loops on GPUs.
 */
template< typename Device = Devices::Sequential >
struct SequentialFor
{
   /**
    * \brief Static method for execution of the loop.
    *
    * The indexes are visited in increasing order and each of them is passed
    * to ParallelFor< Device >::exec as its own one-element interval
    * [i, i + 1).
    *
    * \tparam Index defines the type of indexes over which the loop iterates.
    * \tparam Function is the type of function to be called in each iteration.
    *
    * \param start the for-loop iterates over index interval [start, end).
    * \param end the for-loop iterates over index interval [start, end).
    * \param f is the function to be called in each iteration
    *
    * \par Example
    * \include Algorithms/SequentialForExample.cpp
    * \par Output
    * \include SequentialForExample.out
    *
    */
   template< typename Index,
             typename Function >
   static void exec( Index start, Index end, Function f )
   {
      for( Index i = start; i < end; i++ ) {
         // `i + 1` is subject to integral promotion, so for an Index narrower
         // than int it would have a different type than `i`. Casting back to
         // Index hands both bounds to ParallelFor with the same type.
         const Index next = static_cast< Index >( i + 1 );
         ParallelFor< Device >::exec( i, next, f );
      }
   }
};


   } // namespace Algorithms
} // namespace TNL
 No newline at end of file