Added SequentialFor. (3916fab8) · Commits · TNL / tnl-dev

Documentation/Examples/Algorithms/CMakeLists.txt

+30 −51

Original line number	Diff line number	Diff line
		ADD_SUBDIRECTORY( Segments )

		IF( BUILD_CUDA )
		# CUDA builds of the sorting examples. Each executable is compiled from its
		# .cu source and paired with a rule that runs it and redirects its standard
		# output into ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<name>.out.
		CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu)
		ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )

		CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu)
		ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )

		CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu)
		ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )

		# CUDA build of the ParallelFor example, with the same run-and-capture rule.
		CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu)
		ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
		# Base names of the examples consumed by both the CUDA and the host loop.
		# The consuming loops append the source extension (.cu / .cpp) and the
		# ".out" suffix themselves, so every entry must be a bare base name; the
		# former "unrolledForExample.out" and "staticForExample.out" entries would
		# have produced sources such as "unrolledForExample.out.cu" and were removed.
		set( COMMON_EXAMPLES
		   SortingExample
		   SortingExample2
		   SortingExample3
		   ParallelForExample
		   SequentialForExample
		)

		# CUDA build of reduceArrayExample plus a rule that runs it and redirects its
		# standard output into ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out.
		CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu)
		ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )
		# Base names of the examples that are used only by the non-CUDA branch,
		# which builds them from their .cpp sources.
		set( HOST_EXAMPLES staticForExample unrolledForExample )
		# Build every example and register a rule that runs it and stores its
		# standard output as a documentation snippet. The names of the rule outputs
		# are collected in CUDA_OUTPUTS or HOST_OUTPUTS respectively.
		# NOTE(review): OUTPUT names a file relative to the build directory, while
		# the command redirects into TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH - confirm
		# that the two locations are meant to coincide.
		if( BUILD_CUDA )
		   # CUDA build: only the common examples, compiled from their .cu sources.
		   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
		      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
		      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		      list( APPEND CUDA_OUTPUTS ${target}.out )
		   endforeach()
		else()
		   # Host build: common and host-only examples, compiled from .cpp sources.
		   # Both lists are expanded unquoted so that every name becomes its own
		   # item; a single quoted string is iterated as one item only.
		   foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES} )
		      add_executable( ${target} ${target}.cpp )
		      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
		      list( APPEND HOST_OUTPUTS ${target}.out )
		   endforeach()
		endif()

		# CUDA build of reduceWithArgumentArrayExample plus a rule that runs it and
		# redirects its standard output into the documentation snippets directory.
		CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu)
		ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
		IF( BUILD_CUDA )
		ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
		ELSE()
		# Host builds of the examples. Each executable is compiled from its .cpp
		# source and paired with a rule that runs it and redirects its standard
		# output into ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<name>.out.
		ADD_EXECUTABLE( SortingExample SortingExample.cpp)
		ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )

		ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp)
		ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )

		ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp)
		ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )

		# ParallelFor example.
		ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp)
		ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )

		# Reduction examples.
		ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp)
		ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )

		ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp)
		ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
		ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS ${HOST_OUTPUTS} )
		ENDIF()
		No newline at end of file

		# staticFor and unrolledFor examples: built from .cpp sources, each with a
		# rule that runs the binary and redirects its standard output into
		# ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/<name>.out.
		ADD_EXECUTABLE(staticForExample staticForExample.cpp)
		ADD_CUSTOM_COMMAND( COMMAND staticForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/staticForExample.out OUTPUT staticForExample.out )

		ADD_EXECUTABLE(unrolledForExample unrolledForExample.cpp)
		ADD_CUSTOM_COMMAND( COMMAND unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out OUTPUT unrolledForExample.out )

		# Aggregate target that is part of the default build (ALL) and depends on
		# the explicitly listed .out files.
		ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
		SortingExample.out
		SortingExample2.out
		SortingExample3.out
		ParallelForExample.out
		reduceArrayExample.out
		reduceWithArgumentArrayExample.out
		unrolledForExample.out
		staticForExample.out
		)

Documentation/Examples/Algorithms/SequentialForExample.cpp

0 → 100644

+37 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <cstdlib>
		#include <TNL/Containers/Vector.h>
		#include <TNL/Algorithms/ParallelFor.h>
		#include <TNL/Algorithms/SequentialFor.h>

		using namespace TNL;
		using namespace TNL::Containers;

		template< typename Device >
		void printVector()
		{
		const int size( 36 );
		TNL::Containers::Vector< float, Device > v( size, 1.0 );
		auto view = v.getView();
		auto print = [=] __cuda_callable__ ( int i ) mutable {
		printf( "v[ %d ] = %f \n", i, view[ i ] ); // we use printf because of compatibility with GPU kernels
		};
		std::cout << "Printing vector using parallel for: " << std::endl;
		Algorithms::ParallelFor< Device >::exec( 0, v.getSize(), print );

		std::cout << "Printing vector using sequential for: " << std::endl;
		Algorithms::SequentialFor< Device >::exec( 0, v.getSize(), print );
		}

		int main( int argc, char* argv[] )
		{
		std::cout << "Example on the host:" << std::endl;
		printVector< TNL::Devices::Host >();

		#ifdef HAVE_CUDA
		std::cout << "Example on CUDA GPU:" << std::endl;
		printVector< TNL::Devices::Cuda >();
		#endif
		return EXIT_SUCCESS;
		}

Documentation/Examples/Algorithms/SequentialForExample.cu

0 → 120000

+1 −0

Original line number	Diff line number	Diff line
		SequentialForExample.cpp
		No newline at end of file

src/TNL/Algorithms/SequentialFor.h

0 → 100644

+54 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		SequentialFor.h - description
		-------------------
		begin : Apr 5, 2021
		copyright : (C) 2021 by Tomas Oberhuber et al.
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#pragma once

		#include <TNL/Algorithms/ParallelFor.h>


		namespace TNL {
		namespace Algorithms {

		/**
		 * \brief Wrapper to ParallelFor which makes it run sequentially.
		 *
		 * It is helpful for debugging or just for sequential for-loops on GPUs.
		 *
		 * \tparam Device is the device type passed on to \ref ParallelFor.
		 */
		template< typename Device = Devices::Sequential >
		struct SequentialFor
		{
		   /**
		    * \brief Static method for execution of the loop.
		    *
		    * \tparam Index defines the type of indexes over which the loop iterates.
		    * \tparam Function is the type of function to be called in each iteration.
		    *
		    * \param start the for-loop iterates over index interval [start, end).
		    * \param end the for-loop iterates over index interval [start, end).
		    * \param f is the function to be called in each iteration
		    *
		    * \par Example
		    * \include Algorithms/SequentialForExample.cpp
		    * \par Output
		    * \include SequentialForExample.out
		    *
		    */
		   template< typename Index,
		             typename Function >
		   static void exec( Index start, Index end, Function f )
		   {
		      // Every index gets its own one-element ParallelFor call, so the calls
		      // are issued one after another in increasing index order.
		      for( Index i = start; i < end; i++ )
		         // i + 1 is promoted to int when Index is narrower than int; the cast
		         // keeps both bounds passed to ParallelFor of the same type Index.
		         ParallelFor< Device >::exec( i, static_cast< Index >( i + 1 ), f );
		   }
		};


		} // namespace Algorithms
		} // namespace TNL
		No newline at end of file