Commit 3916fab8 authored by Tomáš Oberhuber, committed by Jakub Klinkovský

Added SequentialFor.

parent 7b225e46
1 merge request: !105 TO/matrices-adaptive-csr

CMakeLists.txt
ADD_SUBDIRECTORY( Segments )
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu)
ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )
CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu)
ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )
CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu)
ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )
CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu)
ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )
CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu)
ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
ELSE()
ADD_EXECUTABLE( SortingExample SortingExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )
ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp)
ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out )
ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp)
ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out )
ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out )
ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out )
ENDIF()
ADD_EXECUTABLE(staticForExample staticForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND staticForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/staticForExample.out OUTPUT staticForExample.out )
ADD_EXECUTABLE(unrolledForExample unrolledForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND unrolledForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/unrolledForExample.out OUTPUT unrolledForExample.out )
ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
   SortingExample.out
   SortingExample2.out
   SortingExample3.out
   ParallelForExample.out
   reduceArrayExample.out
   reduceWithArgumentArrayExample.out
   staticForExample.out
   unrolledForExample.out )

set( COMMON_EXAMPLES
   SortingExample
   SortingExample2
   SortingExample3
   ParallelForExample
   SequentialForExample
)
set( HOST_EXAMPLES
   staticForExample
   unrolledForExample
)
if( BUILD_CUDA )
   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
      cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
      add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
   endforeach()
else()
   foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES} )
      add_executable( ${target} ${target}.cpp )
      add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
      set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )
   endforeach()
endif()
IF( BUILD_CUDA )
ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
ELSE()
ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS ${HOST_OUTPUTS} )
ENDIF()

SequentialForExample.cpp
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/SequentialFor.h>
using namespace TNL;
using namespace TNL::Containers;
template< typename Device >
void printVector()
{
   const int size( 36 );
   TNL::Containers::Vector< float, Device > v( size, 1.0 );
   auto view = v.getView();
   auto print = [=] __cuda_callable__ ( int i ) mutable {
      printf( "v[ %d ] = %f \n", i, view[ i ] ); // we use printf because of compatibility with GPU kernels
   };

   std::cout << "Printing vector using parallel for: " << std::endl;
   Algorithms::ParallelFor< Device >::exec( 0, v.getSize(), print );

   std::cout << "Printing vector using sequential for: " << std::endl;
   Algorithms::SequentialFor< Device >::exec( 0, v.getSize(), print );
}
int main( int argc, char* argv[] )
{
   std::cout << "Example on the host:" << std::endl;
   printVector< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   std::cout << "Example on CUDA GPU:" << std::endl;
   printVector< TNL::Devices::Cuda >();
#endif
   return EXIT_SUCCESS;
}
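
Note: the first pass above (ParallelFor) may print the elements out of order when the loop body runs concurrently, for example on the host with OpenMP or on a CUDA GPU. The second pass (SequentialFor) should print all 36 elements in index order on either device, which is exactly the behavior the new wrapper provides.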

SequentialFor.h
/***************************************************************************
                          SequentialFor.h  -  description
                             -------------------
    begin                : Apr 5, 2021
    copyright            : (C) 2021 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */
#pragma once
#include <TNL/Algorithms/ParallelFor.h>
namespace TNL {
namespace Algorithms {
/**
 * \brief Wrapper for ParallelFor which makes it run sequentially.
 *
 * It is helpful for debugging or for running sequential for-loops on GPUs.
 */
template< typename Device = Devices::Sequential >
struct SequentialFor
{
   /**
    * \brief Static method for execution of the loop.
    *
    * \tparam Index defines the type of indexes over which the loop iterates.
    * \tparam Function is the type of function to be called in each iteration.
    *
    * \param start the for-loop iterates over index interval [start, end).
    * \param end the for-loop iterates over index interval [start, end).
    * \param f is the function to be called in each iteration.
    *
    * \par Example
    * \include Algorithms/SequentialForExample.cpp
    * \par Output
    * \include SequentialForExample.out
    */
   template< typename Index,
             typename Function >
   static void exec( Index start, Index end, Function f )
   {
      // run each index as a one-element ParallelFor: the iterations are
      // serialized, but the loop body still executes on the given Device
      for( Index i = start; i < end; i++ )
         ParallelFor< Device >::exec( i, i + 1, f );
   }
};
} // namespace Algorithms
} // namespace TNL
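
To make the use case from the comment above more concrete, here is a minimal sketch of an order-dependent loop that is only correct when the iterations run one after another. It is not part of the commit: the runningSum helper and its prefix-sum body are illustrative assumptions, while the TNL types and calls (Containers::Vector, getView(), Algorithms::SequentialFor::exec, the __cuda_callable__ attribute) are the ones used in the files above.

#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/SequentialFor.h>

using namespace TNL;

// Hypothetical example, not part of the commit: a running (prefix) sum.
// Each iteration reads the element written by the previous one, so the
// loop must not be parallelized, but the body may still run on the device.
template< typename Device >
void runningSum()
{
   Containers::Vector< float, Device > v( 5, 1.0 );
   auto view = v.getView();
   auto sum = [=] __cuda_callable__ ( int i ) mutable {
      view[ i ] += view[ i - 1 ];   // order-dependent update
   };
   // SequentialFor serializes the iterations (each one is a one-element
   // ParallelFor, see exec() above), so the result is well defined.
   Algorithms::SequentialFor< Device >::exec( 1, v.getSize(), sum );
   std::cout << v << std::endl;   // expected contents: 1, 2, 3, 4, 5
}

int main( int argc, char* argv[] )
{
   runningSum< TNL::Devices::Host >();
#ifdef HAVE_CUDA
   runningSum< TNL::Devices::Cuda >();
#endif
   return EXIT_SUCCESS;
}

Under ParallelFor the same lambda could read a neighbouring element before or after it has been updated, so the result would be unspecified; loops like this are what the sequential wrapper is intended for.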