Loading Documentation/Examples/Algorithms/CMakeLists.txt +24 −4 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu) ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu) ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) ELSE() ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_EXECUTABLE( SortingExample SortingExample.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) ENDIF() ADD_EXECUTABLE(staticForExample staticForExample.cpp) Loading @@ -29,6 +47,8 @@ ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS SortingExample2.out SortingExample3.out ParallelForExample.out reduceArrayExample.out reduceWithArgumentArrayExample.out unrolledForExample.out staticForExample.out ) Documentation/Examples/Containers/ArrayExample_reduceElements.cpp→Documentation/Examples/Algorithms/reduceArrayExample.cpp +12 −9 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Array.h> #include <TNL/Containers/ArrayView.h> #include <TNL/Algorithms/reduce.h> using namespace TNL; template< typename Device > void reduceElementsExample() void reduceArrayExample() { /**** * Create new arrays Loading @@ -22,23 +20,28 @@ void reduceElementsExample() /**** * Sum all elements of array `a` */ auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; }; auto sum = a.reduceEachElement( fetch, std::plus<>{}, 0.0 ); float sum_total = Algorithms::reduce( a, TNL::Plus{} ); /**** * Sum last 5 elements of array `a` */ float sum_last_five = Algorithms::reduce( a.getConstView( 5, 10 ), TNL::Plus{} ); /**** * Print the results */ std::cout << " a = " << a << std::endl; std::cout << " sum = " << sum << std::endl; std::cout << " sum of all elements = " << sum_total << std::endl; std::cout << " sum of last 5 elements = " << sum_last_five << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Running example on the host system: " << std::endl; reduceElementsExample< Devices::Host >(); reduceArrayExample< Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Running example on the CUDA device: " << std::endl; reduceElementsExample< Devices::Cuda >(); reduceArrayExample< Devices::Cuda >(); #endif } Documentation/Examples/Algorithms/reduceArrayExample.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line reduceArrayExample.cpp No newline at end of file Documentation/Examples/Containers/ArrayViewExample_reduceElements.cpp→Documentation/Examples/Algorithms/reduceWithArgumentArrayExample.cpp +41 −0 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Array.h> #include <TNL/Containers/ArrayView.h> #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/reduce.h> using namespace TNL; template< typename Device > void reduceElementsExample() void reduceArrayExample() { /**** * Create new arrays */ const int size = 10; Containers::Array< float, Device > a( size ); auto a_view = a.getView(); Containers::Vector< float, Device > a( size ); /**** * Initiate the elements of array `a` */ a_view.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } ); a.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = 3 - i; } ); /**** * Sum all elements of array `a` * Reduce all elements of array `a` */ auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; }; auto sum = a_view.reduceEachElement( fetch, std::plus<>{}, 0.0 ); std::pair< float, int > result_total = Algorithms::reduceWithArgument( TNL::abs( a ), TNL::MaxWithArg{} ); /**** * Print the results */ std::cout << " a = " << a << std::endl; std::cout << " sum = " << sum << std::endl; std::cout << " abs-max of all elements = " << result_total.first << " at position " << result_total.second << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Running example on the host system: " << std::endl; reduceElementsExample< Devices::Host >(); reduceArrayExample< Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Running example on the CUDA device: " << std::endl; reduceElementsExample< Devices::Cuda >(); reduceArrayExample< Devices::Cuda >(); #endif } Documentation/Examples/Algorithms/reduceWithArgumentArrayExample.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line reduceWithArgumentArrayExample.cpp No newline at end of file Loading
Documentation/Examples/Algorithms/CMakeLists.txt +24 −4 Original line number Diff line number Diff line IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu) ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) CUDA_ADD_EXECUTABLE( SortingExample2Cuda SortingExample2.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample2Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) CUDA_ADD_EXECUTABLE( SortingExample3Cuda SortingExample3.cu) ADD_CUSTOM_COMMAND( COMMAND SortingExample3Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu) ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) CUDA_ADD_EXECUTABLE(reduceArrayExampleCuda reduceArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) CUDA_ADD_EXECUTABLE(reduceWithArgumentArrayExampleCuda reduceWithArgumentArrayExample.cu) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) ELSE() ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_EXECUTABLE( SortingExample SortingExample.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out ) ADD_EXECUTABLE( SortingExample2 SortingExample2.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample2.out OUTPUT SortingExample2.out ) ADD_EXECUTABLE( SortingExample3 SortingExample3.cpp) ADD_CUSTOM_COMMAND( COMMAND SortingExample3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample3.out OUTPUT SortingExample3.out ) ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp) ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out ) ADD_EXECUTABLE(reduceArrayExample reduceArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceArrayExample.out OUTPUT reduceArrayExample.out ) ADD_EXECUTABLE(reduceWithArgumentArrayExample reduceWithArgumentArrayExample.cpp) ADD_CUSTOM_COMMAND( COMMAND reduceWithArgumentArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/reduceWithArgumentArrayExample.out OUTPUT reduceWithArgumentArrayExample.out ) ENDIF() ADD_EXECUTABLE(staticForExample staticForExample.cpp) Loading @@ -29,6 +47,8 @@ ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS SortingExample2.out SortingExample3.out ParallelForExample.out reduceArrayExample.out reduceWithArgumentArrayExample.out unrolledForExample.out staticForExample.out )
Documentation/Examples/Containers/ArrayExample_reduceElements.cpp→Documentation/Examples/Algorithms/reduceArrayExample.cpp +12 −9 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Array.h> #include <TNL/Containers/ArrayView.h> #include <TNL/Algorithms/reduce.h> using namespace TNL; template< typename Device > void reduceElementsExample() void reduceArrayExample() { /**** * Create new arrays Loading @@ -22,23 +20,28 @@ void reduceElementsExample() /**** * Sum all elements of array `a` */ auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; }; auto sum = a.reduceEachElement( fetch, std::plus<>{}, 0.0 ); float sum_total = Algorithms::reduce( a, TNL::Plus{} ); /**** * Sum last 5 elements of array `a` */ float sum_last_five = Algorithms::reduce( a.getConstView( 5, 10 ), TNL::Plus{} ); /**** * Print the results */ std::cout << " a = " << a << std::endl; std::cout << " sum = " << sum << std::endl; std::cout << " sum of all elements = " << sum_total << std::endl; std::cout << " sum of last 5 elements = " << sum_last_five << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Running example on the host system: " << std::endl; reduceElementsExample< Devices::Host >(); reduceArrayExample< Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Running example on the CUDA device: " << std::endl; reduceElementsExample< Devices::Cuda >(); reduceArrayExample< Devices::Cuda >(); #endif }
Documentation/Examples/Algorithms/reduceArrayExample.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line reduceArrayExample.cpp No newline at end of file
Documentation/Examples/Containers/ArrayViewExample_reduceElements.cpp→Documentation/Examples/Algorithms/reduceWithArgumentArrayExample.cpp +41 −0 Original line number Diff line number Diff line #include <iostream> #include <functional> #include <TNL/Containers/Array.h> #include <TNL/Containers/ArrayView.h> #include <TNL/Containers/Vector.h> #include <TNL/Algorithms/reduce.h> using namespace TNL; template< typename Device > void reduceElementsExample() void reduceArrayExample() { /**** * Create new arrays */ const int size = 10; Containers::Array< float, Device > a( size ); auto a_view = a.getView(); Containers::Vector< float, Device > a( size ); /**** * Initiate the elements of array `a` */ a_view.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } ); a.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = 3 - i; } ); /**** * Sum all elements of array `a` * Reduce all elements of array `a` */ auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; }; auto sum = a_view.reduceEachElement( fetch, std::plus<>{}, 0.0 ); std::pair< float, int > result_total = Algorithms::reduceWithArgument( TNL::abs( a ), TNL::MaxWithArg{} ); /**** * Print the results */ std::cout << " a = " << a << std::endl; std::cout << " sum = " << sum << std::endl; std::cout << " abs-max of all elements = " << result_total.first << " at position " << result_total.second << std::endl; } int main( int argc, char* argv[] ) { std::cout << "Running example on the host system: " << std::endl; reduceElementsExample< Devices::Host >(); reduceArrayExample< Devices::Host >(); #ifdef HAVE_CUDA std::cout << "Running example on the CUDA device: " << std::endl; reduceElementsExample< Devices::Cuda >(); reduceArrayExample< Devices::Cuda >(); #endif }
Documentation/Examples/Algorithms/reduceWithArgumentArrayExample.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line reduceWithArgumentArrayExample.cpp No newline at end of file