Merge branch 'JK/scan' into 'develop' (b3a8feac) · Commits · TNL / tnl-dev

Documentation/Examples/Algorithms/CMakeLists.txt

+24 −4

Original line number	Diff line number	Diff line
		# Build each algorithm example exactly once and register a rule that runs it
		# and redirects its standard output into the documentation snippets directory.
		#
		# With BUILD_CUDA the `.cu` source is compiled into `<name>Cuda`; otherwise the
		# `.cpp` source is compiled into `<name>`. Either way the captured output is
		# named `<name>.out`, so the names listed by the dependent custom target do not
		# depend on the chosen backend.
		#
		# Every example name must appear only once in the list below: defining the
		# same executable target twice is a CMake error.
		#
		# NOTE(review): the command writes to ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}
		# while OUTPUT names a relative `<name>.out`; the declared output is therefore
		# presumably never created and the rule re-runs on every build. Left unchanged
		# because the RunAlgorithmsExamples target depends on these exact names.
		FOREACH( example IN ITEMS
		      SortingExample
		      SortingExample2
		      SortingExample3
		      ParallelForExample
		      reduceArrayExample
		      reduceWithArgumentArrayExample )
		   IF( BUILD_CUDA )
		      CUDA_ADD_EXECUTABLE( ${example}Cuda ${example}.cu )
		      ADD_CUSTOM_COMMAND( COMMAND ${example}Cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out OUTPUT ${example}.out )
		   ELSE()
		      ADD_EXECUTABLE( ${example} ${example}.cpp )
		      ADD_CUSTOM_COMMAND( COMMAND ${example} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${example}.out OUTPUT ${example}.out )
		   ENDIF()
		ENDFOREACH()

		# staticForExample is declared outside the BUILD_CUDA branch, so it is built
		# from its .cpp source with ADD_EXECUTABLE regardless of that option.
		ADD_EXECUTABLE(staticForExample staticForExample.cpp)
		@@ -29,6 +47,8 @@ ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
		SortingExample2.out
		SortingExample3.out
		ParallelForExample.out
		reduceArrayExample.out
		reduceWithArgumentArrayExample.out
		unrolledForExample.out
		staticForExample.out
		)

Documentation/Examples/Containers/ArrayExample_reduceElements.cpp→Documentation/Examples/Algorithms/reduceArrayExample.cpp

+12 −9

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <functional>
		#include <TNL/Containers/Array.h>
		#include <TNL/Containers/ArrayView.h>
		#include <TNL/Algorithms/reduce.h>

		using namespace TNL;

		template< typename Device >
		void reduceElementsExample()
		void reduceArrayExample()
		{
		/****
		* Create new arrays
		@@ -22,23 +20,28 @@ void reduceElementsExample()
		/****
		* Sum all elements of array `a`
		*/
		auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; };
		auto sum = a.reduceEachElement( fetch, std::plus<>{}, 0.0 );
		float sum_total = Algorithms::reduce( a, TNL::Plus{} );

		/****
		* Sum last 5 elements of array `a`
		*/
		float sum_last_five = Algorithms::reduce( a.getConstView( 5, 10 ), TNL::Plus{} );

		/****
		* Print the results
		*/
		std::cout << " a = " << a << std::endl;
		std::cout << " sum = " << sum << std::endl;
		std::cout << " sum of all elements = " << sum_total << std::endl;
		std::cout << " sum of last 5 elements = " << sum_last_five << std::endl;
		}

		/**
		 * Entry point of the example.
		 *
		 * Runs reduceArrayExample on the host and, when the translation unit is
		 * compiled with HAVE_CUDA defined, once more on the CUDA device.
		 * The command-line arguments are not used.
		 */
		int main( int argc, char* argv[] )
		{
		   std::cout << "Running example on the host system: " << std::endl;
		   reduceArrayExample< Devices::Host >();

		#ifdef HAVE_CUDA
		   std::cout << "Running example on the CUDA device: " << std::endl;
		   reduceArrayExample< Devices::Cuda >();
		#endif
		}

Documentation/Examples/Algorithms/reduceArrayExample.cu

0 → 120000

+1 −0

Original line number	Diff line number	Diff line
		reduceArrayExample.cpp
		No newline at end of file

Documentation/Examples/Containers/ArrayViewExample_reduceElements.cpp→Documentation/Examples/Algorithms/reduceWithArgumentArrayExample.cpp

+41 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <functional>
		#include <TNL/Containers/Array.h>
		#include <TNL/Containers/ArrayView.h>
		#include <TNL/Containers/Vector.h>
		#include <TNL/Algorithms/reduce.h>

		using namespace TNL;

		template< typename Device >
		void reduceElementsExample()
		void reduceArrayExample()
		{
		/****
		* Create new arrays
		*/
		const int size = 10;
		Containers::Array< float, Device > a( size );
		auto a_view = a.getView();
		Containers::Vector< float, Device > a( size );

		/****
		* Initiate the elements of array `a`
		*/
		a_view.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = i; } );
		a.forAllElements( [] __cuda_callable__ ( int i, float& value ) { value = 3 - i; } );

		/****
		* Sum all elements of array `a`
		* Reduce all elements of array `a`
		*/
		auto fetch = [=] __cuda_callable__ ( int i, float& value ) { return value; };
		auto sum = a_view.reduceEachElement( fetch, std::plus<>{}, 0.0 );
		std::pair< float, int > result_total = Algorithms::reduceWithArgument( TNL::abs( a ), TNL::MaxWithArg{} );

		/****
		* Print the results
		*/
		std::cout << " a = " << a << std::endl;
		std::cout << " sum = " << sum << std::endl;
		std::cout << " abs-max of all elements = " << result_total.first << " at position " << result_total.second << std::endl;
		}

		/**
		 * Entry point of the example.
		 *
		 * Runs reduceArrayExample on the host and, when the translation unit is
		 * compiled with HAVE_CUDA defined, once more on the CUDA device.
		 * The command-line arguments are not used.
		 */
		int main( int argc, char* argv[] )
		{
		   std::cout << "Running example on the host system: " << std::endl;
		   reduceArrayExample< Devices::Host >();

		#ifdef HAVE_CUDA
		   std::cout << "Running example on the CUDA device: " << std::endl;
		   reduceArrayExample< Devices::Cuda >();
		#endif
		}

Documentation/Examples/Algorithms/reduceWithArgumentArrayExample.cu

0 → 120000

+1 −0

Original line number	Diff line number	Diff line
		reduceWithArgumentArrayExample.cpp
		No newline at end of file