Parallel reduction tutorial. (35f8fd27) · Commits · TNL / tnl-dev

Documentation/Tutorials/Arrays/tutorial_01_Arrays.md

+3 −0

Original line number	Diff line number	Diff line
		@@ -15,6 +15,7 @@ This tutorial introduces arrays in TNL. Array is one of the most important struc
		5. [Checking the array contents](#checking_the_array_contents)
		6. [IO operations with arrays](#io_operations_with-arrays)
		2. [Static arrays](#static_arrays)
		3. [Distributed arrays](#distributed_arrays)

		## Arrays <a name="arrays"></a>

		@@ -148,3 +149,5 @@ Output:
		\include ArrayIO.out

		## Static arrays <a name="static_arrays"></a>

		## Distributed arrays <a name="distributed_arrays"></a>

+12 −3

Original line number	Diff line number	Diff line
		# Build rules for the reduction tutorial examples; only active when BUILD_CUDA is set.
		# Each example is compiled from its .cu source with CUDA_ADD_EXECUTABLE, and a custom
		# command is registered that runs the executable and redirects its standard output into
		# <Example>.out, which is declared as that command's OUTPUT.
		IF( BUILD_CUDA )
		# NOTE(review): the ArrayAllocation rules below are commented out -- presumably a
		# leftover from the Arrays tutorial this file was copied from; confirm and remove.
		# CUDA_ADD_EXECUTABLE( ArrayAllocation ArrayAllocation.cu )
		# ADD_CUSTOM_COMMAND( COMMAND ArrayAllocation > ArrayAllocation.out OUTPUT ArrayAllocation.out )
		CUDA_ADD_EXECUTABLE( SumExample SumExample.cu )
		ADD_CUSTOM_COMMAND( COMMAND SumExample > SumExample.out OUTPUT SumExample.out )
		CUDA_ADD_EXECUTABLE( ProductExample ProductExample.cu )
		ADD_CUSTOM_COMMAND( COMMAND ProductExample > ProductExample.out OUTPUT ProductExample.out )
		CUDA_ADD_EXECUTABLE( ScalarProductExample ScalarProductExample.cu )
		ADD_CUSTOM_COMMAND( COMMAND ScalarProductExample > ScalarProductExample.out OUTPUT ScalarProductExample.out )
		CUDA_ADD_EXECUTABLE( MaximumNormExample MaximumNormExample.cu )
		ADD_CUSTOM_COMMAND( COMMAND MaximumNormExample > MaximumNormExample.out OUTPUT MaximumNormExample.out )
		ENDIF()

		# Umbrella target for the reduction tutorial outputs; only defined when BUILD_CUDA is set.
		# ALL adds the target to the default build, and listing the .out files under DEPENDS
		# makes the build run the custom commands that generate them.
		# The DEPENDS list must stay inside the ADD_CUSTOM_TARGET( ... ) call: closing the
		# parenthesis before the file names would leave them as stray tokens outside any command.
		IF( BUILD_CUDA )
		ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS
		SumExample.out
		ProductExample.out
		ScalarProductExample.out
		MaximumNormExample.out )
		ENDIF()

0 → 100644

+34 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <cstdlib>
		#include <TNL/Containers/Vector.h>
		#include <TNL/Containers/Algorithms/Reduction.h>

		using namespace TNL;
		using namespace TNL::Containers;
		using namespace TNL::Containers::Algorithms;

		/**
		 * Computes the maximum norm of a vector, i.e. the largest absolute value of its elements,
		 * by means of Reduction< Device >::reduce.
		 *
		 * \tparam Device device type of the vector; it also selects the Reduction implementation.
		 * \param v vector whose elements are reduced.
		 * \return the value returned by the reduction; 0.0 is passed as the initial (identity) value.
		 */
		template< typename Device >
		double maximumNorm( const Vector< double, Device >& v )
		{
		   // The lambdas capture a view of the vector by value instead of the vector itself.
		   auto elements = v.getView();

		   // Yields the absolute value of the element at the given index.
		   auto absoluteValueAt = [=] __cuda_callable__ ( int idx ) { return abs( elements[ idx ] ); };

		   // Folds one fetched value into the running maximum.
		   auto keepLarger = [] __cuda_callable__ ( double& accumulated, const double& candidate ) {
		      accumulated = max( accumulated, candidate );
		   };

		   // Same operation on volatile operands, as required by the Reduction::reduce interface
		   // (presumably used by the CUDA implementation -- confirm against the TNL documentation).
		   auto keepLargerVolatile = [] __cuda_callable__ ( volatile double& accumulated, const volatile double& candidate ) {
		      accumulated = max( accumulated, candidate );
		   };

		   return Reduction< Device >::reduce( v.getSize(), keepLarger, keepLargerVolatile, absoluteValueAt, 0.0 );
		}

		/**
		 * Example driver: prints a 10-element host vector and its maximum norm and, when
		 * HAVE_CUDA is defined, repeats the same steps with a CUDA vector.
		 *
		 * \param argc number of command-line arguments (unused).
		 * \param argv command-line arguments (unused).
		 * \return EXIT_SUCCESS.
		 */
		int main( int argc, char* argv[] )
		{
		   // The lambda maps index i to i - 7; presumably evaluate() stores that value in
		   // element i, giving elements -7, ..., 2 -- confirm against Vector::evaluate.
		   Vector< double, Devices::Host > host_v( 10 );
		   host_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } );
		   // Terminate the line with std::endl; streaming std::cout into itself was a typo.
		   std::cout << "host_v = " << host_v << std::endl;
		   std::cout << "The maximum norm of the host vector elements is " << maximumNorm( host_v ) << "." << std::endl;
		#ifdef HAVE_CUDA
		   Vector< double, Devices::Cuda > cuda_v( 10 );
		   cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } );
		   std::cout << "cuda_v = " << cuda_v << std::endl;
		   std::cout << "The maximum norm of the CUDA vector elements is " << maximumNorm( cuda_v ) << "." << std::endl;
		#endif
		   return EXIT_SUCCESS;
		}

0 → 120000

+1 −0

Original line number	Diff line number	Diff line
		MaximumNormExample.cpp
		No newline at end of file

0 → 100644

+34 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <cstdlib>
		#include <TNL/Containers/Vector.h>
		#include <TNL/Containers/Algorithms/Reduction.h>

		using namespace TNL;
		using namespace TNL::Containers;
		using namespace TNL::Containers::Algorithms;

		/**
		 * Computes the product of all elements of a vector by means of Reduction< Device >::reduce.
		 *
		 * \tparam Device device type of the vector; it also selects the Reduction implementation.
		 * \param v vector whose elements are multiplied together.
		 * \return the value returned by the reduction; 1.0 is passed as the initial (identity) value.
		 */
		template< typename Device >
		double product( const Vector< double, Device >& v )
		{
		   // The lambdas capture a view of the vector by value instead of the vector itself.
		   auto elements = v.getView();

		   // Yields the element at the given index.
		   auto elementAt = [=] __cuda_callable__ ( int idx ) { return elements[ idx ]; };

		   // Folds one fetched value into the running product.
		   auto multiplyInto = [] __cuda_callable__ ( double& accumulated, const double& factor ) {
		      accumulated *= factor;
		   };

		   // Same operation on volatile operands, as required by the Reduction::reduce interface
		   // (presumably used by the CUDA implementation -- confirm against the TNL documentation).
		   auto multiplyIntoVolatile = [] __cuda_callable__ ( volatile double& accumulated, const volatile double& factor ) {
		      accumulated *= factor;
		   };

		   return Reduction< Device >::reduce( v.getSize(), multiplyInto, multiplyIntoVolatile, elementAt, 1.0 );
		}

		/**
		 * Example driver: prints a 10-element host vector and the product of its elements and,
		 * when HAVE_CUDA is defined, repeats the same steps with a CUDA vector.
		 *
		 * \param argc number of command-line arguments (unused).
		 * \param argv command-line arguments (unused).
		 * \return EXIT_SUCCESS.
		 */
		int main( int argc, char* argv[] )
		{
		   Vector< double, Devices::Host > host_v( 10 );
		   // Presumably assigns 1.0 to every element -- confirm against Vector::operator=.
		   host_v = 1.0;
		   // Terminate the line with std::endl; streaming std::cout into itself was a typo.
		   std::cout << "host_v = " << host_v << std::endl;
		   std::cout << "The product of the host vector elements is " << product( host_v ) << "." << std::endl;
		#ifdef HAVE_CUDA
		   Vector< double, Devices::Cuda > cuda_v( 10 );
		   cuda_v = 1.0;
		   std::cout << "cuda_v = " << cuda_v << std::endl;
		   std::cout << "The product of the CUDA vector elements is " << product( cuda_v ) << "." << std::endl;
		#endif
		   return EXIT_SUCCESS;
		}