Writing tutorial on parallel reduction. (2d0b5ea5) · Commits · TNL / tnl-dev

Documentation/Tutorials/Reduction/CMakeLists.txt

+11 −1

Original line number	Diff line number	Diff line
		@@ -7,12 +7,22 @@ IF( BUILD_CUDA )
		ADD_CUSTOM_COMMAND( COMMAND ScalarProductExample > ScalarProductExample.out OUTPUT ScalarProductExample.out )
		CUDA_ADD_EXECUTABLE( MaximumNormExample MaximumNormExample.cu )
		ADD_CUSTOM_COMMAND( COMMAND MaximumNormExample > MaximumNormExample.out OUTPUT MaximumNormExample.out )
		CUDA_ADD_EXECUTABLE( ComparisonExample ComparisonExample.cu )
		ADD_CUSTOM_COMMAND( COMMAND ComparisonExample > ComparisonExample.out OUTPUT ComparisonExample.out )
		# CUDA_ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cu )
		# ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out )
		ENDIF()

		ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cpp )
		ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out )


		IF( BUILD_CUDA )
		ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS
		SumExample.out
		ProductExample.out
		ScalarProductExample.out
		MaximumNormExample.out )
		MaximumNormExample.out
		ComparisonExample.out
		UpdateAndResidueExample.out )
		ENDIF()

Documentation/Tutorials/Reduction/ComparisonExample.cpp

0 → 100644

+49 −0

Original line number	Diff line number	Diff line
		#include <iostream>
		#include <cstdlib>
		#include <TNL/Containers/Vector.h>
		#include <TNL/Containers/Algorithms/Reduction.h>

		using namespace TNL;
		using namespace TNL::Containers;
		using namespace TNL::Containers::Algorithms;

		/***
		 * Checks whether two vectors hold the same elements.
		 *
		 * Device is the device the vectors are allocated on; the same type is
		 * passed to Reduction< Device >, which performs the computation.
		 *
		 * u and v are the vectors to compare. Their elements are compared with
		 * operator==, i.e. exactly and without any tolerance.
		 *
		 * Returns true if both vectors have the same size and u[ i ] == v[ i ] holds
		 * for every index i handed to fetch by the reduction, false otherwise.
		 */
		template< typename Device >
		bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v )
		{
		   /***
		    * Vectors of different sizes can never be equal. Returning early also keeps
		    * fetch from indexing past the end of the shorter vector.
		    */
		   if( u.getSize() != v.getSize() )
		      return false;

		   /***
		    * The lambdas below capture views by value instead of the vectors themselves.
		    */
		   auto u_view = u.getView();
		   auto v_view = v.getView();

		   /***
		    * Fetch compares corresponding elements of both vectors.
		    */
		   auto fetch = [=] __cuda_callable__ ( int i )->bool { return ( u_view[ i ] == v_view[ i ] ); };

		   /***
		    * Reduce performs logical AND on intermediate results obtained by fetch.
		    * The same operation is also supplied for volatile operands, because
		    * Reduction::reduce takes both variants.
		    */
		   auto reduce = [] __cuda_callable__ ( bool& a, const bool& b ) { a = ( a && b ); };
		   auto volatileReduce = [=] __cuda_callable__ ( volatile bool& a, const volatile bool& b ) { a = ( a && b ); };

		   /***
		    * Since the partial results are combined by logical AND, the reduction
		    * must be initialized by true.
		    */
		   return Reduction< Device >::reduce( v_view.getSize(), reduce, volatileReduce, fetch, true );
		}

		/***
		 * Runs the comparison example: two vectors are set up on the host and each
		 * pair (u, v) and (u, u) is compared. When HAVE_CUDA is defined, the same
		 * steps are repeated with vectors allocated on the CUDA device.
		 */
		int main( int argc, char* argv[] )
		{
		   /***
		    * Turns the result of a comparison into the text printed below.
		    */
		   auto asText = [] ( bool equal ) -> const char* { return equal ? "'true'" : "'false'"; };

		   /***
		    * The first test on CPU: u is set to 1.0 everywhere, v is filled by evaluate ...
		    */
		   Vector< double, Devices::Host > host_u( 10 );
		   Vector< double, Devices::Host > host_v( 10 );
		   host_u = 1.0;
		   host_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } );
		   std::cout << "host_u = " << host_u << std::endl;
		   std::cout << "host_v = " << host_v << std::endl;

		   const bool hostMixedEqual = comparison( host_u, host_v );
		   std::cout << "Comparison of host_u and host_v is: " << asText( hostMixedEqual ) << "." << std::endl;
		   const bool hostSelfEqual = comparison( host_u, host_u );
		   std::cout << "Comparison of host_u and host_u is: " << asText( hostSelfEqual ) << "." << std::endl;

		#ifdef HAVE_CUDA
		   /***
		    * ... the second test on GPU with vectors set up the same way.
		    */
		   Vector< double, Devices::Cuda > cuda_u( 10 );
		   Vector< double, Devices::Cuda > cuda_v( 10 );
		   cuda_u = 1.0;
		   cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } );
		   std::cout << "cuda_u = " << cuda_u << std::endl;
		   std::cout << "cuda_v = " << cuda_v << std::endl;

		   const bool cudaMixedEqual = comparison( cuda_u, cuda_v );
		   std::cout << "Comparison of cuda_u and cuda_v is: " << asText( cudaMixedEqual ) << "." << std::endl;
		   const bool cudaSelfEqual = comparison( cuda_u, cuda_u );
		   std::cout << "Comparison of cuda_u and cuda_u is: " << asText( cudaSelfEqual ) << "." << std::endl;
		#endif

		   return EXIT_SUCCESS;
		}

Documentation/Tutorials/Reduction/ComparisonExample.cu

0 → 120000

+1 −0

Original line number	Diff line number	Diff line
		ComparisonExample.cpp
		No newline at end of file

Documentation/Tutorials/Reduction/MaximumNormExample.cpp

+3 −3

Original line number	Diff line number	Diff line
		@@ -14,19 +14,19 @@ double maximumNorm( const Vector< double, Device >& v )
		auto fetch = [=] __cuda_callable__ ( int i ) { return abs( view[ i ] ); };
		auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a = max( a, b ); };
		auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a = max( a ,b ); };
		return Reduction< Device >::reduce( v.getSize(), reduce, volatileReduce, fetch, 0.0 );
		return Reduction< Device >::reduce( view.getSize(), reduce, volatileReduce, fetch, 0.0 );
		}

		int main( int argc, char* argv[] )
		{
		Vector< double, Devices::Host > host_v( 10 );
		host_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } );
		std::cout << "host_v = " << host_v << std::cout;
		std::cout << "host_v = " << host_v << std::endl;
		std::cout << "The maximum norm of the host vector elements is " << maximumNorm( host_v ) << "." << std::endl;
		#ifdef HAVE_CUDA
		Vector< double, Devices::Cuda > cuda_v( 10 );
		cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } );
		std::cout << "cuda_v = " << cuda_v << std::cout;
		std::cout << "cuda_v = " << cuda_v << std::endl;
		std::cout << "The maximum norm of the CUDA vector elements is " << maximumNorm( cuda_v ) << "." << std::endl;
		#endif
		return EXIT_SUCCESS;

Documentation/Tutorials/Reduction/ProductExample.cpp

+14 −3

Original line number	Diff line number	Diff line
		@@ -14,19 +14,30 @@ double product( const Vector< double, Device >& v )
		auto fetch = [=] __cuda_callable__ ( int i ) { return view[ i ]; };
		auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a *= b; };
		auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a *= b; };
		return Reduction< Device >::reduce( v.getSize(), reduce, volatileReduce, fetch, 1.0 );

		/***
		* Since we compute the product of all elements, the reduction must be initialized by 1.0 not by 0.0.
		*/
		return Reduction< Device >::reduce( view.getSize(), reduce, volatileReduce, fetch, 1.0 );
		}

		int main( int argc, char* argv[] )
		{
		/***
		* The first test on CPU ...
		*/
		Vector< double, Devices::Host > host_v( 10 );
		host_v = 1.0;
		std::cout << "host_v = " << host_v << std::cout;
		std::cout << "host_v = " << host_v << std::endl;
		std::cout << "The product of the host vector elements is " << product( host_v ) << "." << std::endl;

		/***
		* ... the second test on GPU.
		*/
		#ifdef HAVE_CUDA
		Vector< double, Devices::Cuda > cuda_v( 10 );
		cuda_v = 1.0;
		std::cout << "cuda_v = " << cuda_v << std::cout;
		std::cout << "cuda_v = " << cuda_v << std::endl;
		std::cout << "The product of the CUDA vector elements is " << product( cuda_v ) << "." << std::endl;
		#endif
		return EXIT_SUCCESS;