Loading Documentation/Tutorials/Reduction/CMakeLists.txt +13 −1 Original line number Diff line number Diff line Loading @@ -11,6 +11,14 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND ComparisonExample > ComparisonExample.out OUTPUT ComparisonExample.out ) CUDA_ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cu ) ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out ) CUDA_ADD_EXECUTABLE( MaskAndReduceExample-1 MaskAndReduceExample-1.cu ) ADD_CUSTOM_COMMAND( COMMAND MaskAndReduceExample-1 > MaskAndReduceExample-1.out OUTPUT MaskAndReduceExample-1.out ) CUDA_ADD_EXECUTABLE( MaskAndReduceExample-2 MaskAndReduceExample-2.cu ) ADD_CUSTOM_COMMAND( COMMAND MaskAndReduceExample-2 > MaskAndReduceExample-2.out OUTPUT MaskAndReduceExample-2.out ) CUDA_ADD_EXECUTABLE( MaskAndReduceExample-3 MaskAndReduceExample-3.cu ) ADD_CUSTOM_COMMAND( COMMAND MaskAndReduceExample-3 > MaskAndReduceExample-3.out OUTPUT MaskAndReduceExample-3.out ) ADD_EXECUTABLE( ReductionInMethodsExample ReductionInMethodsExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND ReductionInMethodsExample > ReductionInMethodsExample.out OUTPUT ReductionInMethodsExample.out ) ENDIF() IF( BUILD_CUDA ) Loading @@ -20,5 +28,9 @@ ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS ScalarProductExample.out MaximumNormExample.out ComparisonExample.out UpdateAndResidueExample.out ) UpdateAndResidueExample.out MaskAndReduceExample-1.out MaskAndReduceExample-2.out MaskAndReduceExample-3.out ReductionInMethodsExample.out ) ENDIF() Documentation/Tutorials/Reduction/MaskAndReduceExample-1.cpp 0 → 100644 +39 −0 Original line number Diff line number Diff line #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Timer.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Containers::Algorithms; /* Example 1: fetch yields u[ i ] when it is greater than zero and 0.0 otherwise; reduce/volatileReduce accumulate with +=, starting from 0.0. Presumably Reduction::reduce folds fetch( i ) over [0, size) - confirm against the TNL docs. */ template< typename Device > double maskAndReduce( Vector< 
double, Device >& u ) { auto u_view = u.getView(); auto fetch = [=] __cuda_callable__ ( int i )->double { return u_view[ i ] > 0 ? u_view[ i ] : 0.0; }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a += b; }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a += b; }; return Reduction< Device >::reduce( u_view.getSize(), reduce, volatileReduce, fetch, 0.0 ); } /* Creates a host vector of size 10, passes sin( i ) to evaluate(), then prints the vector and the masked sum; the same is done for a CUDA copy when HAVE_CUDA is defined. NOTE(review): timer is declared but never used here, and sin is called without <cmath> being included directly. */ int main( int argc, char* argv[] ) { Timer timer; Vector< double, Devices::Host > host_u( 10 ); host_u.evaluate( [] __cuda_callable__ ( int i ) { return sin( ( double ) i ); } ); double result = maskAndReduce( host_u ); std::cout << "host_u = " << host_u << std::endl; std::cout << "Sum of the positive numbers is:" << result << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_u( 10 ); cuda_u = host_u; result = maskAndReduce( cuda_u ); std::cout << "cuda_u = " << cuda_u << std::endl; std::cout << "Sum of the positive numbers is:" << result << std::endl; #endif return EXIT_SUCCESS; } Documentation/Tutorials/Reduction/MaskAndReduceExample-1.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line MaskAndReduceExample-1.cpp No newline at end of file Documentation/Tutorials/Reduction/MaskAndReduceExample-2.cpp 0 → 100644 +43 −0 Original line number Diff line number Diff line #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Timer.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Containers::Algorithms; /* Example 2: fetch yields u[ i ] for even i and 0.0 for odd i; reduce/volatileReduce accumulate with +=, starting from 0.0. */ template< typename Device > double maskAndReduce( Vector< double, Device >& u ) { auto u_view = u.getView(); auto fetch = [=] __cuda_callable__ ( int i )->double { if( i % 2 == 0 ) return u_view[ i ]; return 0.0; }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a += b; }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a += b; }; return 
Reduction< Device >::reduce( u_view.getSize(), reduce, volatileReduce, fetch, 0.0 ); } /* Sets a host vector of size 100000 to 1.0 and wraps the maskAndReduce call in timer.start()/timer.stop(); the CUDA branch resets the timer first. NOTE(review): "tesult" looks like a typo for "result", and "seconds." is printed with no space after the number. */ int main( int argc, char* argv[] ) { Timer timer; Vector< double, Devices::Host > host_u( 100000 ); host_u = 1.0; timer.start(); double result = maskAndReduce( host_u ); timer.stop(); std::cout << "Host tesult is:" << result << ". It took " << timer.getRealTime() << "seconds." << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_u( 100000 ); cuda_u = 1.0; timer.reset(); timer.start(); result = maskAndReduce( cuda_u ); timer.stop(); std::cout << "CUDA result is:" << result << ". It took " << timer.getRealTime() << "seconds." << std::endl; #endif return EXIT_SUCCESS; } Documentation/Tutorials/Reduction/MaskAndReduceExample-2.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line MaskAndReduceExample-2.cpp No newline at end of file Loading
Documentation/Tutorials/Reduction/CMakeLists.txt +13 −1 Original line number Diff line number Diff line Loading @@ -11,6 +11,14 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND ComparisonExample > ComparisonExample.out OUTPUT ComparisonExample.out ) CUDA_ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cu ) ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out ) CUDA_ADD_EXECUTABLE( MaskAndReduceExample-1 MaskAndReduceExample-1.cu ) ADD_CUSTOM_COMMAND( COMMAND MaskAndReduceExample-1 > MaskAndReduceExample-1.out OUTPUT MaskAndReduceExample-1.out ) CUDA_ADD_EXECUTABLE( MaskAndReduceExample-2 MaskAndReduceExample-2.cu ) ADD_CUSTOM_COMMAND( COMMAND MaskAndReduceExample-2 > MaskAndReduceExample-2.out OUTPUT MaskAndReduceExample-2.out ) CUDA_ADD_EXECUTABLE( MaskAndReduceExample-3 MaskAndReduceExample-3.cu ) ADD_CUSTOM_COMMAND( COMMAND MaskAndReduceExample-3 > MaskAndReduceExample-3.out OUTPUT MaskAndReduceExample-3.out ) ADD_EXECUTABLE( ReductionInMethodsExample ReductionInMethodsExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND ReductionInMethodsExample > ReductionInMethodsExample.out OUTPUT ReductionInMethodsExample.out ) ENDIF() IF( BUILD_CUDA ) Loading @@ -20,5 +28,9 @@ ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS ScalarProductExample.out MaximumNormExample.out ComparisonExample.out UpdateAndResidueExample.out ) UpdateAndResidueExample.out MaskAndReduceExample-1.out MaskAndReduceExample-2.out MaskAndReduceExample-3.out ReductionInMethodsExample.out ) ENDIF()
Documentation/Tutorials/Reduction/MaskAndReduceExample-1.cpp 0 → 100644 +39 −0 Original line number Diff line number Diff line #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Timer.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Containers::Algorithms; template< typename Device > double maskAndReduce( Vector< double, Device >& u ) { auto u_view = u.getView(); auto fetch = [=] __cuda_callable__ ( int i )->double { return u_view[ i ] > 0 ? u_view[ i ] : 0.0; }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a += b; }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a += b; }; return Reduction< Device >::reduce( u_view.getSize(), reduce, volatileReduce, fetch, 0.0 ); } int main( int argc, char* argv[] ) { Timer timer; Vector< double, Devices::Host > host_u( 10 ); host_u.evaluate( [] __cuda_callable__ ( int i ) { return sin( ( double ) i ); } ); double result = maskAndReduce( host_u ); std::cout << "host_u = " << host_u << std::endl; std::cout << "Sum of the positive numbers is:" << result << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_u( 10 ); cuda_u = host_u; result = maskAndReduce( cuda_u ); std::cout << "cuda_u = " << cuda_u << std::endl; std::cout << "Sum of the positive numbers is:" << result << std::endl; #endif return EXIT_SUCCESS; }
Documentation/Tutorials/Reduction/MaskAndReduceExample-1.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line MaskAndReduceExample-1.cpp No newline at end of file
Documentation/Tutorials/Reduction/MaskAndReduceExample-2.cpp 0 → 100644 +43 −0 Original line number Diff line number Diff line #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Timer.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Containers::Algorithms; template< typename Device > double maskAndReduce( Vector< double, Device >& u ) { auto u_view = u.getView(); auto fetch = [=] __cuda_callable__ ( int i )->double { if( i % 2 == 0 ) return u_view[ i ]; return 0.0; }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a += b; }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a += b; }; return Reduction< Device >::reduce( u_view.getSize(), reduce, volatileReduce, fetch, 0.0 ); } int main( int argc, char* argv[] ) { Timer timer; Vector< double, Devices::Host > host_u( 100000 ); host_u = 1.0; timer.start(); double result = maskAndReduce( host_u ); timer.stop(); std::cout << "Host tesult is:" << result << ". It took " << timer.getRealTime() << "seconds." << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_u( 100000 ); cuda_u = 1.0; timer.reset(); timer.start(); result = maskAndReduce( cuda_u ); timer.stop(); std::cout << "CUDA result is:" << result << ". It took " << timer.getRealTime() << "seconds." << std::endl; #endif return EXIT_SUCCESS; }
Documentation/Tutorials/Reduction/MaskAndReduceExample-2.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line MaskAndReduceExample-2.cpp No newline at end of file