Loading Documentation/Tutorials/Reduction/CMakeLists.txt +11 −1 Original line number Diff line number Diff line Loading @@ -7,12 +7,22 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND ScalarProductExample > ScalarProductExample.out OUTPUT ScalarProductExample.out ) CUDA_ADD_EXECUTABLE( MaximumNormExample MaximumNormExample.cu ) ADD_CUSTOM_COMMAND( COMMAND MaximumNormExample > MaximumNormExample.out OUTPUT MaximumNormExample.out ) CUDA_ADD_EXECUTABLE( ComparisonExample ComparisonExample.cu ) ADD_CUSTOM_COMMAND( COMMAND ComparisonExample > ComparisonExample.out OUTPUT ComparisonExample.out ) # CUDA_ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cu ) # ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out ) ENDIF() ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out ) IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS SumExample.out ProductExample.out ScalarProductExample.out MaximumNormExample.out ) MaximumNormExample.out ComparisonExample.out UpdateAndResidueExample.out ) ENDIF() Documentation/Tutorials/Reduction/ComparisonExample.cpp 0 → 100644 +49 −0 Original line number Diff line number Diff line #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> #include <TNL/Containers/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Containers::Algorithms; template< typename Device > bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v ) { auto u_view = u.getView(); auto v_view = v.getView(); /*** * Fetch compares corresponding elements of both vectors */ auto fetch = [=] __cuda_callable__ ( int i )->bool { return ( u_view[ i ] == v_view[ i ] ); }; /*** * Reduce performs logical AND on intermediate results obtained by fetch. */ auto reduce = [] __cuda_callable__ ( bool& a, const bool& b ) { a = ( a && b ); }; auto volatileReduce = [=] __cuda_callable__ ( volatile bool& a, const volatile bool& b ) { a = ( a && b ); }; return Reduction< Device >::reduce( v_view.getSize(), reduce, volatileReduce, fetch, true ); } int main( int argc, char* argv[] ) { Vector< double, Devices::Host > host_u( 10 ), host_v( 10 ); host_u = 1.0; host_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } ); std::cout << "host_u = " << host_u << std::endl; std::cout << "host_v = " << host_v << std::endl; std::cout << "Comparison of host_u and host_v is: " << ( comparison( host_u, host_v ) ? "'true'" : "'false'" ) << "." << std::endl; std::cout << "Comparison of host_u and host_u is: " << ( comparison( host_u, host_u ) ? "'true'" : "'false'" ) << "." << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_u( 10 ), cuda_v( 10 ); cuda_u = 1.0; cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } ); std::cout << "cuda_u = " << cuda_u << std::endl; std::cout << "cuda_v = " << cuda_v << std::endl; std::cout << "Comparison of cuda_u and cuda_v is: " << ( comparison( cuda_u, cuda_v ) ? "'true'" : "'false'" ) << "." << std::endl; std::cout << "Comparison of cuda_u and cuda_u is: " << ( comparison( cuda_u, cuda_u ) ? "'true'" : "'false'" ) << "." << std::endl; #endif return EXIT_SUCCESS; } Documentation/Tutorials/Reduction/ComparisonExample.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line ComparisonExample.cpp No newline at end of file Documentation/Tutorials/Reduction/MaximumNormExample.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -14,19 +14,19 @@ double maximumNorm( const Vector< double, Device >& v ) auto fetch = [=] __cuda_callable__ ( int i ) { return abs( view[ i ] ); }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a = max( a, b ); }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a = max( a ,b ); }; return Reduction< Device >::reduce( v.getSize(), reduce, volatileReduce, fetch, 0.0 ); return Reduction< Device >::reduce( view.getSize(), reduce, volatileReduce, fetch, 0.0 ); } int main( int argc, char* argv[] ) { Vector< double, Devices::Host > host_v( 10 ); host_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } ); std::cout << "host_v = " << host_v << std::cout; std::cout << "host_v = " << host_v << std::endl; std::cout << "The maximum norm of the host vector elements is " << maximumNorm( host_v ) << "." << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_v( 10 ); cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } ); std::cout << "cuda_v = " << cuda_v << std::cout; std::cout << "cuda_v = " << cuda_v << std::endl; std::cout << "The maximum norm of the CUDA vector elements is " << maximumNorm( cuda_v ) << "." << std::endl; #endif return EXIT_SUCCESS; Loading Documentation/Tutorials/Reduction/ProductExample.cpp +14 −3 Original line number Diff line number Diff line Loading @@ -14,19 +14,30 @@ double product( const Vector< double, Device >& v ) auto fetch = [=] __cuda_callable__ ( int i ) { return view[ i ]; }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a *= b; }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a *= b; }; return Reduction< Device >::reduce( v.getSize(), reduce, volatileReduce, fetch, 1.0 ); /*** * Since we compute the product of all elements, the reduction must be initialized by 1.0 not by 0.0. */ return Reduction< Device >::reduce( view.getSize(), reduce, volatileReduce, fetch, 1.0 ); } int main( int argc, char* argv[] ) { /*** * The first test on CPU ... */ Vector< double, Devices::Host > host_v( 10 ); host_v = 1.0; std::cout << "host_v = " << host_v << std::cout; std::cout << "host_v = " << host_v << std::endl; std::cout << "The product of the host vector elements is " << product( host_v ) << "." << std::endl; /*** * ... the second test on GPU. */ #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_v( 10 ); cuda_v = 1.0; std::cout << "cuda_v = " << cuda_v << std::cout; std::cout << "cuda_v = " << cuda_v << std::endl; std::cout << "The product of the CUDA vector elements is " << product( cuda_v ) << "." << std::endl; #endif return EXIT_SUCCESS; Loading Loading
Documentation/Tutorials/Reduction/CMakeLists.txt +11 −1 Original line number Diff line number Diff line Loading @@ -7,12 +7,22 @@ IF( BUILD_CUDA ) ADD_CUSTOM_COMMAND( COMMAND ScalarProductExample > ScalarProductExample.out OUTPUT ScalarProductExample.out ) CUDA_ADD_EXECUTABLE( MaximumNormExample MaximumNormExample.cu ) ADD_CUSTOM_COMMAND( COMMAND MaximumNormExample > MaximumNormExample.out OUTPUT MaximumNormExample.out ) CUDA_ADD_EXECUTABLE( ComparisonExample ComparisonExample.cu ) ADD_CUSTOM_COMMAND( COMMAND ComparisonExample > ComparisonExample.out OUTPUT ComparisonExample.out ) # CUDA_ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cu ) # ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out ) ENDIF() ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cpp ) ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out ) IF( BUILD_CUDA ) ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS SumExample.out ProductExample.out ScalarProductExample.out MaximumNormExample.out ) MaximumNormExample.out ComparisonExample.out UpdateAndResidueExample.out ) ENDIF()
Documentation/Tutorials/Reduction/ComparisonExample.cpp 0 → 100644 +49 −0 Original line number Diff line number Diff line #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> #include <TNL/Containers/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; using namespace TNL::Containers::Algorithms; template< typename Device > bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v ) { auto u_view = u.getView(); auto v_view = v.getView(); /*** * Fetch compares corresponding elements of both vectors */ auto fetch = [=] __cuda_callable__ ( int i )->bool { return ( u_view[ i ] == v_view[ i ] ); }; /*** * Reduce performs logical AND on intermediate results obtained by fetch. */ auto reduce = [] __cuda_callable__ ( bool& a, const bool& b ) { a = ( a && b ); }; auto volatileReduce = [=] __cuda_callable__ ( volatile bool& a, const volatile bool& b ) { a = ( a && b ); }; return Reduction< Device >::reduce( v_view.getSize(), reduce, volatileReduce, fetch, true ); } int main( int argc, char* argv[] ) { Vector< double, Devices::Host > host_u( 10 ), host_v( 10 ); host_u = 1.0; host_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } ); std::cout << "host_u = " << host_u << std::endl; std::cout << "host_v = " << host_v << std::endl; std::cout << "Comparison of host_u and host_v is: " << ( comparison( host_u, host_v ) ? "'true'" : "'false'" ) << "." << std::endl; std::cout << "Comparison of host_u and host_u is: " << ( comparison( host_u, host_u ) ? "'true'" : "'false'" ) << "." << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_u( 10 ), cuda_v( 10 ); cuda_u = 1.0; cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } ); std::cout << "cuda_u = " << cuda_u << std::endl; std::cout << "cuda_v = " << cuda_v << std::endl; std::cout << "Comparison of cuda_u and cuda_v is: " << ( comparison( cuda_u, cuda_v ) ? "'true'" : "'false'" ) << "." << std::endl; std::cout << "Comparison of cuda_u and cuda_u is: " << ( comparison( cuda_u, cuda_u ) ? "'true'" : "'false'" ) << "." << std::endl; #endif return EXIT_SUCCESS; }
Documentation/Tutorials/Reduction/ComparisonExample.cu 0 → 120000 +1 −0 Original line number Diff line number Diff line ComparisonExample.cpp No newline at end of file
Documentation/Tutorials/Reduction/MaximumNormExample.cpp +3 −3 Original line number Diff line number Diff line Loading @@ -14,19 +14,19 @@ double maximumNorm( const Vector< double, Device >& v ) auto fetch = [=] __cuda_callable__ ( int i ) { return abs( view[ i ] ); }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a = max( a, b ); }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a = max( a ,b ); }; return Reduction< Device >::reduce( v.getSize(), reduce, volatileReduce, fetch, 0.0 ); return Reduction< Device >::reduce( view.getSize(), reduce, volatileReduce, fetch, 0.0 ); } int main( int argc, char* argv[] ) { Vector< double, Devices::Host > host_v( 10 ); host_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } ); std::cout << "host_v = " << host_v << std::cout; std::cout << "host_v = " << host_v << std::endl; std::cout << "The maximum norm of the host vector elements is " << maximumNorm( host_v ) << "." << std::endl; #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_v( 10 ); cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return i - 7; } ); std::cout << "cuda_v = " << cuda_v << std::cout; std::cout << "cuda_v = " << cuda_v << std::endl; std::cout << "The maximum norm of the CUDA vector elements is " << maximumNorm( cuda_v ) << "." << std::endl; #endif return EXIT_SUCCESS; Loading
Documentation/Tutorials/Reduction/ProductExample.cpp +14 −3 Original line number Diff line number Diff line Loading @@ -14,19 +14,30 @@ double product( const Vector< double, Device >& v ) auto fetch = [=] __cuda_callable__ ( int i ) { return view[ i ]; }; auto reduce = [] __cuda_callable__ ( double& a, const double& b ) { a *= b; }; auto volatileReduce = [=] __cuda_callable__ ( volatile double& a, const volatile double& b ) { a *= b; }; return Reduction< Device >::reduce( v.getSize(), reduce, volatileReduce, fetch, 1.0 ); /*** * Since we compute the product of all elements, the reduction must be initialized by 1.0 not by 0.0. */ return Reduction< Device >::reduce( view.getSize(), reduce, volatileReduce, fetch, 1.0 ); } int main( int argc, char* argv[] ) { /*** * The first test on CPU ... */ Vector< double, Devices::Host > host_v( 10 ); host_v = 1.0; std::cout << "host_v = " << host_v << std::cout; std::cout << "host_v = " << host_v << std::endl; std::cout << "The product of the host vector elements is " << product( host_v ) << "." << std::endl; /*** * ... the second test on GPU. */ #ifdef HAVE_CUDA Vector< double, Devices::Cuda > cuda_v( 10 ); cuda_v = 1.0; std::cout << "cuda_v = " << cuda_v << std::cout; std::cout << "cuda_v = " << cuda_v << std::endl; std::cout << "The product of the CUDA vector elements is " << product( cuda_v ) << "." << std::endl; #endif return EXIT_SUCCESS; Loading