From fa023cc20bf03ee735257a8ce2defcda301eb97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkovsky@mmg.fjfi.cvut.cz> Date: Sat, 8 Jan 2022 11:01:28 +0100 Subject: [PATCH] Fixed ParallelForExample The example should be the same for both .cpp and .cu, otherwise the output might be inconsistent (e.g. when .cpp is included, but .cu is compiled and its output is used). --- .../Examples/Algorithms/ParallelForExample.cu | 60 +------------------ 1 file changed, 1 insertion(+), 59 deletions(-) mode change 100644 => 120000 Documentation/Examples/Algorithms/ParallelForExample.cu diff --git a/Documentation/Examples/Algorithms/ParallelForExample.cu b/Documentation/Examples/Algorithms/ParallelForExample.cu deleted file mode 100644 index 316ea3cb98..0000000000 --- a/Documentation/Examples/Algorithms/ParallelForExample.cu +++ /dev/null @@ -1,59 +0,0 @@ -#include <iostream> -#include <cstdlib> -#include <TNL/Containers/Vector.h> -#include <TNL/Algorithms/ParallelFor.h> - -using namespace TNL; -using namespace TNL::Containers; -using namespace TNL::Algorithms; - -template< typename Device > -void vectorSum( const Vector< double, Device >& v1, - const Vector< double, Device >& v2, - const double& c, - Vector< double, Device >& result ) -{ - /**** - * Get vectors view which can be captured by lambda. - */ - auto v1_view = v1.getConstView(); - auto v2_view = v2.getConstView(); - auto result_view = result.getView(); - - /**** - * The sum function. - */ - auto sum = [=] __cuda_callable__ ( int i, const double c ) mutable { - result_view[ i ] = v1_view[ i ] + v2_view[ i ] + c; }; - - ParallelFor< Device >::exec( 0, v1.getSize(), sum, c ); -} - -int main( int argc, char* argv[] ) -{ - /*** - * Firstly, test the vectors sum on CPU. - */ - Vector< double, Devices::Host > host_v1( 10 ), host_v2( 10 ), host_result( 10 ); - host_v1 = 1.0; - host_v2.forAllElements( []__cuda_callable__ ( int i, double& v ) { v = i; } ); - vectorSum( host_v1, host_v2, 2.0, host_result ); - std::cout << "host_v1 = " << host_v1 << std::endl; - std::cout << "host_v2 = " << host_v2 << std::endl; - std::cout << "The sum of the vectors on CPU is " << host_result << "." << std::endl; - - /*** - * And then also on GPU. - */ -#ifdef HAVE_CUDA - Vector< double, Devices::Cuda > cuda_v1( 10 ), cuda_v2( 10 ), cuda_result( 10 ); - cuda_v1 = 1.0; - cuda_v2.forAllElements( []__cuda_callable__ ( int i, double& v ) { v = i; } ); - vectorSum( cuda_v1, cuda_v2, 2.0, cuda_result ); - std::cout << "cuda_v1 = " << cuda_v1 << std::endl; - std::cout << "cuda_v2 = " << cuda_v2 << std::endl; - std::cout << "The sum of the vectors on GPU is " << cuda_result << "." << std::endl; -#endif - return EXIT_SUCCESS; -} - diff --git a/Documentation/Examples/Algorithms/ParallelForExample.cu b/Documentation/Examples/Algorithms/ParallelForExample.cu new file mode 120000 index 0000000000..fba5e08163 --- /dev/null +++ b/Documentation/Examples/Algorithms/ParallelForExample.cu @@ -0,0 +1 @@ +ParallelForExample.cpp \ No newline at end of file -- GitLab