From 9c5591804172c2bc3440ac1a4c138d245cd65c60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Sun, 7 Mar 2021 20:21:17 +0100 Subject: [PATCH] Optimizing CSRScalarKernel for OpenMP. --- .../SpMV/ReferenceFormats/Legacy/CSR_impl.h | 1 - .../Algorithms/Segments/CSRScalarKernel.hpp | 22 ++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/CSR_impl.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/CSR_impl.h index 827e2c3116..8d15b49d9c 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/CSR_impl.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/CSR_impl.h @@ -1715,7 +1715,6 @@ void SpMVCSRAdaptive( const Real *inVector, return; Real result = 0.0; - bool compute( true ); const Index laneID = threadIdx.x & 31; // & is cheaper than % Block<Index> block = blocks[blockIdx]; const Index minID = rowPointers[block.index[0]/* minRow */]; diff --git a/src/TNL/Algorithms/Segments/CSRScalarKernel.hpp b/src/TNL/Algorithms/Segments/CSRScalarKernel.hpp index 75fda2e440..15f6966797 100644 --- a/src/TNL/Algorithms/Segments/CSRScalarKernel.hpp +++ b/src/TNL/Algorithms/Segments/CSRScalarKernel.hpp @@ -94,7 +94,27 @@ segmentsReduction( const OffsetsView& offsets, aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }; - Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + + if( std::is_same< DeviceType, TNL::Devices::Host >::value ) + { +#ifdef HAVE_OPENMP + #pragma omp parallel for firstprivate( l ) schedule( dynamic, 100 ), if( Devices::Host::isOMPEnabled() ) +#endif + for( Index segmentIdx = first; segmentIdx < last; segmentIdx ++ ) + l( segmentIdx, args... ); + /*{ + const IndexType begin = offsets[ segmentIdx ]; + const IndexType end = offsets[ segmentIdx + 1 ]; + Real aux( zero ); + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); + keeper( segmentIdx, aux ); + }*/ + } + else + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } } // namespace Segments } // namespace Algorithms -- GitLab