Loading src/Benchmarks/SpMV/ReferenceFormats/Legacy/CSR_impl.h +0 −1 Original line number Diff line number Diff line Loading @@ -1715,7 +1715,6 @@ void SpMVCSRAdaptive( const Real *inVector, return; Real result = 0.0; bool compute( true ); const Index laneID = threadIdx.x & 31; // & is cheaper than % Block<Index> block = blocks[blockIdx]; const Index minID = rowPointers[block.index[0]/* minRow */]; Loading src/TNL/Algorithms/Segments/CSRScalarKernel.hpp +21 −1 Original line number Diff line number Diff line Loading @@ -94,6 +94,26 @@ segmentsReduction( const OffsetsView& offsets, aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }; if( std::is_same< DeviceType, TNL::Devices::Host >::value ) { #ifdef HAVE_OPENMP #pragma omp parallel for firstprivate( l ) schedule( dynamic, 100 ), if( Devices::Host::isOMPEnabled() ) #endif for( Index segmentIdx = first; segmentIdx < last; segmentIdx ++ ) l( segmentIdx, args... ); /*{ const IndexType begin = offsets[ segmentIdx ]; const IndexType end = offsets[ segmentIdx + 1 ]; Real aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }*/ } else Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } } // namespace Segments Loading Loading
src/Benchmarks/SpMV/ReferenceFormats/Legacy/CSR_impl.h +0 −1 Original line number Diff line number Diff line Loading @@ -1715,7 +1715,6 @@ void SpMVCSRAdaptive( const Real *inVector, return; Real result = 0.0; bool compute( true ); const Index laneID = threadIdx.x & 31; // & is cheaper than % Block<Index> block = blocks[blockIdx]; const Index minID = rowPointers[block.index[0]/* minRow */]; Loading
src/TNL/Algorithms/Segments/CSRScalarKernel.hpp +21 −1 Original line number Diff line number Diff line Loading @@ -94,6 +94,26 @@ segmentsReduction( const OffsetsView& offsets, aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }; if( std::is_same< DeviceType, TNL::Devices::Host >::value ) { #ifdef HAVE_OPENMP #pragma omp parallel for firstprivate( l ) schedule( dynamic, 100 ), if( Devices::Host::isOMPEnabled() ) #endif for( Index segmentIdx = first; segmentIdx < last; segmentIdx ++ ) l( segmentIdx, args... ); /*{ const IndexType begin = offsets[ segmentIdx ]; const IndexType end = offsets[ segmentIdx + 1 ]; Real aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); keeper( segmentIdx, aux ); }*/ } else Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } } // namespace Segments Loading