Optimizing CSRScalarKernel for OpenMP. (9c559180) · Commits · TNL / tnl-dev

src/Benchmarks/SpMV/ReferenceFormats/Legacy/CSR_impl.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -1715,7 +1715,6 @@ void SpMVCSRAdaptive( const Real *inVector,
		return;

		Real result = 0.0;
		bool compute( true );
		const Index laneID = threadIdx.x & 31; // & is cheaper than %
		Block<Index> block = blocks[blockIdx];
		const Index minID = rowPointers[block.index[0]/* minRow */];

+21 −1

Original line number	Diff line number	Diff line
		@@ -94,6 +94,26 @@ segmentsReduction( const OffsetsView& offsets,
		aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) );
		keeper( segmentIdx, aux );
		};

		if( std::is_same< DeviceType, TNL::Devices::Host >::value )
		{
		#ifdef HAVE_OPENMP
		#pragma omp parallel for firstprivate( l ) schedule( dynamic, 100 ), if( Devices::Host::isOMPEnabled() )
		#endif
		for( Index segmentIdx = first; segmentIdx < last; segmentIdx ++ )
		l( segmentIdx, args... );
		/*{
		const IndexType begin = offsets[ segmentIdx ];
		const IndexType end = offsets[ segmentIdx + 1 ];
		Real aux( zero );
		IndexType localIdx( 0 );
		bool compute( true );
		for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ )
		aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) );
		keeper( segmentIdx, aux );
		}*/
		}
		else
		Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
		}
		} // namespace Segments