Commit 9c559180 authored by Tomáš Oberhuber
Browse files

Optimizing CSRScalarKernel for OpenMP.

parent a848a69d
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -1715,7 +1715,6 @@ void SpMVCSRAdaptive( const Real *inVector,
      return;

   Real result = 0.0;
   bool compute( true );
   const Index laneID = threadIdx.x & 31; // & is cheaper than %
   Block<Index> block = blocks[blockIdx];
   const Index minID = rowPointers[block.index[0]/* minRow */];
+21 −1
Original line number Diff line number Diff line
@@ -94,6 +94,26 @@ segmentsReduction( const OffsetsView& offsets,
            aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) );
        keeper( segmentIdx, aux );
    };

     if( std::is_same< DeviceType, TNL::Devices::Host >::value )
    {
#ifdef HAVE_OPENMP
        #pragma omp parallel for firstprivate( l ) schedule( dynamic, 100 ), if( Devices::Host::isOMPEnabled() )
#endif
        for( Index segmentIdx = first; segmentIdx < last; segmentIdx ++ )
            l( segmentIdx, args... );
        /*{
            const IndexType begin = offsets[ segmentIdx ];
            const IndexType end = offsets[ segmentIdx + 1 ];
            Real aux( zero );
            IndexType localIdx( 0 );
            bool compute( true );
            for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
                aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) );
            keeper( segmentIdx, aux );
        }*/
    }
    else
        Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
}
      } // namespace Segments