Commit e07a0168, authored and committed by Tomáš Oberhuber
Browse files

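Added a boolean `compute` flag to segmentsReduction: it is passed by reference to `fetch`, and each segment's reduction loop stops as soon as `fetch` sets it to false.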

parent 30a8311f
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -218,14 +218,15 @@ void
CSR< Device, Index, IndexAllocator >::
segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   using RealType = decltype( fetch( IndexType(), IndexType() ) );
   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
   const auto offsetsView = this->offsets.getConstView();
   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
      const IndexType begin = offsetsView[ i ];
      const IndexType end = offsetsView[ i + 1 ];
      RealType aux( zero );
      for( IndexType j = begin; j < end; j++  )
         reduction( aux, fetch( i, j, args... ) );
      bool compute( true );
      for( IndexType j = begin; j < end && compute; j++  )
         reduction( aux, fetch( i, j, compute, args... ) );
      keeper( i, aux );
   };
   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+4 −3
Original line number Diff line number Diff line
@@ -204,14 +204,15 @@ void
CSRView< Device, Index >::
segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   using RealType = decltype( fetch( IndexType(), IndexType() ) );
   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
   const auto offsetsView = this->offsets.getConstView();
   auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
      const IndexType begin = offsetsView[ i ];
      const IndexType end = offsetsView[ i + 1 ];
      RealType aux( zero );
      for( IndexType j = begin; j < end; j++  )
         reduction( aux, fetch( i, j, args... ) );
      bool compute( true );
      for( IndexType j = begin; j < end && compute; j++  )
         reduction( aux, fetch( i, j, compute, args... ) );
      keeper( i, aux );
   };
   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+7 −6
Original line number Diff line number Diff line
@@ -306,31 +306,32 @@ void
Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::
segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
   if( RowMajorOrder )
   {
      using RealType = decltype( fetch( IndexType(), IndexType() ) );
      const IndexType segmentSize = this->segmentSize;
      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
         const IndexType begin = i * segmentSize;
         const IndexType end = begin + segmentSize;
         RealType aux( zero );
         for( IndexType j = begin; j < end; j++  )
            reduction( aux, fetch( i, j, args... ) );
         bool compute( true );
         for( IndexType j = begin; j < end && compute; j++  )
            reduction( aux, fetch( i, j, compute, args... ) );
         keeper( i, aux );
      };
      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
   }
   else
   {
      using RealType = decltype( fetch( IndexType(), IndexType() ) );
      const IndexType storageSize = this->getStorageSize();
      const IndexType alignedSize = this->alignedSize;
      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
         const IndexType begin = i;
         const IndexType end = storageSize;
         RealType aux( zero );
         for( IndexType j = begin; j < end; j += alignedSize  )
            reduction( aux, fetch( i, j, args... ) );
         bool compute( true );
         for( IndexType j = begin; j < end && compute; j += alignedSize  )
            reduction( aux, fetch( i, j, compute, args... ) );
         keeper( i, aux );
      };
      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+7 −6
Original line number Diff line number Diff line
@@ -245,31 +245,32 @@ void
EllpackView< Device, Index, RowMajorOrder, Alignment >::
segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
   if( RowMajorOrder )
   {
      using RealType = decltype( fetch( IndexType(), IndexType() ) );
      const IndexType segmentSize = this->segmentSize;
      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
         const IndexType begin = i * segmentSize;
         const IndexType end = begin + segmentSize;
         RealType aux( zero );
         for( IndexType j = begin; j < end; j++  )
            reduction( aux, fetch( i, j, args... ) );
         bool compute( true );
         for( IndexType j = begin; j < end && compute; j++  )
            reduction( aux, fetch( i, j, compute, args... ) );
         keeper( i, aux );
      };
      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
   }
   else
   {
      using RealType = decltype( fetch( IndexType(), IndexType() ) );
      const IndexType storageSize = this->getStorageSize();
      const IndexType alignedSize = this->alignedSize;
      auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
         const IndexType begin = i;
         const IndexType end = storageSize;
         RealType aux( zero );
         for( IndexType j = begin; j < end; j += alignedSize  )
            reduction( aux, fetch( i, j, args... ) );
         bool compute( true );
         for( IndexType j = begin; j < end && compute; j += alignedSize  )
            reduction( aux, fetch( i, j, compute, args... ) );
         keeper( i, aux );
      };
      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+8 −6
Original line number Diff line number Diff line
@@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes )
   const auto sizes_view = sizes.getConstView();
   auto slices_view = this->sliceOffsets.getView();
   auto slice_segment_size_view = this->sliceSegmentSizes.getView();
   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType {
   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
      if( globalIdx < _size )
         return sizes_view[ globalIdx ];
      return 0;
@@ -341,7 +341,7 @@ void
SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
{
   using RealType = decltype( fetch( IndexType(), IndexType() ) );
   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
   const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
   const auto sliceOffsets_view = this->sliceOffsets.getConstView();
   if( RowMajorOrder )
@@ -353,8 +353,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
         const IndexType end = begin + segmentSize;
         RealType aux( zero );
         for( IndexType globalIdx = begin; globalIdx< end; globalIdx++  )
            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
         bool compute( true );
         for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++  )
            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
         keeper( segmentIdx, aux );
      };
      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
@@ -368,8 +369,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red
         const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
         const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
         RealType aux( zero );
         for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize  )
            reduction( aux, fetch( segmentIdx, globalIdx, args... ) );
         bool compute( true );
         for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize  )
            reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) );
         keeper( segmentIdx, aux );
      };
      Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
Loading