Loading src/TNL/Containers/Segments/CSRView.hpp +3 −2 Original line number Diff line number Diff line Loading @@ -195,15 +195,16 @@ void CSRView< Device, Index >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) reduction( aux, fetch( i, j, compute, args... ) ); reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading src/TNL/Containers/Segments/EllpackView.hpp +5 −3 Original line number Diff line number Diff line Loading @@ -245,7 +245,7 @@ void EllpackView< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; Loading @@ -253,9 +253,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = i * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) reduction( aux, fetch( i, j, compute, args... ) ); reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading @@ -268,9 +269,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = i; const IndexType end = storageSize; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j += alignedSize ) reduction( aux, fetch( i, j, compute, args... ) ); reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading src/TNL/Containers/Segments/SlicedEllpackView.hpp +5 −3 Original line number Diff line number Diff line Loading @@ -293,7 +293,7 @@ void SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) Loading @@ -305,9 +305,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading @@ -321,9 +322,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading src/TNL/Matrices/SparseMatrix.hpp +3 −3 Original line number Diff line number Diff line Loading @@ -417,12 +417,12 @@ vectorProduct( const InVector& inVector, const auto valuesView = this->values.getConstView(); const auto columnIndexesView = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ offset ]; auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ globalIdx ]; compute = ( column != paddingIndex ); if( ! compute ) return 0.0; return valuesView[ offset ] * inVectorView[ column ]; return valuesView[ globalIdx ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; Loading src/UnitTests/Containers/Segments/SegmentsTest.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments() const auto v_view = v.getConstView(); auto result_view = result.getView(); auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { return v_view[ globalIdx ]; }; auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) { Loading Loading
src/TNL/Containers/Segments/CSRView.hpp +3 −2 Original line number Diff line number Diff line Loading @@ -195,15 +195,16 @@ void CSRView< Device, Index >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) reduction( aux, fetch( i, j, compute, args... ) ); reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading
src/TNL/Containers/Segments/EllpackView.hpp +5 −3 Original line number Diff line number Diff line Loading @@ -245,7 +245,7 @@ void EllpackView< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; Loading @@ -253,9 +253,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = i * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) reduction( aux, fetch( i, j, compute, args... ) ); reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading @@ -268,9 +269,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = i; const IndexType end = storageSize; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j += alignedSize ) reduction( aux, fetch( i, j, compute, args... ) ); reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading
src/TNL/Containers/Segments/SlicedEllpackView.hpp +5 −3 Original line number Diff line number Diff line Loading @@ -293,7 +293,7 @@ void SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) Loading @@ -305,9 +305,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading @@ -321,9 +322,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading
src/TNL/Matrices/SparseMatrix.hpp +3 −3 Original line number Diff line number Diff line Loading @@ -417,12 +417,12 @@ vectorProduct( const InVector& inVector, const auto valuesView = this->values.getConstView(); const auto columnIndexesView = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ offset ]; auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ globalIdx ]; compute = ( column != paddingIndex ); if( ! compute ) return 0.0; return valuesView[ offset ] * inVectorView[ column ]; return valuesView[ globalIdx ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; Loading
src/UnitTests/Containers/Segments/SegmentsTest.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments() const auto v_view = v.getConstView(); auto result_view = result.getView(); auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { return v_view[ globalIdx ]; }; auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) { Loading