diff --git a/Documentation/Examples/CMakeLists.txt b/Documentation/Examples/CMakeLists.txt
index e984d2f1f2334ec7cd5cb9c8bf1ec680cab91e7c..7aa7364299a853bc261ebd6fba0b225204218121 100644
--- a/Documentation/Examples/CMakeLists.txt
+++ b/Documentation/Examples/CMakeLists.txt
@@ -3,7 +3,9 @@
 ADD_SUBDIRECTORY( Containers )
 ADD_SUBDIRECTORY( Pointers )
 ADD_SUBDIRECTORY( Matrices )
-set( COMMON_EXAMPLES
+set( COMMON_EXAMPLES )
+
+set( CUDA_EXAMPLES
    FileExampleCuda
 )
 
@@ -24,7 +26,7 @@ set( HOST_EXAMPLES
    TimerExampleLogger
 )
 if( BUILD_CUDA )
-   foreach( target IN ITEMS ${COMMON_EXAMPLES} )
+   foreach( target IN ITEMS ${COMMON_EXAMPLES} ${CUDA_EXAMPLES} )
       cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
       add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
       set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
diff --git a/Documentation/Tutorials/Pointers/CMakeLists.txt b/Documentation/Tutorials/Pointers/CMakeLists.txt
index 0535e8fd5df0c242c4df984a483ec6a34dd32e46..9b83841fbf8a928e0ede88273e58bbac8722ce45 100644
--- a/Documentation/Tutorials/Pointers/CMakeLists.txt
+++ b/Documentation/Tutorials/Pointers/CMakeLists.txt
@@ -1,13 +1,13 @@
 IF( BUILD_CUDA )
-   CUDA_ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cu )
-   ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out )
+   CUDA_ADD_EXECUTABLE( UniquePointerExample_ UniquePointerExample.cu )
+   ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample_ > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out )
    CUDA_ADD_EXECUTABLE( SharedPointerExample SharedPointerExample.cu )
    ADD_CUSTOM_COMMAND( COMMAND SharedPointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SharedPointerExample.out OUTPUT SharedPointerExample.out )
    CUDA_ADD_EXECUTABLE( DevicePointerExample DevicePointerExample.cu )
    ADD_CUSTOM_COMMAND( COMMAND DevicePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DevicePointerExample.out OUTPUT DevicePointerExample.out )
 ELSE()
-   ADD_EXECUTABLE( UniquePointerExample UniquePointerExample.cpp )
-   ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out )
+   ADD_EXECUTABLE( UniquePointerExample_ UniquePointerExample.cpp )
+   ADD_CUSTOM_COMMAND( COMMAND UniquePointerExample_ > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out )
 ENDIF()
 
 ADD_EXECUTABLE( UniquePointerHostExample UniquePointerHostExample.cpp )
diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h
index 1dc375f98938ce1abc68661305667815cf2315fc..aa0ea33941f0c1179b3f482b1a2ee4e1a4537cf0 100644
--- a/src/Python/pytnl/tnl/SparseMatrix.h
+++ b/src/Python/pytnl/tnl/SparseMatrix.h
@@ -67,7 +67,7 @@ struct export_CSR< Segments, typename TNL::enable_if_type< decltype(Segments{}.g
     static void e( Scope & s )
     {
         s
-            .def("getOffsets", []( const Segments& segments ) -> const typename Segments::OffsetsHolder& {
+            .def("getOffsets", []( const Segments& segments ) -> const typename Segments::OffsetsContainer& {
                 return segments.getOffsets();
             }, py::return_value_policy::reference_internal)
         ;
diff --git a/src/TNL/Algorithms/Segments/BiEllpackView.h b/src/TNL/Algorithms/Segments/BiEllpackView.h
index 91b055e2685c516e8c703de58e72057f0f314748..f14282efb687c4e677faa81c015e6f9ab8576801 100644
--- a/src/TNL/Algorithms/Segments/BiEllpackView.h
+++ b/src/TNL/Algorithms/Segments/BiEllpackView.h
@@ -15,7 +15,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/Segments/ElementsOrganization.h>
 #include <TNL/Algorithms/Segments/BiEllpackSegmentView.h>
-#include <TNL/Algorithms/Segments/details/BiEllpack.h>
+#include <TNL/Algorithms/Segments/detail/BiEllpack.h>
 #include <TNL/Algorithms/Segments/SegmentsPrinting.h>
 
 namespace TNL {
@@ -205,7 +205,7 @@ class BiEllpackView
                                  Real_ zero );
 
       template< typename Index_, typename Fetch_, int BlockDim_, int WarpSize_, bool B_ >
-      friend struct details::BiEllpackreduceSegmentsDispatcher;
+      friend struct detail::BiEllpackreduceSegmentsDispatcher;
 #endif
 };
diff --git a/src/TNL/Algorithms/Segments/BiEllpackView.hpp b/src/TNL/Algorithms/Segments/BiEllpackView.hpp
index 8a1b035aa1122edd39b878fa851b76a46e6485b5..2014ae3dc983dbf025a51a971deaa9dca90d1ef4 100644
--- a/src/TNL/Algorithms/Segments/BiEllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/BiEllpackView.hpp
@@ -425,7 +425,7 @@ reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction&
          dim3 cudaGridSize = Cuda::getMaxGridSize();
          if( gridIdx == cudaGrids - 1 )
            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-         details::BiEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, BlockDim >
+         detail::BiEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, BlockDim >
             <<< cudaGridSize, cudaBlockSize, sharedMemory >>>
             ( *this, gridIdx, first, last, fetch, reduction, keeper, zero );
          cudaThreadSynchronize();
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp b/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp
index 9a08957da19291ce89a6c6e5aac41a89a654e375..6218a451ce0e5e05581768fcc6c6fe12a90d0c60 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpack.hpp
@@ -349,7 +349,7 @@ template< typename Device,
 auto
 ChunkedEllpack< Device, Index, IndexAllocator, Organization >::
 getSegmentSize( const IndexType segmentIdx ) const -> IndexType
 {
-   return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentSize(
+   return detail::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentSize(
       rowToSliceMapping.getConstView(),
       slices.getConstView(),
       rowToChunkMapping.getConstView(),
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpackView.h b/src/TNL/Algorithms/Segments/ChunkedEllpackView.h
index ae400d2fed6f7f63467a1ad8035251c6fe73c6c2..0ed8ed413fd8a7992f4428e7405459db3953c618 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpackView.h
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpackView.h
@@ -16,7 +16,7 @@
 #include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/Segments/ElementsOrganization.h>
 #include <TNL/Algorithms/Segments/ChunkedEllpackSegmentView.h>
-#include <TNL/Algorithms/Segments/details/ChunkedEllpack.h>
+#include <TNL/Algorithms/Segments/detail/ChunkedEllpack.h>
 #include <TNL/Algorithms/Segments/SegmentsPrinting.h>
 
 namespace TNL {
@@ -228,7 +228,7 @@ class ChunkedEllpackView
                                  Real_ zero );
 
       template< typename Index_, typename Fetch_, bool B_ >
-      friend struct details::ChunkedEllpackreduceSegmentsDispatcher;
+      friend struct detail::ChunkedEllpackreduceSegmentsDispatcher;
 #endif
 };
       } // namespace Segments
diff --git a/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp b/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp
index a48afead54e0e2e20774a25885a0e22c96d019dc..6133a843844b089bc60a16da7181cc8149c14c2a 100644
--- a/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/ChunkedEllpackView.hpp
@@ -456,7 +456,7 @@ reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction&
       {
          if( gridIdx == cudaGrids - 1 )
            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-         details::ChunkedEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real >
+         detail::ChunkedEllpackreduceSegmentsKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real >
             <<< cudaGridSize, cudaBlockSize, sharedMemory >>>
             ( *this, gridIdx, first, last, fetch, reduction, keeper, zero );
       }
diff --git a/src/TNL/Algorithms/Segments/EllpackView.hpp b/src/TNL/Algorithms/Segments/EllpackView.hpp
index e283a75d0ebe2e960bdf522d44e7a59e62f87fa9..b5311d7939e1d49d826b6ba76bffb11af23aff0f 100644
--- a/src/TNL/Algorithms/Segments/EllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/EllpackView.hpp
@@ -98,7 +98,7 @@ template< typename Index,
           typename Reduction,
           typename ResultKeeper,
           typename Real,
-          bool FullFetch = details::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
+          bool FullFetch = detail::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
 struct EllpackCudaReductionDispatcher
 {
    static void
@@ -393,7 +393,7 @@ void
 EllpackView< Device, Index, Organization, Alignment >::
 reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const
 {
    //using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >() ) );
-   using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType;
+   using RealType = typename detail::FetchLambdaAdapter< Index, Fetch >::ReturnType;
    if( Organization == RowMajorOrder )
    {
       if( std::is_same< Device, Devices::Cuda >::value )
@@ -404,11 +404,11 @@ reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction&
       auto l = [=] __cuda_callable__ ( const IndexType segmentIdx ) mutable {
          const IndexType begin = segmentIdx * segmentSize;
          const IndexType end = begin + segmentSize;
-         RealType aux( zero );
+         Real aux( zero );
          IndexType localIdx( 0 );
         bool compute( true );
         for( IndexType j = begin; j < end && compute; j++ )
-            aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, j, compute ) );
+            aux = reduction( aux, detail::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, j, compute ) );
         keeper( segmentIdx, aux );
       };
       Algorithms::ParallelFor< Device >::exec( first, last, l );
diff --git a/src/TNL/Algorithms/Segments/Kernels/CSRAdaptiveKernelView.hpp b/src/TNL/Algorithms/Segments/Kernels/CSRAdaptiveKernelView.hpp
index f213e9523548ea4607b10c5916fd3c53b7f46b1d..9c495fd70ad15b17db8219c69cc0638b0c42d7fe 100644
--- a/src/TNL/Algorithms/Segments/Kernels/CSRAdaptiveKernelView.hpp
+++ b/src/TNL/Algorithms/Segments/Kernels/CSRAdaptiveKernelView.hpp
@@ -237,7 +237,7 @@ struct CSRAdaptiveKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reductio
 
       Index blocksCount;
 
-      const Index threads = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::CudaBlockSize();
+      const Index threads = detail::CSRAdaptiveKernelParameters< sizeof( Real ) >::CudaBlockSize();
       constexpr size_t maxGridSize = TNL::Cuda::getMaxGridXSize();
 
       // Fill blocks
diff --git a/src/TNL/Algorithms/Segments/Kernels/CSRHybridKernel.hpp b/src/TNL/Algorithms/Segments/Kernels/CSRHybridKernel.hpp
index 07225cc4e05ec95ccbd7ec8344f61ba456a9d010..68198f995f4a8daecd67b8e05881432aacc4372d 100644
--- a/src/TNL/Algorithms/Segments/Kernels/CSRHybridKernel.hpp
+++ b/src/TNL/Algorithms/Segments/Kernels/CSRHybridKernel.hpp
@@ -111,7 +111,7 @@ void reduceSegmentsCSRHybridMultivectorKernel(
    Index localIdx = laneIdx;
    for( Index globalIdx = beginIdx + laneIdx; globalIdx < endIdx && compute; globalIdx += ThreadsPerSegment )
    {
-      result = reduce( result, details::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, localIdx, globalIdx, compute ) );
+      result = reduce( result, detail::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, localIdx, globalIdx, compute ) );
       localIdx += ThreadsPerSegment;
    }
    result += __shfl_down_sync(0xFFFFFFFF, result, 16);
diff --git a/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h
index 50322b8261573ddae44057523fd2ad3f149c4afe..be5fc1331b22119384a9f390ae2deb04815230c3 100644
--- a/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h
+++ b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h
@@ -14,7 +14,7 @@
 #include <TNL/Cuda/LaunchHelpers.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Algorithms/ParallelFor.h>
-#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+#include <TNL/Algorithms/Segments/detail/LambdaAdapter.h>
 
 namespace TNL {
    namespace Algorithms {
diff --git a/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp
index 1c351828841f196347865c5648d37a38e2c6f0b4..93d3e2800e9d6b6b9277306458ea50c0c897a4e0 100644
--- a/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp
+++ b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp
@@ -14,7 +14,7 @@
 #include <TNL/Cuda/LaunchHelpers.h>
 #include <TNL/Containers/VectorView.h>
 #include <TNL/Algorithms/ParallelFor.h>
-#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+#include <TNL/Algorithms/Segments/detail/LambdaAdapter.h>
 #include <TNL/Algorithms/Segments/Kernels/CSRLightKernel.h>
 
 namespace TNL {
@@ -317,7 +317,7 @@ void reduceSegmentsCSRLightMultivectorKernel(
    Index localIdx = laneIdx;
    for( Index globalIdx = beginIdx + laneIdx; globalIdx < endIdx && compute; globalIdx += ThreadsPerSegment )
    {
-      result = reduce( result, details::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, localIdx, globalIdx, compute ) );
+      result = reduce( result, detail::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, localIdx, globalIdx, compute ) );
       localIdx += ThreadsPerSegment;
    }
    result += __shfl_down_sync(0xFFFFFFFF, result, 16);
@@ -377,7 +377,7 @@ template< typename Index,
          typename Reduce,
          typename Keep,
          bool DispatchScalarCSR =
-            details::CheckFetchLambda< Index, Fetch >::hasAllParameters() ||
+            detail::CheckFetchLambda< Index, Fetch >::hasAllParameters() ||
             std::is_same< Device, Devices::Host >::value >
 struct CSRLightKernelreduceSegmentsDispatcher;
diff --git a/src/TNL/Algorithms/Segments/Kernels/CSRScalarKernel.hpp b/src/TNL/Algorithms/Segments/Kernels/CSRScalarKernel.hpp
index 5b9c5e723366f135acd6a9ebc66f6143b62b5e4c..e901acfb99529d12a488b13722f6611ade7c30f1 100644
--- a/src/TNL/Algorithms/Segments/Kernels/CSRScalarKernel.hpp
+++ b/src/TNL/Algorithms/Segments/Kernels/CSRScalarKernel.hpp
@@ -26,7 +26,7 @@ template< typename Index,
          typename Fetch,
         typename Reduce,
         typename Keep,
-        bool DispatchScalarCSR = details::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
+        bool DispatchScalarCSR = detail::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
 struct CSRScalarKernelreduceSegmentsDispatcher;
 
 template< typename Index,
diff --git a/src/TNL/Algorithms/Segments/SlicedEllpack.hpp b/src/TNL/Algorithms/Segments/SlicedEllpack.hpp
index 652eceb566d9460d04a95ed31c93a178f9779083..b1e0a21f37bb18f1fcc0cad57b9ab2a5b64d9fb2 100644
--- a/src/TNL/Algorithms/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Algorithms/Segments/SlicedEllpack.hpp
@@ -168,7 +168,8 @@ setSegmentsSizes( const SizesHolder& sizes )
         slice_segment_size_view[ i ] = res;
      };
      ellpack.reduceAllSegments( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
-     this->sliceOffsets.template scan< Algorithms::ScanType::Exclusive >();
+     Algorithms::inplaceExclusiveScan( this->sliceOffsets );
+     //this->sliceOffsets.template exclusiveScan< Algorithms::detail::ScanType::Exclusive >();
      this->size = sum( sizes );
      this->alignedSize = this->sliceOffsets.getElement( slicesCount );
   }
diff --git a/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp b/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp
index d7ef9524c85b1697af214383ae7274eedb5309be..80700367c3bed8ac190cbe7f04d3a4de73a6fc68 100644
--- a/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp
+++ b/src/TNL/Algorithms/Segments/SlicedEllpackView.hpp
@@ -330,7 +330,7 @@ void
 SlicedEllpackView< Device, Index, Organization, SliceSize >::
 reduceSegments( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero ) const
 {
-   using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType;
+   using RealType = typename detail::FetchLambdaAdapter< Index, Fetch >::ReturnType;
    //using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >() ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
diff --git a/src/TNL/Algorithms/Segments/detail/BiEllpack.h b/src/TNL/Algorithms/Segments/detail/BiEllpack.h
index db64d392d99d85c6b8aba5938566fdf072144434..f5f51f020ae624b96643f39b3e8b414cee1c1325 100644
--- a/src/TNL/Algorithms/Segments/detail/BiEllpack.h
+++ b/src/TNL/Algorithms/Segments/detail/BiEllpack.h
@@ -292,7 +292,7 @@ template< typename Index,
          typename Fetch,
         int BlockDim = 256,
         int WarpSize = 32,
-        bool HasAllParameters = details::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
+        bool HasAllParameters = detail::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
 struct BiEllpackreduceSegmentsDispatcher{};
 
 template< typename Index, typename Fetch, int BlockDim, int WarpSize >
diff --git a/src/TNL/Algorithms/Segments/detail/ChunkedEllpack.h b/src/TNL/Algorithms/Segments/detail/ChunkedEllpack.h
index d9a6c30f21f5256f8874723920e8977b0ac5e7d4..ed6163f3fac3251d08971cd49c63743331f6402d 100644
--- a/src/TNL/Algorithms/Segments/detail/ChunkedEllpack.h
+++ b/src/TNL/Algorithms/Segments/detail/ChunkedEllpack.h
@@ -65,7 +65,7 @@ class ChunkedEllpack
       using OffsetsContainer = Containers::Vector< IndexType, DeviceType, IndexType >;
       using OffsetsHolderView = typename OffsetsContainer::ConstViewType;
       using SegmentsSizes = OffsetsContainer;
-      using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
+      using ChunkedEllpackSliceInfoType = detail::ChunkedEllpackSliceInfo< IndexType >;
       using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
       using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
       using ChunkedEllpackSliceInfoContainerView = typename ChunkedEllpackSliceInfoContainer::ConstViewType;
@@ -233,7 +233,7 @@ class ChunkedEllpack
 #ifdef HAVE_CUDA
 template< typename Index,
          typename Fetch,
-         bool HasAllParameters = details::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
+         bool HasAllParameters = detail::CheckFetchLambda< Index, Fetch >::hasAllParameters() >
 struct ChunkedEllpackreduceSegmentsDispatcher{};
 
 template< typename Index, typename Fetch >
diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp
index b8ad993946a307eb0ed691bf86efb1e5661c52ce..3a44269d1d78923c5b0e40091c9b20d9eb303483 100644
--- a/src/TNL/Matrices/DenseMatrixView.hpp
+++ b/src/TNL/Matrices/DenseMatrixView.hpp
@@ -485,7 +485,7 @@ reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce,
         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
      return identity;
   };
-   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, zero );
+   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, identity );
 }
 
 template< typename Real,
@@ -502,11 +502,7 @@ reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce,
         return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
      return identity;
   };
-<<<<<<< HEAD
-   this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, identity );
-=======
-   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, zero );
->>>>>>> Renaming segmentsReduction to reduceSegments.
+   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, identity );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp b/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp
index 63f49e6c84746add2b368c5381e307d21b06eb0b..e21e420426ad351908abba1c9cd1d6a6c9607465 100644
--- a/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp
+++ b/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp
@@ -12,7 +12,7 @@
 #include <functional>
 #include <sstream>
 
-#include <TNL/Algorithms/Reduction.h>
+#include <TNL/Algorithms/reduce.h>
 #include <TNL/Matrices/Sandbox/SparseSandboxMatrix.h>
 
 namespace TNL {
@@ -253,7 +253,8 @@ setRowCapacities( const RowsCapacitiesVector& rowsCapacities )
      }
   }
   this->rowPointers.setElement( this->getRows(), 0 );
-  this->rowPointers.template scan< Algorithms::ScanType::Exclusive >();
+  Algorithms::inplaceExclusiveScan( this->rowPointers );
+  //this->rowPointers.template scan< Algorithms::ScanType::Exclusive >();
   // End of sparse matrix format initiation.
 
   // SANDBOX_TODO: Compute number of all elements that need to be allocated by your format.
diff --git a/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp b/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp
index 421b5c129ef596db970c0c7e0f776969c21223fb..07342e8e758065aceab6baf38af6f0db1e43eba4 100644
--- a/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp
+++ b/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp
@@ -12,7 +12,7 @@
 #include <functional>
 
 #include <TNL/Matrices/Sandbox/SparseSandboxMatrixView.h>
-#include <TNL/Algorithms/Reduction.h>
+#include <TNL/Algorithms/reduce.h>
 #include <TNL/Algorithms/AtomicOperations.h>
 #include <TNL/Matrices/details/SparseMatrix.h>
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index e32236b3c03316d0ce516e9796c609acdb73a132..c3f7387fd05f6c75e8e5140d5620bb8852cf420c 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -520,7 +520,7 @@ reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce,
      }
      return identity;
   };
-   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, zero );
+   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, identity );
 }
 
 template< typename Real,
@@ -549,7 +549,7 @@ reduceRows( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce,
      }
      return identity;
   };
-   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, zero );
+   this->segments.reduceSegments( begin, end, fetch_, reduce, keep, identity );
 }
 
 template< typename Real,