Refactoring of ChunkedEllpack segments. (7fd8fa6b) · Commits · TNL / tnl-dev

src/TNL/Containers/Segments/ChunkedEllpack.h

+1 −6

Original line number	Diff line number	Diff line
		@@ -36,7 +36,7 @@ class ChunkedEllpack
		using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder >;
		using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder >;
		using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
		using ChunkedEllpackSliceInfoType = ChunkedEllpackSliceInfo< IndexType >;
		using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
		//TODO: using ChunkedEllpackSliceInfoAllocator = typename IndexAllocatorType::retype< ChunkedEllpackSliceInfoType >;
		using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
		using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
		@@ -66,7 +66,6 @@ class ChunkedEllpack
		__cuda_callable__
		IndexType getSegmentsCount() const;

		__cuda_callable__
		IndexType getSegmentSize( const IndexType segmentIdx ) const;

		/**
		@@ -75,16 +74,12 @@ class ChunkedEllpack
		__cuda_callable__
		IndexType getSize() const;


		__cuda_callable__
		IndexType getStorageSize() const;

		__cuda_callable__
		IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;

		__cuda_callable__
		void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;

		__cuda_callable__
		SegmentViewType getSegmentView( const IndexType segmentIdx ) const;

src/TNL/Containers/Segments/ChunkedEllpack.hpp

+14 −121

Original line number	Diff line number	Diff line
		@@ -289,21 +289,15 @@ template< typename Device,
		typename Index,
		typename IndexAllocator,
		bool RowMajorOrder >
		__cuda_callable__
		Index
		ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
		getSegmentSize( const IndexType segmentIdx ) const
		{
		const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ];
		TNL_ASSERT_LE( sliceIndex, this->getSegmentsCount(), "" );
		IndexType firstChunkOfSegment( 0 );
		if( segmentIdx != slices[ sliceIndex ].firstRow )
		firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];

		const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
		const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
		const IndexType chunkSize = slices[ sliceIndex ].chunkSize;
		return chunkSize * segmentChunksCount;
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize(
		rowToSliceMapping.getView(),
		slices.getView(),
		rowToChunkMapping.getView(),
		segmentIdx );
		}

		template< typename Device,
		@@ -339,37 +333,13 @@ Index
		ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
		getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
		{
		const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ];
		TNL_ASSERT_LE( sliceIndex, this->rows, "" );
		IndexType firstChunkOfSegment( 0 );
		if( segmentIdx != slices[ sliceIndex ].firstRow )
		firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];

		const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
		const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
		const IndexType sliceOffset = slices[ sliceIndex ].pointer;
		const IndexType chunkSize = slices[ sliceIndex ].chunkSize;
		TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" );

		if( RowMajorOrder )
		return sliceOffset + firstChunkOfSegment * chunkSize + localIdx;
		else
		{
		const IndexType inChunkOffset = localIdx % chunkSize;
		const IndexType chunkIdx = localIdx / chunkSize;
		return sliceOffset + inChunkOffset * segmentChunksCount + chunkIdx;
		}
		}

		template< typename Device,
		typename Index,
		typename IndexAllocator,
		bool RowMajorOrder >
		__cuda_callable__
		void
		ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
		getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
		{
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		chunksInSlice,
		segmentIdx,
		localIdx );
		}

		template< typename Device,
		@@ -381,16 +351,6 @@ auto
		ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
		getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
		{
		/*const IndexType sliceIdx = segmentIdx / SliceSize;
		const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
		const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ];
		const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ];

		if( RowMajorOrder )
		return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 );
		else
		return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize );
		*/
		}

		template< typename Device,
		@@ -402,38 +362,7 @@ void
		ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
		forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
		{
		/* const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
		const auto sliceOffsets_view = this->sliceOffsets.getConstView();
		if( RowMajorOrder )
		{
		auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
		const IndexType sliceIdx = segmentIdx / SliceSize;
		const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
		const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
		const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
		const IndexType end = begin + segmentSize;
		IndexType localIdx( 0 );
		for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ )
		if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
		break;
		};
		Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
		}
		else
		{
		auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
		const IndexType sliceIdx = segmentIdx / SliceSize;
		const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
		const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
		const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
		const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
		IndexType localIdx( 0 );
		for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
		if( ! f( segmentIdx, localIdx++, globalIdx, args... ) )
		break;
		};
		Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
		}*/
		this->getView().forSegments( first, last, f, args... );
		}

		template< typename Device,
		@@ -457,43 +386,7 @@ void
		ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::
		segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
		{
		/* using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
		const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
		const auto sliceOffsets_view = this->sliceOffsets.getConstView();
		if( RowMajorOrder )
		{
		auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
		const IndexType sliceIdx = segmentIdx / SliceSize;
		const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
		const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
		const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize;
		const IndexType end = begin + segmentSize;
		RealType aux( zero );
		bool compute( true );
		IndexType localIdx( 0 );
		for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ )
		reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
		keeper( segmentIdx, aux );
		};
		Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
		}
		else
		{
		auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
		const IndexType sliceIdx = segmentIdx / SliceSize;
		const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
		const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ];
		const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx;
		const IndexType end = sliceOffsets_view[ sliceIdx + 1 ];
		RealType aux( zero );
		bool compute( true );
		IndexType localIdx( 0 );
		for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize )
		reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) );
		keeper( segmentIdx, aux );
		};
		Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
		}*/
		this->getView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... );
		}

		template< typename Device,

src/TNL/Containers/Segments/ChunkedEllpackView.h

+2 −35

Original line number	Diff line number	Diff line
		@@ -14,42 +14,12 @@

		#include <TNL/Containers/Vector.h>
		#include <TNL/Containers/Segments/ChunkedEllpackSegmentView.h>
		#include <TNL/Containers/Segments/details/ChunkedEllpack.h>

		namespace TNL {
		namespace Containers {
		namespace Segments {

		/***
		* In ChunkedEllpack, the segments are split into slices. This is done
		* in ChunkedEllpack::resolveSliceSizes. All segment elements in each slice
		* are split into chunks. All chunks in one slice have the same size, but the
		* chunk size can differ from slice to slice.
		*
		* This structure holds the description of one slice.
		*/
		template< typename Index >
		struct ChunkedEllpackSliceInfo
		{
		/**
		* The size of the slice, i.e. the number of matrix rows covered by
		* the slice.
		*/
		Index size;

		/**
		* The chunk size, i.e. the maximal number of non-zero elements that can be
		* stored in one chunk of this slice.
		*/
		Index chunkSize;

		/**
		* Index of the first segment covered by this slice.
		*/
		Index firstSegment;

		/**
		* Position of the first element of this slice.
		*/
		Index pointer;
		};

		template< typename Device,
		typename Index,
		@@ -67,7 +37,7 @@ class ChunkedEllpackView
		using ViewTemplate = ChunkedEllpackView< Device_, Index_ >;
		using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index > >;
		using SegmentViewType = ChunkedEllpackSegmentView< IndexType >;
		using ChunkedEllpackSliceInfoType = ChunkedEllpackSliceInfo< IndexType >;
		using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >;
		using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >;
		using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >;
		using ChunkedEllpackSliceInfoContainerView = typename ChunkedEllpackSliceInfoContainer::ViewType;
		@@ -140,9 +110,6 @@ class ChunkedEllpackView
		__cuda_callable__
		IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;

		__cuda_callable__
		void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;

		__cuda_callable__
		SegmentViewType getSegmentView( const IndexType segmentIdx ) const;

src/TNL/Containers/Segments/ChunkedEllpackView.hpp

+73 −64

Original line number	Diff line number	Diff line
		@@ -179,16 +179,28 @@ Index
		ChunkedEllpackView< Device, Index, RowMajorOrder >::
		getSegmentSize( const IndexType segmentIdx ) const
		{
		const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ];
		TNL_ASSERT_LE( sliceIndex, this->getSegmentsCount(), "" );
		IndexType firstChunkOfSegment( 0 );
		if( segmentIdx != slices[ sliceIndex ].firstSegment )
		firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];

		const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
		const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
		const IndexType chunkSize = slices[ sliceIndex ].chunkSize;
		return chunkSize * segmentChunksCount;
		if( std::is_same< DeviceType, Devices::Host >::value )
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		segmentIdx );
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef __CUDA_ARCH__
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		segmentIdx );
		#else
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		segmentIdx );
		#endif
		}
		}

		template< typename Device,
		@@ -221,38 +233,36 @@ Index
		ChunkedEllpackView< Device, Index, RowMajorOrder >::
		getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
		{
		const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ];
		TNL_ASSERT_LE( sliceIndex, this->size, "" );
		IndexType firstChunkOfSegment( 0 );
		if( segmentIdx != slices[ sliceIndex ].firstSegment )
		firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];

		const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
		const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
		const IndexType sliceOffset = slices[ sliceIndex ].pointer;
		const IndexType chunkSize = slices[ sliceIndex ].chunkSize;
		TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" );

		if( RowMajorOrder )
		return sliceOffset + firstChunkOfSegment * chunkSize + localIdx;
		else
		{
		const IndexType inChunkOffset = localIdx % chunkSize;
		const IndexType chunkIdx = localIdx / chunkSize;
		return sliceOffset + inChunkOffset * chunksInSlice + firstChunkOfSegment + chunkIdx;
		if( std::is_same< DeviceType, Devices::Host >::value )
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		chunksInSlice,
		segmentIdx,
		localIdx );
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef __CUDA_ARCH__
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		chunksInSlice,
		segmentIdx,
		localIdx );
		#else
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		chunksInSlice,
		segmentIdx,
		localIdx );
		#endif
		}
		}

		/**
		* Definition of ChunkedEllpackView::getSegmentAndLocalIndex.
		*
		* The body is empty: nothing is computed from globalIdx, and neither
		* segmentIdx nor localIdx is assigned. Both output references therefore keep
		* whatever values the caller passed in.
		*/
		template< typename Device,
		typename Index,
		bool RowMajorOrder >
		__cuda_callable__
		void
		ChunkedEllpackView< Device, Index, RowMajorOrder >::
		getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
		{
		// Empty body - the output parameters are left untouched.
		}

		template< typename Device,
		typename Index,
		bool RowMajorOrder >
		@@ -261,32 +271,31 @@ auto
		ChunkedEllpackView< Device, Index, RowMajorOrder >::
		getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
		{
		const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ];
		TNL_ASSERT_LE( sliceIndex, this->size, "" );
		IndexType firstChunkOfSegment( 0 );
		if( segmentIdx != slices[ sliceIndex ].firstSegment )
		firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ];

		const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ];
		const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment;
		const IndexType sliceOffset = slices[ sliceIndex ].pointer;
		const IndexType chunkSize = slices[ sliceIndex ].chunkSize;
		const IndexType segmentSize = segmentChunksCount * chunkSize;

		if( RowMajorOrder )
		return SegmentViewType( sliceOffset + firstChunkOfSegment * chunkSize,
		segmentSize,
		chunkSize,
		chunksInSlice );
		else // TODO FIX !!!!!!!!!!!!!!
		return SegmentViewType( sliceOffset + firstChunkOfSegment,
		segmentSize,
		chunkSize,
		chunksInSlice );




		if( std::is_same< DeviceType, Devices::Host >::value )
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		chunksInSlice,
		segmentIdx );
		if( std::is_same< DeviceType, Devices::Cuda >::value )
		{
		#ifdef __CUDA_ARCH__
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		chunksInSlice,
		segmentIdx );
		#else
		return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentView(
		rowToSliceMapping,
		slices,
		rowToChunkMapping,
		chunksInSlice,
		segmentIdx );
		#endif
		}
		}

		template< typename Device,

src/TNL/Containers/Segments/details/ChunkedEllpack.h

0 → 100644

+230 −0

File added.

Preview size limit exceeded, changes collapsed.