Loading src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py +1 −1 Original line number Diff line number Diff line Loading @@ -51,7 +51,7 @@ df = pandas.concat(dfs) ## Post-processing print( "Postprocessing data frame..." ) # Drop norms of results differences df.drop(columns=['CSR Diff.Max','CSR Diff.L2'], axis=1, level=1, inplace=True ) #df.drop(columns=['CSR Diff.Max','CSR Diff.L2'], axis=1, level=1, inplace=True ) # show matrix formats as columns df = df.unstack() Loading src/TNL/Containers/Segments/CSRView.hpp +9 −0 Original line number Diff line number Diff line Loading @@ -28,6 +28,15 @@ CSRView() { } template< typename Device, typename Index > __cuda_callable__ CSRView< Device, Index >:: CSRView( const OffsetsView& offsets_view ) : offsets( offsets_view ) { } template< typename Device, typename Index > __cuda_callable__ Loading src/TNL/Containers/Segments/ChunkedEllpack.h +31 −19 Original line number Diff line number Diff line Loading @@ -19,15 +19,6 @@ namespace TNL { namespace Containers { namespace Segments { template< typename IndexType > struct ChunkedEllpackSliceInfo { IndexType size; IndexType chunkSize; IndexType firstRow; IndexType pointer; }; template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, Loading @@ -38,16 +29,19 @@ class ChunkedEllpack using DeviceType = Device; using IndexType = Index; //using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; static constexpr int getSliceSize() { return SliceSize; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using ViewType = ChunkedEllpackView< Device, Index, RowMajorOrder, SliceSize >; using ViewType = ChunkedEllpackView< Device, Index, RowMajorOrder >; template< typename Device_, typename Index_ > using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder >; using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; using ChunkedEllpackSliceInfoType = ChunkedEllpackSliceInfo< IndexType >; //TODO: using ChunkedEllpackSliceInfoAllocator = typename IndexAllocatorType::retype< ChunkedEllpackSliceInfoType >; using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >; using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >; ChunkedEllpack(); ChunkedEllpack() = default; ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); Loading Loading @@ -119,12 +113,14 @@ class ChunkedEllpack ChunkedEllpack& operator=( const ChunkedEllpack& source ) = default; template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > ChunkedEllpack& operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ); ChunkedEllpack& operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ); void save( File& file ) const; void load( File& file ); void printStructure( std::ostream& str ); // TODO const; protected: template< typename SegmentsSizes > Loading @@ -135,11 +131,27 @@ class ChunkedEllpack const IndexType sliceIdx, IndexType& elementsToAllocation ); IndexType chunksInSlice, desiredChunkSize; IndexType size = 0, storageSize = 0; Containers::Vector< Index, Device, Index > rowToChunkMapping, rowToSliceMapping, rowPointers; IndexType chunksInSlice = 256, desiredChunkSize = 16; /** * For each segment, this keeps index of the slice which contains the * segment. */ OffsetsHolder rowToSliceMapping; /** * For each row, this keeps index of the first chunk within a slice. */ OffsetsHolder rowToChunkMapping; /** * Keeps index of the first segment index. */ OffsetsHolder rowPointers; Containers::Array< ChunkedEllpackSliceInfo, Device, Index > slices; ChunkedEllpackSliceInfoContainer slices; IndexType numberOfSlices; }; Loading src/TNL/Containers/Segments/ChunkedEllpack.hpp +143 −125 Original line number Diff line number Diff line Loading @@ -19,24 +19,13 @@ namespace TNL { namespace Containers { namespace Segments { template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack() : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) : size( 0 ), storageSize( 0 ), chunksInSlice( 0 ), desiredChunkSize( 0 ) { this->setSegmentsSizes( sizes ); } Loading @@ -46,10 +35,16 @@ template< typename Device, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack( const ChunkedEllpack& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), sliceSegmentSizes( slicedEllpack.sliceSegmentSizes ) ChunkedEllpack( const ChunkedEllpack& chunkedEllpack ) : size( chunkedEllpack.size ), storageSize( chunkedEllpack.storageSize ), chunksInSlice( chunkedEllpack.chunksInSlice ), desiredChunkSize( chunkedEllpack.desiredChunkSize ), rowToChunkMapping( chunkedEllpack.rowToChunkMapping ), rowToSliceMapping( chunkedEllpack.rowTopSliceMapping ), rowPointers( chunkedEllpack.rowPointers ), slices( chunkedEllpack.slices ), numberOfSlices( chunkedEllpack.numberOfSlices ) { } Loading @@ -58,10 +53,16 @@ template< typename Device, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack( const ChunkedEllpack&& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), sliceSegmentSizes( slicedEllpack.sliceSegmentSizes ) ChunkedEllpack( const ChunkedEllpack&& chunkedEllpack ) : size( chunkedEllpack.size ), storageSize( chunkedEllpack.storageSize ), chunksInSlice( chunkedEllpack.chunksInSlice ), desiredChunkSize( chunkedEllpack.desiredChunkSize ), rowToChunkMapping( chunkedEllpack.rowToChunkMapping ), rowToSliceMapping( chunkedEllpack.rowTopSliceMapping ), rowPointers( chunkedEllpack.rowPointers ), slices( chunkedEllpack.slices ), numberOfSlices( chunkedEllpack.numberOfSlices ) { } Loading Loading @@ -95,7 +96,12 @@ typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ViewTyp ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getView() { return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() ); return ViewType( size, storageSize, chunksInSlice, desiredChunkSize, rowToChunkMapping.getView(), rowToSliceMapping.getView(), rowPointers.getView(), slices.getView(), numberOfSlices ); } template< typename Device, Loading @@ -106,7 +112,12 @@ typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ConstVi ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getConstView() const { return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); return ConstViewType( size, storageSize, chunksInSlice, desiredChunkSize, rowToChunkMapping.getConstView(), rowToSliceMapping.getConstView(), rowPointers.getConstView(), slices.getConstView(), numberOfSlices ); } template< typename Device, Loading @@ -116,7 +127,7 @@ template< typename Device, template< typename SegmentsSizes > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: resolveSliceSizes( SegmentsSizes& rowLengths ) resolveSliceSizes( SegmentsSizes& segmentsSizes ) { /**** * Iterate over rows and allocate slices so that each slice has Loading @@ -125,24 +136,24 @@ resolveSliceSizes( SegmentsSizes& rowLengths ) const IndexType desiredElementsInSlice = this->chunksInSlice * this->desiredChunkSize; IndexType row( 0 ), IndexType segmentIdx( 0 ), sliceSize( 0 ), allocatedElementsInSlice( 0 ); numberOfSlices = 0; while( row < this->rows ) while( segmentIdx < segmentsSizes.getSize() ) { /**** * Add one row to the current slice until we reach the desired * number of elements in a slice. */ allocatedElementsInSlice += rowLengths[ row ]; allocatedElementsInSlice += segmentsSizes[ segmentIdx ]; sliceSize++; row++; segmentIdx++; if( allocatedElementsInSlice < desiredElementsInSlice ) if( row < this->rows && sliceSize < chunksInSlice ) continue; if( segmentIdx < segmentsSizes.getSize() && sliceSize < chunksInSlice ) continue; TNL_ASSERT( sliceSize >0, ); this->slices[ numberOfSlices ].size = sliceSize; this->slices[ numberOfSlices ].firstRow = row - sliceSize; this->slices[ numberOfSlices ].firstSegment = segmentIdx - sliceSize; this->slices[ numberOfSlices ].pointer = allocatedElementsInSlice; // this is only temporary sliceSize = 0; numberOfSlices++; Loading @@ -169,7 +180,7 @@ setSlice( SegmentsSizes& rowLengths, * free chunks left, repeat it again. */ const IndexType sliceSize = this->slices[ sliceIndex ].size; const IndexType sliceBegin = this->slices[ sliceIndex ].firstRow; const IndexType sliceBegin = this->slices[ sliceIndex ].firstSegment; const IndexType allocatedElementsInSlice = this->slices[ sliceIndex ].pointer; const IndexType sliceEnd = sliceBegin + sliceSize; Loading Loading @@ -205,11 +216,11 @@ setSlice( SegmentsSizes& rowLengths, IndexType maxChunkInSlice( 0 ); for( IndexType i = sliceBegin; i < sliceEnd; i++ ) { maxChunkInSlice = max( maxChunkInSlice, TNL_ASSERT_NE( this->rowToChunkMapping[ i ], 0, "" ); maxChunkInSlice = TNL::max( maxChunkInSlice, roundUpDivision( rowLengths[ i ], this->rowToChunkMapping[ i ] ) ); } TNL_ASSERT( maxChunkInSlice > 0, std::cerr << " maxChunkInSlice = " << maxChunkInSlice << std::endl ); TNL_ASSERT_GT( maxChunkInSlice, 0, "" ); /**** * Set-up the slice info. Loading Loading @@ -242,32 +253,30 @@ setSlice( SegmentsSizes& rowLengths, template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename SizesHolder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: setSegmentsSizes( const SizesHolder& sizes ) setSegmentsSizes( const SizesHolder& segmentsSizes ) { TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" ); IndexType elementsToAllocation( 0 ); this->size = segmentsSizes.getSize(); this->slices.setSize( this->size ); this->rowToChunkMapping.setSize( this->size ); this->rowToSliceMapping.setSize( this->size ); this->rowPointers.setSize( this->size + 1 ); this->resolveSliceSizes( sizes ); this->resolveSliceSizes( segmentsSizes ); this->rowPointers.setElement( 0, 0 ); this->storageSize = 0; for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ ) this->setSlice( rowLengths, sliceIndex, elementsToAllocation ); this->setSlice( segmentsSizes, sliceIndex, storageSize ); this->rowPointers.scan(); } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -279,31 +288,29 @@ getSegmentsCount() const template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getSegmentSize( const IndexType segmentIdx ) const { const Index sliceIdx = segmentIdx / SliceSize; if( std::is_same< DeviceType, Devices::Host >::value ) return this->sliceSegmentSizes[ sliceIdx ]; else { #ifdef __CUDA_ARCH__ return this->sliceSegmentSizes[ sliceIdx ]; #else return this->sliceSegmentSizes.getElement( sliceIdx ); #endif } const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ]; TNL_ASSERT_LE( sliceIndex, this->getSegmentsCount(), "" ); IndexType firstChunkOfSegment( 0 ); if( segmentIdx != slices[ sliceIndex ].firstRow ) firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ]; const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; const IndexType chunkSize = slices[ sliceIndex ].chunkSize; std::cerr << chunkSize << " * " << segmentChunksCount << std::endl; return chunkSize * segmentChunksCount; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -315,55 +322,50 @@ getSize() const template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getStorageSize() const { return this->alignedSize; return this->storageSize; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; IndexType sliceOffset, segmentSize; if( std::is_same< DeviceType, Devices::Host >::value ) { sliceOffset = this->sliceOffsets[ sliceIdx ]; segmentSize = this->sliceSegmentSizes[ sliceIdx ]; } const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ]; TNL_ASSERT_LE( sliceIndex, this->rows, "" ); IndexType firstChunkOfSegment( 0 ); if( segmentIdx != slices[ sliceIndex ].firstRow ) firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ]; const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; const IndexType sliceOffset = slices[ sliceIndex ].pointer; const IndexType chunkSize = slices[ sliceIndex ].chunkSize; TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" ); if( RowMajorOrder ) return sliceOffset + firstChunkOfSegment * chunkSize + localIdx; else { #ifdef __CUDA__ARCH__ sliceOffset = this->sliceOffsets[ sliceIdx ]; segmentSize = this->sliceSegmentSizes[ sliceIdx ]; #else sliceOffset = this->sliceOffsets.getElement( sliceIdx ); segmentSize = this->sliceSegmentSizes.getElement( sliceIdx ); #endif const IndexType inChunkOffset = localIdx % chunkSize; const IndexType chunkIdx = localIdx / chunkSize; return sliceOffset + inChunkOffset * segmentChunksCount + chunkIdx; } if( RowMajorOrder ) return sliceOffset + segmentInSliceIdx * segmentSize + localIdx; else return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -374,14 +376,13 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { const IndexType sliceIdx = segmentIdx / SliceSize; /*const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; Loading @@ -390,19 +391,19 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); else return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); */ } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Function, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); /* const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) { Loading Loading @@ -433,14 +434,13 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } }*/ } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Function, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -452,14 +452,13 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); /* using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) Loading Loading @@ -495,14 +494,13 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } }*/ } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -514,51 +512,71 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >& ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >& ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ) operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ) { this->size = source.size; this->alignedSize = source.alignedSize; this->segmentsCount = source.segmentsCount; this->sliceOffsets = source.sliceOffsets; this->sliceSegmentSizes = source.sliceSegmentSizes; this->storageSize = source.storageSize; this->chunksInSlice = source.chunksInSlice; this->desiredChunkSize = source.desiredChunkSize; this->rowToChunkMapping = source.rowToChunkMapping; this->rowToSliceMapping = source.rowToSliceMapping; this->rowPointers = source.rowPointers; this->slices = source.slices; this->numberOfSlices = source.numberOfSlices; return *this; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: save( File& file ) const { file.save( &size ); file.save( &alignedSize ); file.save( &segmentsCount ); file << this->sliceOffsets; file << this->sliceSegmentSizes; file.save( &this->size ); file.save( &this->storageSize ); file.save( &this->chunksInSlice ); file.save( &this->desiredChunkSize ); file << this->rowToChunkMapping << this->rowToSliceMapping << this->rowPointers << this->slices; file.save( this->numberOfSlices ); } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: load( File& file ) { file.load( &size ); file.load( &alignedSize ); file.load( &segmentsCount ); file >> this->sliceOffsets; file >> this->sliceSegmentSizes; file.load( &this->size ); file.load( &this->storageSize ); file.load( &this->chunksInSlice ); file.load( &this->desiredChunkSize ); file >> this->rowToChunkMapping >> this->rowToSliceMapping >> this->rowPointers >> this->slices; file.load( &this->numberOfSlices ); } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: printStructure( std::ostream& str ) { this->getView().printStructure( str ); } } // namespace Segments Loading src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h 0 → 100644 +94 −0 File added.Preview size limit exceeded, changes collapsed. Show changes Loading
src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py +1 −1 Original line number Diff line number Diff line Loading @@ -51,7 +51,7 @@ df = pandas.concat(dfs) ## Post-processing print( "Postprocessing data frame..." ) # Drop norms of results differences df.drop(columns=['CSR Diff.Max','CSR Diff.L2'], axis=1, level=1, inplace=True ) #df.drop(columns=['CSR Diff.Max','CSR Diff.L2'], axis=1, level=1, inplace=True ) # show matrix formats as columns df = df.unstack() Loading
src/TNL/Containers/Segments/CSRView.hpp +9 −0 Original line number Diff line number Diff line Loading @@ -28,6 +28,15 @@ CSRView() { } template< typename Device, typename Index > __cuda_callable__ CSRView< Device, Index >:: CSRView( const OffsetsView& offsets_view ) : offsets( offsets_view ) { } template< typename Device, typename Index > __cuda_callable__ Loading
src/TNL/Containers/Segments/ChunkedEllpack.h +31 −19 Original line number Diff line number Diff line Loading @@ -19,15 +19,6 @@ namespace TNL { namespace Containers { namespace Segments { template< typename IndexType > struct ChunkedEllpackSliceInfo { IndexType size; IndexType chunkSize; IndexType firstRow; IndexType pointer; }; template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, Loading @@ -38,16 +29,19 @@ class ChunkedEllpack using DeviceType = Device; using IndexType = Index; //using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; static constexpr int getSliceSize() { return SliceSize; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using ViewType = ChunkedEllpackView< Device, Index, RowMajorOrder, SliceSize >; using ViewType = ChunkedEllpackView< Device, Index, RowMajorOrder >; template< typename Device_, typename Index_ > using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder >; using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; using ChunkedEllpackSliceInfoType = ChunkedEllpackSliceInfo< IndexType >; //TODO: using ChunkedEllpackSliceInfoAllocator = typename IndexAllocatorType::retype< ChunkedEllpackSliceInfoType >; using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >; using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >; ChunkedEllpack(); ChunkedEllpack() = default; ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); Loading Loading @@ -119,12 +113,14 @@ class ChunkedEllpack ChunkedEllpack& operator=( const ChunkedEllpack& source ) = default; template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > ChunkedEllpack& operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ); ChunkedEllpack& operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ); void save( File& file ) const; void load( File& file ); void printStructure( std::ostream& str ); // TODO const; protected: template< typename SegmentsSizes > Loading @@ -135,11 +131,27 @@ class ChunkedEllpack const IndexType sliceIdx, IndexType& elementsToAllocation ); IndexType chunksInSlice, desiredChunkSize; IndexType size = 0, storageSize = 0; Containers::Vector< Index, Device, Index > rowToChunkMapping, rowToSliceMapping, rowPointers; IndexType chunksInSlice = 256, desiredChunkSize = 16; /** * For each segment, this keeps index of the slice which contains the * segment. */ OffsetsHolder rowToSliceMapping; /** * For each row, this keeps index of the first chunk within a slice. */ OffsetsHolder rowToChunkMapping; /** * Keeps index of the first segment index. */ OffsetsHolder rowPointers; Containers::Array< ChunkedEllpackSliceInfo, Device, Index > slices; ChunkedEllpackSliceInfoContainer slices; IndexType numberOfSlices; }; Loading
src/TNL/Containers/Segments/ChunkedEllpack.hpp +143 −125 Original line number Diff line number Diff line Loading @@ -19,24 +19,13 @@ namespace TNL { namespace Containers { namespace Segments { template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack() : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) : size( 0 ), storageSize( 0 ), chunksInSlice( 0 ), desiredChunkSize( 0 ) { this->setSegmentsSizes( sizes ); } Loading @@ -46,10 +35,16 @@ template< typename Device, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack( const ChunkedEllpack& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), sliceSegmentSizes( slicedEllpack.sliceSegmentSizes ) ChunkedEllpack( const ChunkedEllpack& chunkedEllpack ) : size( chunkedEllpack.size ), storageSize( chunkedEllpack.storageSize ), chunksInSlice( chunkedEllpack.chunksInSlice ), desiredChunkSize( chunkedEllpack.desiredChunkSize ), rowToChunkMapping( chunkedEllpack.rowToChunkMapping ), rowToSliceMapping( chunkedEllpack.rowTopSliceMapping ), rowPointers( chunkedEllpack.rowPointers ), slices( chunkedEllpack.slices ), numberOfSlices( chunkedEllpack.numberOfSlices ) { } Loading @@ -58,10 +53,16 @@ template< typename Device, typename IndexAllocator, bool RowMajorOrder > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: ChunkedEllpack( const ChunkedEllpack&& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), sliceSegmentSizes( slicedEllpack.sliceSegmentSizes ) ChunkedEllpack( const ChunkedEllpack&& chunkedEllpack ) : size( chunkedEllpack.size ), storageSize( chunkedEllpack.storageSize ), chunksInSlice( chunkedEllpack.chunksInSlice ), desiredChunkSize( chunkedEllpack.desiredChunkSize ), rowToChunkMapping( chunkedEllpack.rowToChunkMapping ), rowToSliceMapping( chunkedEllpack.rowTopSliceMapping ), rowPointers( chunkedEllpack.rowPointers ), slices( chunkedEllpack.slices ), numberOfSlices( chunkedEllpack.numberOfSlices ) { } Loading Loading @@ -95,7 +96,12 @@ typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ViewTyp ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getView() { return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() ); return ViewType( size, storageSize, chunksInSlice, desiredChunkSize, rowToChunkMapping.getView(), rowToSliceMapping.getView(), rowPointers.getView(), slices.getView(), numberOfSlices ); } template< typename Device, Loading @@ -106,7 +112,12 @@ typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ConstVi ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getConstView() const { return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); return ConstViewType( size, storageSize, chunksInSlice, desiredChunkSize, rowToChunkMapping.getConstView(), rowToSliceMapping.getConstView(), rowPointers.getConstView(), slices.getConstView(), numberOfSlices ); } template< typename Device, Loading @@ -116,7 +127,7 @@ template< typename Device, template< typename SegmentsSizes > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: resolveSliceSizes( SegmentsSizes& rowLengths ) resolveSliceSizes( SegmentsSizes& segmentsSizes ) { /**** * Iterate over rows and allocate slices so that each slice has Loading @@ -125,24 +136,24 @@ resolveSliceSizes( SegmentsSizes& rowLengths ) const IndexType desiredElementsInSlice = this->chunksInSlice * this->desiredChunkSize; IndexType row( 0 ), IndexType segmentIdx( 0 ), sliceSize( 0 ), allocatedElementsInSlice( 0 ); numberOfSlices = 0; while( row < this->rows ) while( segmentIdx < segmentsSizes.getSize() ) { /**** * Add one row to the current slice until we reach the desired * number of elements in a slice. */ allocatedElementsInSlice += rowLengths[ row ]; allocatedElementsInSlice += segmentsSizes[ segmentIdx ]; sliceSize++; row++; segmentIdx++; if( allocatedElementsInSlice < desiredElementsInSlice ) if( row < this->rows && sliceSize < chunksInSlice ) continue; if( segmentIdx < segmentsSizes.getSize() && sliceSize < chunksInSlice ) continue; TNL_ASSERT( sliceSize >0, ); this->slices[ numberOfSlices ].size = sliceSize; this->slices[ numberOfSlices ].firstRow = row - sliceSize; this->slices[ numberOfSlices ].firstSegment = segmentIdx - sliceSize; this->slices[ numberOfSlices ].pointer = allocatedElementsInSlice; // this is only temporary sliceSize = 0; numberOfSlices++; Loading @@ -169,7 +180,7 @@ setSlice( SegmentsSizes& rowLengths, * free chunks left, repeat it again. */ const IndexType sliceSize = this->slices[ sliceIndex ].size; const IndexType sliceBegin = this->slices[ sliceIndex ].firstRow; const IndexType sliceBegin = this->slices[ sliceIndex ].firstSegment; const IndexType allocatedElementsInSlice = this->slices[ sliceIndex ].pointer; const IndexType sliceEnd = sliceBegin + sliceSize; Loading Loading @@ -205,11 +216,11 @@ setSlice( SegmentsSizes& rowLengths, IndexType maxChunkInSlice( 0 ); for( IndexType i = sliceBegin; i < sliceEnd; i++ ) { maxChunkInSlice = max( maxChunkInSlice, TNL_ASSERT_NE( this->rowToChunkMapping[ i ], 0, "" ); maxChunkInSlice = TNL::max( maxChunkInSlice, roundUpDivision( rowLengths[ i ], this->rowToChunkMapping[ i ] ) ); } TNL_ASSERT( maxChunkInSlice > 0, std::cerr << " maxChunkInSlice = " << maxChunkInSlice << std::endl ); TNL_ASSERT_GT( maxChunkInSlice, 0, "" ); /**** * Set-up the slice info. Loading Loading @@ -242,32 +253,30 @@ setSlice( SegmentsSizes& rowLengths, template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename SizesHolder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: setSegmentsSizes( const SizesHolder& sizes ) setSegmentsSizes( const SizesHolder& segmentsSizes ) { TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" ); IndexType elementsToAllocation( 0 ); this->size = segmentsSizes.getSize(); this->slices.setSize( this->size ); this->rowToChunkMapping.setSize( this->size ); this->rowToSliceMapping.setSize( this->size ); this->rowPointers.setSize( this->size + 1 ); this->resolveSliceSizes( sizes ); this->resolveSliceSizes( segmentsSizes ); this->rowPointers.setElement( 0, 0 ); this->storageSize = 0; for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ ) this->setSlice( rowLengths, sliceIndex, elementsToAllocation ); this->setSlice( segmentsSizes, sliceIndex, storageSize ); this->rowPointers.scan(); } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -279,31 +288,29 @@ getSegmentsCount() const template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getSegmentSize( const IndexType segmentIdx ) const { const Index sliceIdx = segmentIdx / SliceSize; if( std::is_same< DeviceType, Devices::Host >::value ) return this->sliceSegmentSizes[ sliceIdx ]; else { #ifdef __CUDA_ARCH__ return this->sliceSegmentSizes[ sliceIdx ]; #else return this->sliceSegmentSizes.getElement( sliceIdx ); #endif } const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ]; TNL_ASSERT_LE( sliceIndex, this->getSegmentsCount(), "" ); IndexType firstChunkOfSegment( 0 ); if( segmentIdx != slices[ sliceIndex ].firstRow ) firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ]; const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; const IndexType chunkSize = slices[ sliceIndex ].chunkSize; std::cerr << chunkSize << " * " << segmentChunksCount << std::endl; return chunkSize * segmentChunksCount; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -315,55 +322,50 @@ getSize() const template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getStorageSize() const { return this->alignedSize; return this->storageSize; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ Index ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; IndexType sliceOffset, segmentSize; if( std::is_same< DeviceType, Devices::Host >::value ) { sliceOffset = this->sliceOffsets[ sliceIdx ]; segmentSize = this->sliceSegmentSizes[ sliceIdx ]; } const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ]; TNL_ASSERT_LE( sliceIndex, this->rows, "" ); IndexType firstChunkOfSegment( 0 ); if( segmentIdx != slices[ sliceIndex ].firstRow ) firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ]; const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; const IndexType sliceOffset = slices[ sliceIndex ].pointer; const IndexType chunkSize = slices[ sliceIndex ].chunkSize; TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" ); if( RowMajorOrder ) return sliceOffset + firstChunkOfSegment * chunkSize + localIdx; else { #ifdef __CUDA__ARCH__ sliceOffset = this->sliceOffsets[ sliceIdx ]; segmentSize = this->sliceSegmentSizes[ sliceIdx ]; #else sliceOffset = this->sliceOffsets.getElement( sliceIdx ); segmentSize = this->sliceSegmentSizes.getElement( sliceIdx ); #endif const IndexType inChunkOffset = localIdx % chunkSize; const IndexType chunkIdx = localIdx / chunkSize; return sliceOffset + inChunkOffset * segmentChunksCount + chunkIdx; } if( RowMajorOrder ) return sliceOffset + segmentInSliceIdx * segmentSize + localIdx; else return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -374,14 +376,13 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > __cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { const IndexType sliceIdx = segmentIdx / SliceSize; /*const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; Loading @@ -390,19 +391,19 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); else return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); */ } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Function, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); /* const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) { Loading Loading @@ -433,14 +434,13 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } }*/ } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Function, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -452,14 +452,13 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); /* using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) Loading Loading @@ -495,14 +494,13 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } }*/ } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: Loading @@ -514,51 +512,71 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >& ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >& ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ) operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ) { this->size = source.size; this->alignedSize = source.alignedSize; this->segmentsCount = source.segmentsCount; this->sliceOffsets = source.sliceOffsets; this->sliceSegmentSizes = source.sliceSegmentSizes; this->storageSize = source.storageSize; this->chunksInSlice = source.chunksInSlice; this->desiredChunkSize = source.desiredChunkSize; this->rowToChunkMapping = source.rowToChunkMapping; this->rowToSliceMapping = source.rowToSliceMapping; this->rowPointers = source.rowPointers; this->slices = source.slices; this->numberOfSlices = source.numberOfSlices; return *this; } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: save( File& file ) const { file.save( &size ); file.save( &alignedSize ); file.save( &segmentsCount ); file << this->sliceOffsets; file << this->sliceSegmentSizes; file.save( &this->size ); file.save( &this->storageSize ); file.save( &this->chunksInSlice ); file.save( &this->desiredChunkSize ); file << this->rowToChunkMapping << this->rowToSliceMapping << this->rowPointers << this->slices; file.save( this->numberOfSlices ); } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder, int SliceSize > bool RowMajorOrder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: load( File& file ) { file.load( &size ); file.load( &alignedSize ); file.load( &segmentsCount ); file >> this->sliceOffsets; file >> this->sliceSegmentSizes; file.load( &this->size ); file.load( &this->storageSize ); file.load( &this->chunksInSlice ); file.load( &this->desiredChunkSize ); file >> this->rowToChunkMapping >> this->rowToSliceMapping >> this->rowPointers >> this->slices; file.load( &this->numberOfSlices ); } template< typename Device, typename Index, typename IndexAllocator, bool RowMajorOrder > void ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: printStructure( std::ostream& str ) { this->getView().printStructure( str ); } } // namespace Segments Loading
src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h 0 → 100644 +94 −0 File added.Preview size limit exceeded, changes collapsed. Show changes