Loading src/TNL/Algorithms/Segments/CSRKernelAdaptive.h +45 −50 Original line number Diff line number Diff line Loading @@ -119,7 +119,7 @@ segmentsReductionCSRAdaptiveKernel( BlocksView blocks, const Index segmentIdx = block.getFirstSegment();//block.index[0]; //minID = offsets[block.index[0] ]; const Index end = offsets[segmentIdx + 1]; const int tid = threadIdx.x; //const int tid = threadIdx.x; //const int inBlockWarpIdx = block.getWarpIdx(); //if( to > end ) Loading Loading @@ -382,11 +382,9 @@ struct CSRKernelAdaptive Index &sum ) { sum = 0; TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType > hostOffsets( offsets ); for (Index current = start; current < size - 1; current++ ) { Index elements = hostOffsets[ current + 1 ] - hostOffsets[ current ]; Index elements = offsets[ current + 1 ] - offsets[ current ]; sum += elements; if( sum > SHARED_PER_WARP ) { Loading @@ -401,7 +399,6 @@ struct CSRKernelAdaptive type = details::Type::VECTOR; else type = details::Type::LONG; //type = Type::LONG; // return current + 1; } } Loading @@ -413,6 +410,8 @@ struct CSRKernelAdaptive template< typename Offsets > void init( const Offsets& offsets ) { using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >; HostOffsetsType hostOffsets( offsets ); const Index rows = offsets.getSize(); Index sum, start( 0 ), nextStart( 0 ); Loading @@ -423,7 +422,7 @@ struct CSRKernelAdaptive while( nextStart != rows - 1 ) { details::Type type; nextStart = findLimit( start, offsets, rows, type, sum ); nextStart = findLimit( start, hostOffsets, rows, type, sum ); if( type == details::Type::LONG ) { Loading @@ -432,13 +431,9 @@ struct CSRKernelAdaptive Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount; if( warpsLeft == 0 ) warpsLeft = warpsPerCudaBlock; //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP); inBlocks.emplace_back( start, details::Type::LONG, 0, warpsLeft ); for( Index index = 1; index < warpsLeft; index++ ) { for( Index index = 0; index < warpsLeft; index++ ) inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft ); } } else { inBlocks.emplace_back(start, type, Loading @@ -451,7 +446,7 @@ struct CSRKernelAdaptive inBlocks.emplace_back(nextStart); this->blocks = inBlocks; this->view.setBlocks( blocks ); }; } void reset() { Loading Loading
src/TNL/Algorithms/Segments/CSRKernelAdaptive.h +45 −50 Original line number Diff line number Diff line Loading @@ -119,7 +119,7 @@ segmentsReductionCSRAdaptiveKernel( BlocksView blocks, const Index segmentIdx = block.getFirstSegment();//block.index[0]; //minID = offsets[block.index[0] ]; const Index end = offsets[segmentIdx + 1]; const int tid = threadIdx.x; //const int tid = threadIdx.x; //const int inBlockWarpIdx = block.getWarpIdx(); //if( to > end ) Loading Loading @@ -382,11 +382,9 @@ struct CSRKernelAdaptive Index &sum ) { sum = 0; TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType > hostOffsets( offsets ); for (Index current = start; current < size - 1; current++ ) { Index elements = hostOffsets[ current + 1 ] - hostOffsets[ current ]; Index elements = offsets[ current + 1 ] - offsets[ current ]; sum += elements; if( sum > SHARED_PER_WARP ) { Loading @@ -401,7 +399,6 @@ struct CSRKernelAdaptive type = details::Type::VECTOR; else type = details::Type::LONG; //type = Type::LONG; // return current + 1; } } Loading @@ -413,6 +410,8 @@ struct CSRKernelAdaptive template< typename Offsets > void init( const Offsets& offsets ) { using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >; HostOffsetsType hostOffsets( offsets ); const Index rows = offsets.getSize(); Index sum, start( 0 ), nextStart( 0 ); Loading @@ -423,7 +422,7 @@ struct CSRKernelAdaptive while( nextStart != rows - 1 ) { details::Type type; nextStart = findLimit( start, offsets, rows, type, sum ); nextStart = findLimit( start, hostOffsets, rows, type, sum ); if( type == details::Type::LONG ) { Loading @@ -432,13 +431,9 @@ struct CSRKernelAdaptive Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount; if( warpsLeft == 0 ) warpsLeft = warpsPerCudaBlock; //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP); inBlocks.emplace_back( start, details::Type::LONG, 0, warpsLeft ); for( Index index = 1; index < warpsLeft; index++ ) { for( Index index = 0; index < warpsLeft; index++ ) inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft ); } } else { inBlocks.emplace_back(start, type, Loading @@ -451,7 +446,7 @@ struct CSRKernelAdaptive inBlocks.emplace_back(nextStart); this->blocks = inBlocks; this->view.setBlocks( blocks ); }; } void reset() { Loading