Merge branch 'TO/matrices' into 'develop' (bf4dc990) · Commits · TNL / tnl-dev

src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py

+10 −6

Original line number	Diff line number	Diff line
		@@ -20,6 +20,7 @@ cpu_matrix_formats = [ 'CSR',
		gpu_matrix_formats = [ 'CSR Legacy Scalar', 'CSR Legacy Vector', 'CSR Legacy MultiVector',
		'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic',
		'CSR Legacy Adaptive',
		'CSR< Scalar >', 'CSR< Vector >', 'CSR< Hybrid >', 'CSR< Adaptive >',
		'Ellpack', 'Ellpack Legacy',
		'SlicedEllpack', 'SlicedEllpack Legacy',
		'ChunkedEllpack', 'ChunkedEllpack Legacy',
		@@ -36,7 +37,10 @@ cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar',
		"""
		GPU formats to be compared
		"""
		gpu_comparison_formats = { #'CSR' : 'CSR Legacy Scalar',
		gpu_comparison_formats = { 'CSR< Scalar >' : 'CSR Legacy Scalar',
		'CSR< Vector >' : 'CSR Legacy Vector',
		'CSR< Hybrid >' : 'CSR Legacy LightWithoutAtomic',
		'CSR< Adaptive >' : 'CSR Legacy Adaptive',
		'Ellpack' : 'Ellpack Legacy',
		'SlicedEllpack' : 'SlicedEllpack Legacy',
		'BiEllpack' : 'BiEllpack Legacy'

src/TNL/Algorithms/Segments/CSRKernelAdaptive.h

+21 −16

Original line number	Diff line number	Diff line
		@@ -220,6 +220,11 @@ struct CSRKernelAdaptiveView

		// Returns this view as a ConstViewType; the result is built from *this.
		ConstViewType getConstView() const { return *this; };

		// Returns the fixed name of this kernel, the string "Adaptive".
		// NOTE(review): presumably consumed by benchmark/reporting code to label
		// results (cf. 'CSR< Adaptive >' in the SpMV table script) - confirm.
		static TNL::String getKernelType()
		{
		return "Adaptive";
		};

		template< typename OffsetsView,
		typename Fetch,
		typename Reduction,
		@@ -344,6 +349,10 @@ struct CSRKernelAdaptive
		using BlocksType = typename ViewType::BlocksType;
		using BlocksView = typename BlocksType::ViewType;

		// Returns the kernel name by forwarding to ViewType::getKernelType(), so the
		// kernel and its view always report the same string.
		// NOTE(review): ViewType is presumably CSRKernelAdaptiveView, which would
		// make the result "Adaptive" - confirm against the ViewType alias.
		static TNL::String getKernelType()
		{
		return ViewType::getKernelType();
		};

		static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;

		@@ -373,10 +382,11 @@ struct CSRKernelAdaptive
		Index &sum )
		{
		sum = 0;
		TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >
		hostOffsets( offsets );
		for (Index current = start; current < size - 1; current++ )
		{
		Index elements = offsets.getElement(current + 1) -
		offsets.getElement(current);
		Index elements = hostOffsets[ current + 1 ] - hostOffsets[ current ];
		sum += elements;
		if( sum > SHARED_PER_WARP )
		{
		@@ -407,8 +417,8 @@ struct CSRKernelAdaptive
		Index sum, start( 0 ), nextStart( 0 );

		// Fill blocks
		std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlock;
		inBlock.reserve( rows );
		std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks;
		inBlocks.reserve( rows );

		while( nextStart != rows - 1 )
		{
		@@ -417,34 +427,29 @@ struct CSRKernelAdaptive

		if( type == details::Type::LONG )
		{
		const Index blocksCount = inBlock.size();
		const Index blocksCount = inBlocks.size();
		const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
		Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
		if( warpsLeft == 0 )
		warpsLeft = warpsPerCudaBlock;
		//Index parts = roundUpDivision(sum, this->SHARED_PER_WARP);
		inBlock.emplace_back( start, details::Type::LONG, 0, warpsLeft );
		inBlocks.emplace_back( start, details::Type::LONG, 0, warpsLeft );
		for( Index index = 1; index < warpsLeft; index++ )
		{
		inBlock.emplace_back( start, details::Type::LONG, index, warpsLeft );
		inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft );
		}
		}
		else
		{
		inBlock.emplace_back(start, type,
		inBlocks.emplace_back(start, type,
		nextStart,
		offsets.getElement(nextStart),
		offsets.getElement(start) );
		}
		start = nextStart;
		}
		inBlock.emplace_back(nextStart);

		// Copy values
		this->blocks.setSize(inBlock.size());
		for (size_t i = 0; i < inBlock.size(); ++i)
		this->blocks.setElement(i, inBlock[i]);

		inBlocks.emplace_back(nextStart);
		this->blocks = inBlocks;
		this->view.setBlocks( blocks );
		};

src/TNL/Algorithms/Segments/CSRKernelHybrid.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -38,6 +38,7 @@ struct CSRKernelHybrid

		ConstViewType getConstView() const;

		// Returns the name of this kernel as a TNL::String; the out-of-class
		// definition in CSRKernelHybrid.hpp returns "Hybrid".
		static TNL::String getKernelType();

		template< typename OffsetsView,
		typename Fetch,

src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp

+9 −0

Original line number	Diff line number	Diff line
		@@ -114,6 +114,15 @@ getView() -> ViewType
		return *this;
		}

		// Out-of-class definition of the static CSRKernelHybrid::getKernelType().
		// Returns the fixed kernel name "Hybrid"; the result does not depend on the
		// Index or Device template parameters.
		template< typename Index,
		typename Device >
		TNL::String
		CSRKernelHybrid< Index, Device >::
		getKernelType()
		{
		return "Hybrid";
		}

		template< typename Index,
		typename Device >
		auto

src/TNL/Algorithms/Segments/CSRKernelScalar.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -38,6 +38,8 @@ struct CSRKernelScalar

		ConstViewType getConstView() const;

		// Static accessor returning a TNL::String for this kernel.
		// NOTE(review): the definition is not visible here; presumably it returns
		// "Scalar", mirroring the Adaptive/Hybrid kernels - confirm in the .hpp.
		static TNL::String getKernelType();

		template< typename OffsetsView,
		typename Fetch,
		typename Reduction,