Refactoring: moved reorderMatrix from benchmarks into SparseOperatiosn.h as copySparseMatrix (12b37fb0) · Commits · TNL / tnl-dev

src/Benchmarks/DistSpMV/ordering.h

+0 −112

Original line number	Diff line number	Diff line
		#pragma once

		#include <algorithm>

		#include <TNL/Devices/Host.h>
		#include <TNL/ParallelFor.h>

		using namespace TNL;

		template< typename Matrix, typename PermutationVector >
		void
		getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVector& iperm )
		@@ -26,108 +19,3 @@ getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVe
		}
		}

		template< typename Vector, typename PermutationVector >
		void
		reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm )
		{
		TNL_ASSERT_EQ( src.getSize(), perm.getSize(),
		"Source vector and permutation must have the same size." );
		using RealType = typename Vector::RealType;
		using DeviceType = typename Vector::DeviceType;
		using IndexType = typename Vector::IndexType;

		auto kernel = [] __cuda_callable__
		( IndexType i,
		const RealType* src,
		RealType* dest,
		const typename PermutationVector::RealType* perm )
		{
		dest[ i ] = src[ perm[ i ] ];
		};

		dest.setLike( src );

		ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(),
		kernel,
		src.getData(),
		dest.getData(),
		perm.getData() );
		}

		template< typename Matrix, typename PermutationVector >
		void
		reorderMatrix( const Matrix& matrix1, Matrix& matrix2, const PermutationVector& _perm, const PermutationVector& _iperm )
		{
		// TODO: implement on GPU
		static_assert( std::is_same< typename Matrix::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );
		static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );

		using namespace TNL;
		using IndexType = typename Matrix::IndexType;

		matrix2.setLike( matrix1 );

		// general multidimensional accessors for permutation indices
		// TODO: this depends on the specific layout of dofs, general reordering of NDArray is needed
		auto perm = [&]( IndexType dof ) {
		TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" );
		const IndexType i = dof / _perm.getSize();
		return i * _perm.getSize() + _perm[ dof % _perm.getSize() ];
		};
		auto iperm = [&]( IndexType dof ) {
		TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" );
		const IndexType i = dof / _iperm.getSize();
		return i * _iperm.getSize() + _iperm[ dof % _iperm.getSize() ];
		};

		// set row lengths
		typename Matrix::CompressedRowLengthsVector rowLengths;
		rowLengths.setSize( matrix1.getRows() );
		for( IndexType i = 0; i < matrix1.getRows(); i++ ) {
		const IndexType maxLength = matrix1.getRowLength( perm( i ) );
		const auto row = matrix1.getRow( perm( i ) );
		IndexType length = 0;
		for( IndexType j = 0; j < maxLength; j++ )
		if( row.getElementColumn( j ) < matrix1.getColumns() )
		length++;
		rowLengths[ i ] = length;
		}
		matrix2.setCompressedRowLengths( rowLengths );

		// set row elements
		for( IndexType i = 0; i < matrix2.getRows(); i++ ) {
		const IndexType rowLength = rowLengths[ i ];

		// extract sparse row
		const auto row1 = matrix1.getRow( perm( i ) );

		// permute
		typename Matrix::IndexType columns[ rowLength ];
		typename Matrix::RealType values[ rowLength ];
		for( IndexType j = 0; j < rowLength; j++ ) {
		columns[ j ] = iperm( row1.getElementColumn( j ) );
		values[ j ] = row1.getElementValue( j );
		}

		// sort
		IndexType indices[ rowLength ];
		for( IndexType j = 0; j < rowLength; j++ )
		indices[ j ] = j;
		// nvcc does not allow lambdas to capture VLAs, even in host code (WTF!?)
		// error: a variable captured by a lambda cannot have a type involving a variable-length array
		IndexType* _columns = columns;
		auto comparator = [=]( IndexType a, IndexType b ) {
		return _columns[ a ] < _columns[ b ];
		};
		std::sort( indices, indices + rowLength, comparator );

		typename Matrix::IndexType sortedColumns[ rowLength ];
		typename Matrix::RealType sortedValues[ rowLength ];
		for( IndexType j = 0; j < rowLength; j++ ) {
		sortedColumns[ j ] = columns[ indices[ j ] ];
		sortedValues[ j ] = values[ indices[ j ] ];
		}

		matrix2.setRow( i, sortedColumns, sortedValues, rowLength );
		}
		}

src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -190,7 +190,7 @@ struct SpmvBenchmark
		PermutationVector perm, iperm;
		getTrivialOrdering( matrix, perm, iperm );
		MatrixType matrix_perm;
		reorderMatrix( matrix, matrix_perm, perm, iperm );
		Matrices::reorderSparseMatrix( matrix, matrix_perm, perm, iperm );
		if( CommunicatorType::isDistributed() )
		runDistributed( benchmark, metadata, parameters, matrix_perm, vector );
		else

src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h

+3 −3

Original line number	Diff line number	Diff line
		@@ -361,9 +361,9 @@ struct LinearSolversBenchmark
		getTrivialOrdering( *matrixPointer, perm, iperm );
		SharedPointer< MatrixType > matrix_perm;
		VectorType x0_perm, b_perm;
		reorderMatrix( matrixPointer, matrix_perm, perm, iperm );
		reorderVector( x0, x0_perm, perm );
		reorderVector( b, b_perm, perm );
		Matrices::reorderSparseMatrix( matrixPointer, matrix_perm, perm, iperm );
		Matrices::reorderVector( x0, x0_perm, perm );
		Matrices::reorderVector( b, b_perm, perm );
		if( CommunicatorType::isDistributed() )
		runDistributed( benchmark, metadata, parameters, matrix_perm, x0_perm, b_perm );
		else

src/TNL/Matrices/SparseOperations.h

+14 −0

Original line number	Diff line number	Diff line
		@@ -31,6 +31,20 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B,
		bool has_symmetric_pattern = false,
		bool ignore_diagonal = true );

		// Applies a permutation to the rows of a sparse matrix and its inverse
		// permutation to the columns of the matrix, i.e. A_perm = PAP^{-1}, where
		// P is the permutation matrix represented by the perm vector and P^{-1} is the
		// inverse permutation represented by the iperm vector.
		template< typename Matrix1, typename Matrix2, typename PermutationVector >
		void
		reorderSparseMatrix( const Matrix1& A, Matrix2& A_perm,
		const PermutationVector& perm, const PermutationVector& iperm );

		// TODO: the method does not belong here, but there is no better place...
		template< typename Vector, typename PermutationVector >
		void
		reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm );

		} // namespace Matrices
		} // namespace TNL

src/TNL/Matrices/SparseOperations_impl.h

+96 −0

Original line number	Diff line number	Diff line
		@@ -14,8 +14,10 @@

		#include <type_traits>
		#include <stdexcept>
		#include <algorithm>

		#include <TNL/Pointers/DevicePointer.h>
		#include <TNL/ParallelFor.h>

		namespace TNL {
		namespace Matrices {
		@@ -260,5 +262,99 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B,
		}
		}


		template< typename Matrix1, typename Matrix2, typename PermutationVector >
		void
		reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const PermutationVector& perm, const PermutationVector& iperm )
		{
		// TODO: implement on GPU
		static_assert( std::is_same< typename Matrix1::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );
		static_assert( std::is_same< typename Matrix2::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );
		static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );

		using IndexType = typename Matrix1::IndexType;

		matrix2.setDimensions( matrix1.getRows(), matrix1.getColumns() );

		// set row lengths
		typename Matrix2::CompressedRowLengthsVector rowLengths;
		rowLengths.setSize( matrix1.getRows() );
		for( IndexType i = 0; i < matrix1.getRows(); i++ ) {
		const IndexType maxLength = matrix1.getRowLength( perm[ i ] );
		const auto row = matrix1.getRow( perm[ i ] );
		IndexType length = 0;
		for( IndexType j = 0; j < maxLength; j++ )
		if( row.getElementColumn( j ) < matrix1.getColumns() )
		length++;
		rowLengths[ i ] = length;
		}
		matrix2.setCompressedRowLengths( rowLengths );

		// set row elements
		for( IndexType i = 0; i < matrix2.getRows(); i++ ) {
		const IndexType rowLength = rowLengths[ i ];

		// extract sparse row
		const auto row1 = matrix1.getRow( perm[ i ] );

		// permute
		typename Matrix2::IndexType columns[ rowLength ];
		typename Matrix2::RealType values[ rowLength ];
		for( IndexType j = 0; j < rowLength; j++ ) {
		columns[ j ] = iperm[ row1.getElementColumn( j ) ];
		values[ j ] = row1.getElementValue( j );
		}

		// sort
		IndexType indices[ rowLength ];
		for( IndexType j = 0; j < rowLength; j++ )
		indices[ j ] = j;
		// nvcc does not allow lambdas to capture VLAs, even in host code (WTF!?)
		// error: a variable captured by a lambda cannot have a type involving a variable-length array
		IndexType* _columns = columns;
		auto comparator = [=]( IndexType a, IndexType b ) {
		return _columns[ a ] < _columns[ b ];
		};
		std::sort( indices, indices + rowLength, comparator );

		typename Matrix2::IndexType sortedColumns[ rowLength ];
		typename Matrix2::RealType sortedValues[ rowLength ];
		for( IndexType j = 0; j < rowLength; j++ ) {
		sortedColumns[ j ] = columns[ indices[ j ] ];
		sortedValues[ j ] = values[ indices[ j ] ];
		}

		matrix2.setRow( i, sortedColumns, sortedValues, rowLength );
		}
		}

		template< typename Vector, typename PermutationVector >
		void
		reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm )
		{
		TNL_ASSERT_EQ( src.getSize(), perm.getSize(),
		"Source vector and permutation must have the same size." );
		using RealType = typename Vector::RealType;
		using DeviceType = typename Vector::DeviceType;
		using IndexType = typename Vector::IndexType;

		auto kernel = [] __cuda_callable__
		( IndexType i,
		const RealType* src,
		RealType* dest,
		const typename PermutationVector::RealType* perm )
		{
		dest[ i ] = src[ perm[ i ] ];
		};

		dest.setLike( src );

		ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(),
		kernel,
		src.getData(),
		dest.getData(),
		perm.getData() );
		}

		} // namespace Matrices
		} // namespace TNL