Added tnl-benchmark-distributed-spmv (76111496) · Commits · TNL / tnl-dev

src/Benchmarks/CMakeLists.txt

+1 −0

Original line number	Original line	Diff line number	Diff line
	add_subdirectory( HeatEquation )		add_subdirectory( HeatEquation )
	add_subdirectory( BLAS )		add_subdirectory( BLAS )
	add_subdirectory( SpMV )		add_subdirectory( SpMV )
			add_subdirectory( DistSpMV )
	add_subdirectory( LinearSolvers )		add_subdirectory( LinearSolvers )

	set( headers		set( headers

src/Benchmarks/DistSpMV/CMakeLists.txt

0 → 100644

+11 −0

Original line number	Original line	Diff line number	Diff line
			# Build script for the distributed SpMV benchmark.
			# The CUDA variant is compiled from the .cu source and is only built
			# when BUILD_CUDA is set.
			if( BUILD_CUDA )
			cuda_add_executable( tnl-benchmark-distributed-spmv-cuda tnl-benchmark-distributed-spmv.cu )
			target_link_libraries( tnl-benchmark-distributed-spmv-cuda tnl )

			# The CUDA executable is installed into the bin directory.
			install( TARGETS tnl-benchmark-distributed-spmv-cuda RUNTIME DESTINATION bin )
			endif()

			# The variant compiled from the .cpp source is built unconditionally.
			add_executable( tnl-benchmark-distributed-spmv tnl-benchmark-distributed-spmv.cpp )
			target_link_libraries( tnl-benchmark-distributed-spmv tnl )

			# It is installed into the same bin directory as the CUDA variant.
			install( TARGETS tnl-benchmark-distributed-spmv RUNTIME DESTINATION bin )

src/Benchmarks/DistSpMV/ordering.h

0 → 100644

+133 −0

Original line number	Original line	Diff line number	Diff line
			#pragma once

			#include <algorithm>
			#include <type_traits>
			#include <vector>

			#include <TNL/Devices/Host.h>
			#include <TNL/ParallelFor.h>

			using namespace TNL;

			/**
			 * Builds a simple interleaving permutation and its inverse.
			 *
			 * Both vectors are resized to matrix.getRows(). With N = getRows() / 2 the
			 * result satisfies, for every i in [0, N):
			 *    perm[ 2*i ]     = i          iperm[ i ]     = 2*i
			 *    perm[ 2*i + 1 ] = i + N      iperm[ i + N ] = 2*i + 1
			 * so iperm[ perm[ k ] ] == k.
			 *
			 * When the number of rows is odd, the last index has no partner in the
			 * pairing above; it is mapped to itself so that every entry of both vectors
			 * is initialized and perm/iperm remain valid mutually inverse permutations.
			 *
			 * \param matrix  only matrix.getRows() is read
			 * \param perm    output: permutation vector (resized here)
			 * \param iperm   output: inverse permutation vector (resized here)
			 */
			template< typename Matrix, typename PermutationVector >
			void
			getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVector& iperm )
			{
			   using IndexType = typename Matrix::IndexType;

			   const IndexType rows = matrix.getRows();

			   // allocate permutation vectors
			   perm.setSize( rows );
			   iperm.setSize( rows );

			   const IndexType N = rows / 2;
			   for( IndexType i = 0; i < N; i++ ) {
			      perm[ 2 * i ] = i;
			      perm[ 2 * i + 1 ] = i + N;
			      iperm[ i ] = 2 * i;
			      iperm[ i + N ] = 2 * i + 1;
			   }

			   // The loop above touches only the first 2*N entries. For an odd row
			   // count the last entry would stay uninitialized, so fix it in place.
			   if( rows % 2 != 0 ) {
			      perm[ rows - 1 ] = rows - 1;
			      iperm[ rows - 1 ] = rows - 1;
			   }
			}

			/**
			 * Gathers the entries of src into dest according to a permutation:
			 *    dest[ i ] = src[ perm[ i ] ]   for i in [0, src.getSize()).
			 *
			 * dest is set up with dest.setLike( src ) before the gather, and the loop is
			 * dispatched through ParallelFor on Vector::DeviceType.
			 *
			 * \param src   source vector; must have the same size as perm (asserted)
			 * \param dest  destination vector, overwritten
			 * \param perm  permutation indices
			 *
			 * NOTE(review): the gather reads src while writing dest, so the two
			 * presumably must not refer to the same storage - confirm with callers.
			 */
			template< typename Vector, typename PermutationVector >
			void
			reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm )
			{
			   using RealType = typename Vector::RealType;
			   using DeviceType = typename Vector::DeviceType;
			   using IndexType = typename Vector::IndexType;
			   using PermutationEntry = typename PermutationVector::RealType;

			   TNL_ASSERT_EQ( src.getSize(), perm.getSize(),
			                  "Source vector and permutation must have the same size." );

			   dest.setLike( src );

			   // element-wise gather working on raw data pointers
			   auto gather = [] __cuda_callable__
			      ( IndexType index,
			        const RealType* input,
			        RealType* output,
			        const PermutationEntry* mapping )
			   {
			      output[ index ] = input[ mapping[ index ] ];
			   };

			   const IndexType begin = 0;
			   const IndexType end = src.getSize();
			   ParallelFor< DeviceType >::exec( begin, end,
			                                    gather,
			                                    src.getData(),
			                                    dest.getData(),
			                                    perm.getData() );
			}

			/**
			 * Builds matrix2 as a symmetrically reordered copy of matrix1 (host only).
			 *
			 * Row i of matrix2 is taken from row perm( i ) of matrix1, every column index
			 * c of that row is replaced by iperm( c ), and the entries of each row are
			 * stored sorted by their new column index.
			 *
			 * The permutation vectors may be shorter than the matrix dimension: a dof
			 * index is split into a block number (dof / size) and an offset within the
			 * block (dof % size), and only the offset is permuted.
			 *
			 * Entries whose column index is not less than matrix1.getColumns() are
			 * skipped (presumably padding of the sparse format - TODO confirm). The same
			 * filter is used both for computing the row lengths and for copying the
			 * entries, so the two passes always agree.
			 *
			 * \param matrix1  source matrix
			 * \param matrix2  destination matrix, overwritten
			 * \param _perm    permutation applied to row indices
			 * \param _iperm   inverse permutation applied to column indices
			 */
			template< typename Matrix, typename PermutationVector >
			void
			reorderMatrix( const Matrix& matrix1, Matrix& matrix2, const PermutationVector& _perm, const PermutationVector& _iperm )
			{
			   // TODO: implement on GPU
			   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );
			   static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );

			   using namespace TNL;
			   using IndexType = typename Matrix::IndexType;
			   using RealType = typename Matrix::RealType;

			   TNL_ASSERT_EQ( _perm.getSize(), _iperm.getSize(),
			                  "The permutation and its inverse must have the same size." );

			   matrix2.setLike( matrix1 );

			   // general multidimensional accessors for permutation indices
			   // TODO: this depends on the specific layout of dofs, general reordering of NDArray is needed
			   auto perm = [&]( IndexType dof ) {
			      TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" );
			      const IndexType i = dof / _perm.getSize();
			      return i * _perm.getSize() + _perm[ dof % _perm.getSize() ];
			   };
			   auto iperm = [&]( IndexType dof ) {
			      TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" );
			      const IndexType i = dof / _iperm.getSize();
			      return i * _iperm.getSize() + _iperm[ dof % _iperm.getSize() ];
			   };

			   // set row lengths: count only the entries with a valid column index
			   typename Matrix::CompressedRowLengthsVector rowLengths;
			   rowLengths.setSize( matrix1.getRows() );
			   for( IndexType i = 0; i < matrix1.getRows(); i++ ) {
			      const IndexType sourceRow = perm( i );
			      const IndexType maxLength = matrix1.getRowLength( sourceRow );
			      const auto row = matrix1.getRow( sourceRow );
			      IndexType length = 0;
			      for( IndexType j = 0; j < maxLength; j++ )
			         if( row.getElementColumn( j ) < matrix1.getColumns() )
			            length++;
			      rowLengths[ i ] = length;
			   }
			   matrix2.setCompressedRowLengths( rowLengths );

			   // Scratch buffers are heap-allocated and reused for all rows. This
			   // replaces variable-length arrays, which are not standard C++, cannot
			   // have zero length and may overflow the stack for long rows.
			   std::vector< IndexType > columns, indices, sortedColumns;
			   std::vector< RealType > values, sortedValues;

			   // set row elements
			   for( IndexType i = 0; i < matrix2.getRows(); i++ ) {
			      // extract sparse row
			      const IndexType sourceRow = perm( i );
			      const IndexType maxLength = matrix1.getRowLength( sourceRow );
			      const auto row1 = matrix1.getRow( sourceRow );

			      // permute the column indices, applying the same validity filter
			      // as in the row-length pass above
			      columns.clear();
			      values.clear();
			      for( IndexType j = 0; j < maxLength; j++ ) {
			         const IndexType column = row1.getElementColumn( j );
			         if( column < matrix1.getColumns() ) {
			            columns.push_back( iperm( column ) );
			            values.push_back( row1.getElementValue( j ) );
			         }
			      }
			      const IndexType rowLength = static_cast< IndexType >( columns.size() );

			      // sort: order the positions by their new column index
			      indices.resize( rowLength );
			      for( IndexType j = 0; j < rowLength; j++ )
			         indices[ j ] = j;
			      std::sort( indices.begin(), indices.end(),
			                 [&columns]( IndexType a, IndexType b ) {
			                    return columns[ a ] < columns[ b ];
			                 } );

			      // gather columns and values in the sorted order
			      sortedColumns.resize( rowLength );
			      sortedValues.resize( rowLength );
			      for( IndexType j = 0; j < rowLength; j++ ) {
			         sortedColumns[ j ] = columns[ indices[ j ] ];
			         sortedValues[ j ] = values[ indices[ j ] ];
			      }

			      matrix2.setRow( i, sortedColumns.data(), sortedValues.data(), rowLength );
			   }
			}

src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cpp

0 → 100644

+11 −0

Original line number	Original line	Diff line number	Diff line
			/***************************************************************************
			tnl-benchmark-distributed-spmv.cpp - description
			-------------------
			begin : Sep 11, 2018
			copyright : (C) 2018 by Tomas Oberhuber et al.
			email : tomas.oberhuber@fjfi.cvut.cz
			***************************************************************************/

			/* See Copyright Notice in tnl/Copyright */

			#include "tnl-benchmark-distributed-spmv.h"

src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cu

0 → 100644

+11 −0

Original line number	Original line	Diff line number	Diff line
			/***************************************************************************
			tnl-benchmark-distributed-spmv.cu - description
			-------------------
			begin : Sep 11, 2018
			copyright : (C) 2018 by Tomas Oberhuber et al.
			email : tomas.oberhuber@fjfi.cvut.cz
			***************************************************************************/

			/* See Copyright Notice in tnl/Copyright */

			#include "tnl-benchmark-distributed-spmv.h"