Commit 12b37fb0 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Refactoring: moved reorderMatrix from benchmarks into SparseOperatiosn.h as copySparseMatrix

parent ea079456
Loading
Loading
Loading
Loading
+0 −112
Original line number Diff line number Diff line
#pragma once

#include <algorithm>

#include <TNL/Devices/Host.h>
#include <TNL/ParallelFor.h>

using namespace TNL;

template< typename Matrix, typename PermutationVector >
void
getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVector& iperm )
@@ -26,108 +19,3 @@ getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVe
   }
}
template< typename Vector, typename PermutationVector >
void
reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm )
{
   TNL_ASSERT_EQ( src.getSize(), perm.getSize(),
                  "Source vector and permutation must have the same size." );
   using RealType = typename Vector::RealType;
   using DeviceType = typename Vector::DeviceType;
   using IndexType = typename Vector::IndexType;

   auto kernel = [] __cuda_callable__
      ( IndexType i,
        const RealType* src,
        RealType* dest,
        const typename PermutationVector::RealType* perm )
   {
      dest[ i ] = src[ perm[ i ] ];
   };

   dest.setLike( src );

   ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(),
                                    kernel,
                                    src.getData(),
                                    dest.getData(),
                                    perm.getData() );
}

template< typename Matrix, typename PermutationVector >
void
reorderMatrix( const Matrix& matrix1, Matrix& matrix2, const PermutationVector& _perm, const PermutationVector& _iperm )
{
   // TODO: implement on GPU
   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );
   static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );

   using namespace TNL;
   using IndexType = typename Matrix::IndexType;

   matrix2.setLike( matrix1 );

   // general multidimensional accessors for permutation indices
   // TODO: this depends on the specific layout of dofs, general reordering of NDArray is needed
   auto perm = [&]( IndexType dof ) {
      TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" );
      const IndexType i = dof / _perm.getSize();
      return i * _perm.getSize() + _perm[ dof % _perm.getSize() ];
   };
   auto iperm = [&]( IndexType dof ) {
      TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" );
      const IndexType i = dof / _iperm.getSize();
      return i * _iperm.getSize() + _iperm[ dof % _iperm.getSize() ];
   };

   // set row lengths
   typename Matrix::CompressedRowLengthsVector rowLengths;
   rowLengths.setSize( matrix1.getRows() );
   for( IndexType i = 0; i < matrix1.getRows(); i++ ) {
      const IndexType maxLength = matrix1.getRowLength( perm( i ) );
      const auto row = matrix1.getRow( perm( i ) );
      IndexType length = 0;
      for( IndexType j = 0; j < maxLength; j++ )
         if( row.getElementColumn( j ) < matrix1.getColumns() )
            length++;
      rowLengths[ i ] = length;
   }
   matrix2.setCompressedRowLengths( rowLengths );

   // set row elements
   for( IndexType i = 0; i < matrix2.getRows(); i++ ) {
      const IndexType rowLength = rowLengths[ i ];

      // extract sparse row
      const auto row1 = matrix1.getRow( perm( i ) );

      // permute
      typename Matrix::IndexType columns[ rowLength ];
      typename Matrix::RealType values[ rowLength ];
      for( IndexType j = 0; j < rowLength; j++ ) {
         columns[ j ] = iperm( row1.getElementColumn( j ) );
         values[ j ] = row1.getElementValue( j );
      }

      // sort
      IndexType indices[ rowLength ];
      for( IndexType j = 0; j < rowLength; j++ )
         indices[ j ] = j;
      // nvcc does not allow lambdas to capture VLAs, even in host code (WTF!?)
      //    error: a variable captured by a lambda cannot have a type involving a variable-length array
      IndexType* _columns = columns;
      auto comparator = [=]( IndexType a, IndexType b ) {
         return _columns[ a ] < _columns[ b ];
      };
      std::sort( indices, indices + rowLength, comparator );

      typename Matrix::IndexType sortedColumns[ rowLength ];
      typename Matrix::RealType sortedValues[ rowLength ];
      for( IndexType j = 0; j < rowLength; j++ ) {
         sortedColumns[ j ] = columns[ indices[ j ] ];
         sortedValues[ j ] = values[ indices[ j ] ];
      }

      matrix2.setRow( i, sortedColumns, sortedValues, rowLength );
   }
}
+1 −1
Original line number Diff line number Diff line
@@ -190,7 +190,7 @@ struct SpmvBenchmark
         PermutationVector perm, iperm;
         getTrivialOrdering( matrix, perm, iperm );
         MatrixType matrix_perm;
         reorderMatrix( matrix, matrix_perm, perm, iperm );
         Matrices::reorderSparseMatrix( matrix, matrix_perm, perm, iperm );
         if( CommunicatorType::isDistributed() )
            runDistributed( benchmark, metadata, parameters, matrix_perm, vector );
         else
+3 −3
Original line number Diff line number Diff line
@@ -361,9 +361,9 @@ struct LinearSolversBenchmark
         getTrivialOrdering( *matrixPointer, perm, iperm );
         SharedPointer< MatrixType > matrix_perm;
         VectorType x0_perm, b_perm;
         reorderMatrix( *matrixPointer, *matrix_perm, perm, iperm );
         reorderVector( x0, x0_perm, perm );
         reorderVector( b, b_perm, perm );
         Matrices::reorderSparseMatrix( *matrixPointer, *matrix_perm, perm, iperm );
         Matrices::reorderVector( x0, x0_perm, perm );
         Matrices::reorderVector( b, b_perm, perm );
         if( CommunicatorType::isDistributed() )
            runDistributed( benchmark, metadata, parameters, matrix_perm, x0_perm, b_perm );
         else
+14 −0
Original line number Diff line number Diff line
@@ -31,6 +31,20 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B,
                        bool has_symmetric_pattern = false,
                        bool ignore_diagonal = true );

// Applies a permutation to the rows of a sparse matrix and its inverse
// permutation to the columns of the matrix, i.e. A_perm = P*A*P^{-1}, where
// P is the permutation matrix represented by the perm vector and P^{-1} is the
// inverse permutation represented by the iperm vector.
template< typename Matrix1, typename Matrix2, typename PermutationVector >
void
reorderSparseMatrix( const Matrix1& A, Matrix2& A_perm,
                     const PermutationVector& perm, const PermutationVector& iperm );

// TODO: the method does not belong here, but there is no better place...
template< typename Vector, typename PermutationVector >
void
reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm );

} // namespace Matrices
} // namespace TNL

+96 −0
Original line number Diff line number Diff line
@@ -14,8 +14,10 @@

#include <type_traits>
#include <stdexcept>
#include <algorithm>

#include <TNL/Pointers/DevicePointer.h>
#include <TNL/ParallelFor.h>

namespace TNL {
namespace Matrices {
@@ -260,5 +262,99 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B,
   }
}


template< typename Matrix1, typename Matrix2, typename PermutationVector >
void
reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const PermutationVector& perm, const PermutationVector& iperm )
{
   // TODO: implement on GPU
   static_assert( std::is_same< typename Matrix1::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );
   static_assert( std::is_same< typename Matrix2::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );
   static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" );

   using IndexType = typename Matrix1::IndexType;

   matrix2.setDimensions( matrix1.getRows(), matrix1.getColumns() );

   // set row lengths
   typename Matrix2::CompressedRowLengthsVector rowLengths;
   rowLengths.setSize( matrix1.getRows() );
   for( IndexType i = 0; i < matrix1.getRows(); i++ ) {
      const IndexType maxLength = matrix1.getRowLength( perm[ i ] );
      const auto row = matrix1.getRow( perm[ i ] );
      IndexType length = 0;
      for( IndexType j = 0; j < maxLength; j++ )
         if( row.getElementColumn( j ) < matrix1.getColumns() )
            length++;
      rowLengths[ i ] = length;
   }
   matrix2.setCompressedRowLengths( rowLengths );

   // set row elements
   for( IndexType i = 0; i < matrix2.getRows(); i++ ) {
      const IndexType rowLength = rowLengths[ i ];

      // extract sparse row
      const auto row1 = matrix1.getRow( perm[ i ] );

      // permute
      typename Matrix2::IndexType columns[ rowLength ];
      typename Matrix2::RealType values[ rowLength ];
      for( IndexType j = 0; j < rowLength; j++ ) {
         columns[ j ] = iperm[ row1.getElementColumn( j ) ];
         values[ j ] = row1.getElementValue( j );
      }

      // sort
      IndexType indices[ rowLength ];
      for( IndexType j = 0; j < rowLength; j++ )
         indices[ j ] = j;
      // nvcc does not allow lambdas to capture VLAs, even in host code (WTF!?)
      //    error: a variable captured by a lambda cannot have a type involving a variable-length array
      IndexType* _columns = columns;
      auto comparator = [=]( IndexType a, IndexType b ) {
         return _columns[ a ] < _columns[ b ];
      };
      std::sort( indices, indices + rowLength, comparator );

      typename Matrix2::IndexType sortedColumns[ rowLength ];
      typename Matrix2::RealType sortedValues[ rowLength ];
      for( IndexType j = 0; j < rowLength; j++ ) {
         sortedColumns[ j ] = columns[ indices[ j ] ];
         sortedValues[ j ] = values[ indices[ j ] ];
      }

      matrix2.setRow( i, sortedColumns, sortedValues, rowLength );
   }
}

template< typename Vector, typename PermutationVector >
void
reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm )
{
   TNL_ASSERT_EQ( src.getSize(), perm.getSize(),
                  "Source vector and permutation must have the same size." );
   using RealType = typename Vector::RealType;
   using DeviceType = typename Vector::DeviceType;
   using IndexType = typename Vector::IndexType;

   auto kernel = [] __cuda_callable__
      ( IndexType i,
        const RealType* src,
        RealType* dest,
        const typename PermutationVector::RealType* perm )
   {
      dest[ i ] = src[ perm[ i ] ];
   };

   dest.setLike( src );

   ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(),
                                    kernel,
                                    src.getData(),
                                    dest.getData(),
                                    perm.getData() );
}

} // namespace Matrices
} // namespace TNL