Commit 20dc15de authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Basic implementation of DistributedMatrix

parent 00971191
Loading
Loading
Loading
Loading
+166 −0
Original line number Diff line number Diff line
/***************************************************************************
                          DistributedMatrix.h  -  description
                             -------------------
    begin                : Sep 10, 2018
    copyright            : (C) 2018 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

// Implemented by: Jakub Klinkovský

#pragma once

#include <type_traits>  // std::add_const

#include <TNL/Containers/Vector.h>
#include <TNL/Matrices/SparseRow.h>
#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/DistributedContainers/IndexMap.h>
#include <TNL/DistributedContainers/DistributedVector.h>

namespace TNL {
namespace DistributedContainers {

// Matrix distributed by rows over the processes of a communication group.
//
// The object stores three things: an index map describing the distribution of
// rows (`rowIndexMap`), the communication group (`group`) and one local matrix
// of type `Matrix` (`localMatrix`).
//
// Template parameters:
//   Matrix       - type of the local matrix; it also supplies RealType,
//                  DeviceType and IndexType.
//   Communicator - communicator type, Communicators::MpiCommunicator by default.
//   IndexMap     - type of the row index map, Subrange< IndexType > by default.
//
// TODO: 2D distribution for dense matrices (maybe it should be in different template,
//       because e.g. setRowFast doesn't make sense for dense matrices)
template< typename Matrix,
          typename Communicator = Communicators::MpiCommunicator,
          typename IndexMap = Subrange< typename Matrix::IndexType > >
class DistributedMatrix
: public Object
{
   using CommunicationGroup = typename Communicator::CommunicationGroup;

   // Distributed vector sharing this matrix' device, communicator, index type
   // and index map; only the element type varies.
   template< typename Real >
   using DistVector = DistributedVector< Real, typename Matrix::DeviceType, Communicator, typename Matrix::IndexType, IndexMap >;

public:
   using MatrixType = Matrix;
   using RealType = typename Matrix::RealType;
   using DeviceType = typename Matrix::DeviceType;
   using IndexType = typename Matrix::IndexType;
   using CommunicatorType = Communicator;
   using IndexMapType = IndexMap;

   // The same distributed matrix with the local matrix replaced by
   // Matrix::HostType and Matrix::CudaType, respectively.
   using HostType = DistributedMatrix< typename Matrix::HostType, Communicator, IndexMap >;
   using CudaType = DistributedMatrix< typename Matrix::CudaType, Communicator, IndexMap >;

   // Distributed vector of IndexType used to pass row lengths in and out.
   using CompressedRowLengthsVector = DistributedVector< IndexType, DeviceType, CommunicatorType, IndexType, IndexMapType >;

   // Row accessor types returned by getRow(); the const variant is instantiated
   // with const-qualified element and index types.
   using MatrixRow = Matrices::SparseRow< RealType, IndexType >;
   using ConstMatrixRow = Matrices::SparseRow< typename std::add_const< RealType >::type, typename std::add_const< IndexType >::type >;

   DistributedMatrix() = default;

   // NOTE: the defaulted copy constructor takes a non-const reference.
   DistributedMatrix( DistributedMatrix& ) = default;

   // Constructs the matrix and calls setDistribution() with the same arguments.
   DistributedMatrix( IndexMap rowIndexMap, IndexType columns, CommunicationGroup group = Communicator::AllGroup );

   // Stores the row index map and the group; unless the group is
   // Communicator::NullGroup, the local matrix is given
   // rowIndexMap.getLocalSize() rows and `columns` columns.
   void setDistribution( IndexMap rowIndexMap, IndexType columns, CommunicationGroup group = Communicator::AllGroup );

   // Returns the stored row index map.
   const IndexMap& getRowIndexMap() const;

   // Returns the stored communication group.
   CommunicationGroup getCommunicationGroup() const;

   // Returns read-only access to the local matrix.
   const Matrix& getLocalMatrix() const;


   // Returns the textual type name composed from Matrix::getType() and
   // IndexMap::getType(); the communicator is represented by a placeholder.
   static String getType();

   // Virtual counterpart of getType(); returns the same string.
   virtual String getTypeVirtual() const;

   // TODO: no getSerializationType method until there is support for serialization


   /*
    * Some common Matrix methods follow below.
    */

   // Copies the distribution (via setLike) and the local matrix of `matrix`.
   DistributedMatrix& operator=( const DistributedMatrix& matrix );

   // Same as above for any type providing getRowIndexMap(),
   // getCommunicationGroup() and getLocalMatrix().
   template< typename MatrixT >
   DistributedMatrix& operator=( const MatrixT& matrix );

   // Copies the row index map and the group of `matrix` and calls setLike() on
   // the local matrix with the local matrix of `matrix`.
   template< typename MatrixT >
   void setLike( const MatrixT& matrix );

   // Resets the row index map and the local matrix and sets the group to
   // Communicator::NullGroup.
   void reset();

   // Returns rowIndexMap.getGlobalSize().
   __cuda_callable__
   IndexType getRows() const;

   // Returns the number of columns of the local matrix.
   __cuda_callable__
   IndexType getColumns() const;

   // Forwards the local view of `rowLengths` to the local matrix. Nothing is
   // forwarded when the group is CommunicatorType::NullGroup.
   void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths );

   // Sets the distribution of `rowLengths` to match this matrix and lets the
   // local matrix fill its local view. `rowLengths` is left untouched when the
   // group is CommunicatorType::NullGroup.
   void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;

   // All row-based accessors below pass `row` through
   // rowIndexMap.getLocalIndex() and forward the result to the local matrix;
   // `column` and the remaining arguments are forwarded unchanged.
   // NOTE(review): `row` is presumably a global row index owned by the calling
   // process - confirm against the IndexMap contract.

   IndexType getRowLength( IndexType row ) const;

   bool setElement( IndexType row,
                    IndexType column,
                    RealType value );

   __cuda_callable__
   bool setElementFast( IndexType row,
                        IndexType column,
                        RealType value );

   RealType getElement( IndexType row,
                        IndexType column ) const;

   __cuda_callable__
   RealType getElementFast( IndexType row,
                            IndexType column ) const;

   __cuda_callable__
   bool setRowFast( IndexType row,
                    const IndexType* columnIndexes,
                    const RealType* values,
                    IndexType elements );

   __cuda_callable__
   void getRowFast( IndexType row,
                    IndexType* columns,
                    RealType* values ) const;

   __cuda_callable__
   MatrixRow getRow( IndexType row );

   __cuda_callable__
   ConstMatrixRow getRow( IndexType row ) const;

   // multiplication with a global vector
   // (the local matrix is multiplied by `inVector` and the result is written
   // into the local view of `outVector`)
   template< typename Vector,
             typename RealOut >
   void vectorProduct( const Vector& inVector,
                       DistVector< RealOut >& outVector ) const;

   // optimization for matrix-vector multiplication
   // NOTE(review): no definition of this method is visible in
   // DistributedMatrix_impl.h as shown - confirm it is provided elsewhere.
   void updateVectorProductPrefetchPattern();

   // multiplication with a distributed vector
   // NOTE(review): no definition of this overload is visible in
   // DistributedMatrix_impl.h as shown - confirm it is provided elsewhere.
   template< typename RealIn,
             typename RealOut >
   void vectorProduct( const DistVector< RealIn >& inVector,
                       DistVector< RealOut >& outVector ) const;

protected:
   // distribution of the matrix rows
   IndexMap rowIndexMap;
   // communication group; NullGroup until a distribution is set
   CommunicationGroup group = Communicator::NullGroup;
   // the part of the matrix stored by this object
   Matrix localMatrix;

private:
   // TODO: disabled until they are implemented
   // (re-declaring the inherited names here makes them private)
   using Object::save;
   using Object::load;
   using Object::boundLoad;
};

} // namespace DistributedContainers
} // namespace TNL

#include "DistributedMatrix_impl.h"
+338 −0
Original line number Diff line number Diff line
/***************************************************************************
                          DistributedMatrix_impl.h  -  description
                             -------------------
    begin                : Sep 10, 2018
    copyright            : (C) 2018 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

// Implemented by: Jakub Klinkovský

#pragma once

#include "DistributedMatrix.h"

namespace TNL {
namespace DistributedContainers {

// Constructs the matrix and configures it in one step: the row index map, the
// number of columns and the communication group are handed to setDistribution().
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
DistributedMatrix< Matrix, Communicator, IndexMap >::
DistributedMatrix( IndexMap rowIndexMap, IndexType columns, CommunicationGroup group )
{
   this->setDistribution( rowIndexMap, columns, group );
}

// Stores the row index map and the communication group. Unless the group is
// Communicator::NullGroup, the local matrix is then sized to
// rowIndexMap.getLocalSize() rows and `columns` columns.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
void
DistributedMatrix< Matrix, Communicator, IndexMap >::
setDistribution( IndexMap rowIndexMap, IndexType columns, CommunicationGroup group )
{
   // the parameters shadow the members of the same name, hence `this->`
   this->rowIndexMap = rowIndexMap;
   this->group = group;

   // the local matrix is not touched for the null group
   const bool hasGroup = ( group != Communicator::NullGroup );
   if( hasGroup ) {
      this->localMatrix.setDimensions( rowIndexMap.getLocalSize(), columns );
   }
}

// Read-only access to the index map that describes the distribution of rows.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
const IndexMap&
DistributedMatrix< Matrix, Communicator, IndexMap >::
getRowIndexMap() const
{
   return this->rowIndexMap;
}

// Returns (by value) the communication group stored in this matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
typename Communicator::CommunicationGroup
DistributedMatrix< Matrix, Communicator, IndexMap >::
getCommunicationGroup() const
{
   return this->group;
}

// Read-only access to the local matrix held by this object.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
const Matrix&
DistributedMatrix< Matrix, Communicator, IndexMap >::
getLocalMatrix() const
{
   return this->localMatrix;
}


// Builds the textual name of this type from the names reported by the local
// matrix type and the index map type. The communicator is represented by the
// fixed placeholder "<Communicator>".
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
String
DistributedMatrix< Matrix, Communicator, IndexMap >::
getType()
{
   const auto matrixName = Matrix::getType();
   const auto indexMapName = IndexMap::getType();

   // TODO: communicators don't have a getType method
   return String( "DistributedContainers::DistributedMatrix< " ) +
          matrixName + ", " +
          "<Communicator>, " +
          indexMapName + " >";
}

// Virtual counterpart of the static getType(); yields the same string.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
String
DistributedMatrix< Matrix, Communicator, IndexMap >::
getTypeVirtual() const
{
   return DistributedMatrix::getType();
}


/*
 * Some common Matrix methods follow below.
 */

// Copy assignment.
//
// Adopts the distribution of `matrix` (row index map, communication group and
// layout of the local matrix) through setLike() and then copies its local
// matrix.
//
// Returns a reference to *this.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
DistributedMatrix< Matrix, Communicator, IndexMap >&
DistributedMatrix< Matrix, Communicator, IndexMap >::
operator=( const DistributedMatrix& matrix )
{
   // Self-assignment guard: setLike() reconfigures localMatrix before the copy
   // below, so `a = a` must not depend on Matrix::setLike() being harmless
   // when applied to itself.
   if( this == &matrix )
      return *this;

   setLike( matrix );
   localMatrix = matrix.getLocalMatrix();
   return *this;
}

// Assignment from another distributed matrix type.
//
// `matrix` has to provide getRowIndexMap(), getCommunicationGroup() and
// getLocalMatrix(). The distribution is taken over by setLike(), then the
// local matrix of `matrix` is assigned to the local matrix of this object.
//
// Returns a reference to *this.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
   template< typename MatrixT >
DistributedMatrix< Matrix, Communicator, IndexMap >&
DistributedMatrix< Matrix, Communicator, IndexMap >::
operator=( const MatrixT& matrix )
{
   this->setLike( matrix );
   this->localMatrix = matrix.getLocalMatrix();
   return *this;
}

// Makes this matrix distributed like `matrix`: the row index map and the
// communication group are copied and setLike() is called on the local matrix
// with the local matrix of `matrix`.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
   template< typename MatrixT >
void
DistributedMatrix< Matrix, Communicator, IndexMap >::
setLike( const MatrixT& matrix )
{
   this->rowIndexMap = matrix.getRowIndexMap();
   this->group = matrix.getCommunicationGroup();
   this->localMatrix.setLike( matrix.getLocalMatrix() );
}

// Returns the object to the unconfigured state: the row index map and the
// local matrix are reset and the group becomes Communicator::NullGroup.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
void
DistributedMatrix< Matrix, Communicator, IndexMap >::
reset()
{
   this->rowIndexMap.reset();
   this->group = Communicator::NullGroup;
   this->localMatrix.reset();
}

// Number of rows of the matrix, taken from the global size of the row index map.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
typename Matrix::IndexType
DistributedMatrix< Matrix, Communicator, IndexMap >::
getRows() const
{
   return this->rowIndexMap.getGlobalSize();
}

// Number of columns of the matrix, as reported by the local matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
typename Matrix::IndexType
DistributedMatrix< Matrix, Communicator, IndexMap >::
getColumns() const
{
   return this->localMatrix.getColumns();
}

// Sets the row lengths of the local matrix from a distributed vector.
//
// `rowLengths` is asserted to have the size, index map and communication group
// of this matrix. Its local view is forwarded to the local matrix unless the
// group is CommunicatorType::NullGroup, in which case nothing is forwarded.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
void
DistributedMatrix< Matrix, Communicator, IndexMap >::
setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths )
{
   TNL_ASSERT_EQ( rowLengths.getSize(), getRows(), "row lengths vector has wrong size" );
   TNL_ASSERT_EQ( rowLengths.getIndexMap(), getRowIndexMap(), "row lengths vector has wrong distribution" );
   TNL_ASSERT_EQ( rowLengths.getCommunicationGroup(), getCommunicationGroup(), "row lengths vector has wrong communication group" );

   const bool hasGroup = ( getCommunicationGroup() != CommunicatorType::NullGroup );
   if( hasGroup ) {
      this->localMatrix.setCompressedRowLengths( rowLengths.getLocalVectorView() );
   }
}

// Fetches the row lengths of the local matrix into a distributed vector.
//
// Unless the group is CommunicatorType::NullGroup, `rowLengths` is first given
// the row index map and group of this matrix and its local view is then filled
// by the local matrix. For the null group `rowLengths` is left unmodified.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
void
DistributedMatrix< Matrix, Communicator, IndexMap >::
getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
{
   const bool hasGroup = ( getCommunicationGroup() != CommunicatorType::NullGroup );
   if( ! hasGroup )
      return;

   rowLengths.setDistribution( getRowIndexMap(), getCommunicationGroup() );
   this->localMatrix.getCompressedRowLengths( rowLengths.getLocalVectorView() );
}

// Returns the length of a row: `row` is converted with
// rowIndexMap.getLocalIndex() and the query is answered by the local matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
typename Matrix::IndexType
DistributedMatrix< Matrix, Communicator, IndexMap >::
getRowLength( IndexType row ) const
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.getRowLength( localIndex );
}

// Sets one matrix element: `row` is converted with rowIndexMap.getLocalIndex(),
// `column` and `value` are forwarded unchanged to setElement() of the local
// matrix, whose result is returned.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
bool
DistributedMatrix< Matrix, Communicator, IndexMap >::
setElement( IndexType row,
            IndexType column,
            RealType value )
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.setElement( localIndex, column, value );
}

// __cuda_callable__ variant of setElement(): `row` is converted with
// rowIndexMap.getLocalIndex() and the call is forwarded to setElementFast() of
// the local matrix, whose result is returned.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
bool
DistributedMatrix< Matrix, Communicator, IndexMap >::
setElementFast( IndexType row,
                IndexType column,
                RealType value )
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.setElementFast( localIndex, column, value );
}

// Reads one matrix element: `row` is converted with
// rowIndexMap.getLocalIndex() and the value is obtained from getElement() of
// the local matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
typename Matrix::RealType
DistributedMatrix< Matrix, Communicator, IndexMap >::
getElement( IndexType row,
            IndexType column ) const
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.getElement( localIndex, column );
}

// __cuda_callable__ variant of getElement(): `row` is converted with
// rowIndexMap.getLocalIndex() and the value is obtained from getElementFast()
// of the local matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
typename Matrix::RealType
DistributedMatrix< Matrix, Communicator, IndexMap >::
getElementFast( IndexType row,
                IndexType column ) const
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.getElementFast( localIndex, column );
}

// Sets a whole row at once: `row` is converted with
// rowIndexMap.getLocalIndex(); the column indexes, values and element count
// are forwarded unchanged to setRowFast() of the local matrix, whose result is
// returned.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
bool
DistributedMatrix< Matrix, Communicator, IndexMap >::
setRowFast( IndexType row,
            const IndexType* columnIndexes,
            const RealType* values,
            IndexType elements )
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.setRowFast( localIndex, columnIndexes, values, elements );
}

// Reads a whole row at once: `row` is converted with
// rowIndexMap.getLocalIndex() and the output buffers `columns` and `values`
// are handed to getRowFast() of the local matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
void
DistributedMatrix< Matrix, Communicator, IndexMap >::
getRowFast( IndexType row,
            IndexType* columns,
            RealType* values ) const
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   this->localMatrix.getRowFast( localIndex, columns, values );
}

// Returns a row accessor: `row` is converted with rowIndexMap.getLocalIndex()
// and the accessor is obtained from getRow() of the local matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
typename DistributedMatrix< Matrix, Communicator, IndexMap >::MatrixRow
DistributedMatrix< Matrix, Communicator, IndexMap >::
getRow( IndexType row )
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.getRow( localIndex );
}

// Const overload of getRow(): `row` is converted with
// rowIndexMap.getLocalIndex() and the read-only accessor is obtained from
// getRow() of the (const) local matrix.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
__cuda_callable__
typename DistributedMatrix< Matrix, Communicator, IndexMap >::ConstMatrixRow
DistributedMatrix< Matrix, Communicator, IndexMap >::
getRow( IndexType row ) const
{
   const IndexType localIndex = this->rowIndexMap.getLocalIndex( row );
   return this->localMatrix.getRow( localIndex );
}

// Matrix-vector product with a global (non-distributed) input vector.
//
// `inVector` is asserted to have getColumns() elements. `outVector` is
// asserted to have getRows() elements and the index map and communication
// group of this matrix. The local matrix is multiplied by `inVector` and the
// result is written through the local view of `outVector`.
template< typename Matrix,
          typename Communicator,
          typename IndexMap >
   template< typename Vector,
             typename RealOut >
void
DistributedMatrix< Matrix, Communicator, IndexMap >::
vectorProduct( const Vector& inVector,
               DistVector< RealOut >& outVector ) const
{
   TNL_ASSERT_EQ( inVector.getSize(), getColumns(), "input vector has wrong size" );
   TNL_ASSERT_EQ( outVector.getSize(), getRows(), "output vector has wrong size" );
   TNL_ASSERT_EQ( outVector.getIndexMap(), getRowIndexMap(), "output vector has wrong distribution" );
   TNL_ASSERT_EQ( outVector.getCommunicationGroup(), getCommunicationGroup(), "output vector has wrong communication group" );

   // a named view is needed because the product takes its output by reference
   auto localOutput = outVector.getLocalVectorView();
   this->localMatrix.vectorProduct( inVector, localOutput );
}

} // namespace DistributedContainers
} // namespace TNL
+15 −0
Original line number Diff line number Diff line
@@ -12,6 +12,12 @@ if( BUILD_CUDA )
   TARGET_LINK_LIBRARIES( DistributedVectorTest
                              ${GTEST_BOTH_LIBRARIES}
                              tnl )

   CUDA_ADD_EXECUTABLE( DistributedMatrixTest DistributedMatrixTest.cu
                        OPTIONS ${CXX_TESTS_FLAGS} )
   TARGET_LINK_LIBRARIES( DistributedMatrixTest
                              ${GTEST_BOTH_LIBRARIES}
                              tnl )
else()
   ADD_EXECUTABLE( DistributedArrayTest DistributedArrayTest.cpp )
   TARGET_COMPILE_OPTIONS( DistributedArrayTest PRIVATE ${CXX_TESTS_FLAGS} )
@@ -24,6 +30,12 @@ else()
   TARGET_LINK_LIBRARIES( DistributedVectorTest
                              ${GTEST_BOTH_LIBRARIES}
                              tnl )

   ADD_EXECUTABLE( DistributedMatrixTest DistributedMatrixTest.cpp )
   TARGET_COMPILE_OPTIONS( DistributedMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
   TARGET_LINK_LIBRARIES( DistributedMatrixTest
                              ${GTEST_BOTH_LIBRARIES}
                              tnl )
endif()

SET( mpi_test_parameters -np 4 -H localhost:4 "${EXECUTABLE_OUTPUT_PATH}/DistributedArrayTest${CMAKE_EXECUTABLE_SUFFIX}" )
@@ -32,4 +44,7 @@ ADD_TEST( NAME DistributedArrayTest COMMAND "mpirun" ${mpi_test_parameters})
SET( mpi_test_parameters -np 4 -H localhost:4 "${EXECUTABLE_OUTPUT_PATH}/DistributedVectorTest${CMAKE_EXECUTABLE_SUFFIX}" )
ADD_TEST( NAME DistributedVectorTest COMMAND "mpirun" ${mpi_test_parameters})

SET( mpi_test_parameters -np 4 -H localhost:4 "${EXECUTABLE_OUTPUT_PATH}/DistributedMatrixTest${CMAKE_EXECUTABLE_SUFFIX}" )
ADD_TEST( NAME DistributedMatrixTest COMMAND "mpirun" ${mpi_test_parameters})

endif()
+1 −0
Original line number Diff line number Diff line
#include "DistributedMatrixTest.h"
+1 −0
Original line number Diff line number Diff line
#include "DistributedMatrixTest.h"
Loading