From 03949d09e39871858d3b7b1c64da75394596c1aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Sat, 28 Dec 2019 13:33:43 +0100 Subject: [PATCH] Added MatrixView. --- src/TNL/Matrices/Matrix.h | 21 +- .../Matrices/{Matrix_impl.h => Matrix.hpp} | 33 +- src/TNL/Matrices/MatrixView.h | 150 +++++++++ src/TNL/Matrices/MatrixView.hpp | 286 ++++++++++++++++++ 4 files changed, 477 insertions(+), 13 deletions(-) rename src/TNL/Matrices/{Matrix_impl.h => Matrix.hpp} (92%) create mode 100644 src/TNL/Matrices/MatrixView.h create mode 100644 src/TNL/Matrices/MatrixView.hpp diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 4a038eb2e8..96409c89b7 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -15,6 +15,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Containers/Vector.h> #include <TNL/Containers/VectorView.h> +#include <TNL/Matrices/MatrixView.h> namespace TNL { /** @@ -30,13 +31,15 @@ class Matrix : public Object { public: using RealType = Real; - typedef Device DeviceType; - typedef Index IndexType; - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - typedef Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > ValuesVector; + using DeviceType = Device; + using IndexType = Index; + using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >; + using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; + using ValuesVector = Containers::Vector< RealType, DeviceType, IndexType, RealAllocator >; using RealAllocatorType = RealAllocator; + using ViewType = MatrixView< Real, Device, Index >; + using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >; Matrix( const RealAllocatorType& allocator = RealAllocatorType() ); @@ -44,6 +47,10 @@ public: const IndexType columns, const RealAllocatorType& allocator = RealAllocatorType() ); + ViewType getView(); + + ConstViewType getConstView() const; + virtual void setDimensions( const IndexType rows, const IndexType columns ); @@ -162,4 +169,4 @@ void MatrixVectorProductCuda( const Matrix& matrix, } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Matrix_impl.h> +#include <TNL/Matrices/Matrix.hpp> diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix.hpp similarity index 92% rename from src/TNL/Matrices/Matrix_impl.h rename to src/TNL/Matrices/Matrix.hpp index a93c7a893f..91b81ffcf0 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix.hpp @@ -43,6 +43,28 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType { } +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +auto +Matrix< Real, Device, Index, RealAllocator >:: +getView() -> ViewType +{ + return ViewType( rows, columns, values.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +auto +Matrix< Real, Device, Index, RealAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( rows, columns, values.getConstView() ); +} + template< typename Real, typename Device, typename Index, @@ -105,12 +127,11 @@ template< typename Real, typename RealAllocator > Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const { - IndexType nonZeroElements( 0 ); - for( IndexType i = 0; this->values.getSize(); i++ ) - if( this->values.getElement( i ) != 0.0 ) - nonZeroElements++; - - return nonZeroElements; + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h new file mode 100644 index 0000000000..a2fa975cf1 --- /dev/null +++ b/src/TNL/Matrices/MatrixView.h @@ -0,0 +1,150 @@ +/*************************************************************************** + MatrixView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Object.h> +#include <TNL/Allocators/Default.h> +#include <TNL/Devices/Host.h> +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> + +namespace TNL { +/** + * \brief Namespace for matrix formats. + */ +namespace Matrices { + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int > +class MatrixView : public Object +{ +public: + using RealType = Real; + typedef Device DeviceType; + typedef Index IndexType; + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + typedef Containers::VectorView< RealType, DeviceType, IndexType > ValuesView; + + __cuda_callable__ + MatrixView(); + + __cuda_callable__ + MatrixView( const IndexType rows, + const IndexType columns, + const ValuesView& values ); + + __cuda_callable__ + MatrixView( const MatrixView& view ) = default; + + virtual IndexType getRowLength( const IndexType row ) const = 0; + + // TODO: implementation is not parallel + // TODO: it would be nice if padding zeros could be stripped + void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; + + virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + + IndexType getNumberOfMatrixElements() const; + + virtual IndexType getNumberOfNonzeroMatrixElements() const = 0; + + void reset(); + + __cuda_callable__ + IndexType getRows() const; + + __cuda_callable__ + IndexType getColumns() const; + + /**** + * TODO: The fast variants of the following methods cannot be virtual. + * If they were, they could not be used in the CUDA kernels. If CUDA allows it + * in the future and it does not slow down, declare them as virtual here. + */ + + virtual bool setElement( const IndexType row, + const IndexType column, + const RealType& value ) = 0; + + virtual bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ) = 0; + + virtual bool setRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements ) = 0; + + virtual bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ) = 0; + + virtual Real getElement( const IndexType row, + const IndexType column ) const = 0; + + const ValuesView& getValues() const; + + ValuesView& getValues(); + + // TODO: parallelize and optimize for sparse matrices + template< typename Matrix > + bool operator == ( const Matrix& matrix ) const; + + template< typename Matrix > + bool operator != ( const Matrix& matrix ) const; + + virtual void save( File& file ) const; + + virtual void load( File& file ); + + virtual void print( std::ostream& str ) const; + + + // TODO: method for symmetric matrices, should not be in general Matrix interface + __cuda_callable__ + const IndexType& getNumberOfColors() const; + + // TODO: method for symmetric matrices, should not be in general Matrix interface + void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector); + + protected: + + IndexType rows, columns; + + ValuesView values; +}; + +template< typename Real, typename Device, typename Index > +std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, Index >& m ) +{ + m.print( str ); + return str; +} + +/* +template< typename Matrix, + typename InVector, + typename OutVector > +void MatrixVectorProductCuda( const Matrix& matrix, + const InVector& inVector, + OutVector& outVector ); +*/ + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/MatrixView.hpp> diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp new file mode 100644 index 0000000000..bd3d9beaee --- /dev/null +++ b/src/TNL/Matrices/MatrixView.hpp @@ -0,0 +1,286 @@ +/*************************************************************************** + MatrixView.hpp - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/Matrix.h> +#include <TNL/Assert.h> +#include <TNL/Cuda/LaunchHelpers.h> +#include <TNL/Cuda/MemoryHelpers.h> +#include <TNL/Cuda/SharedMemory.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +MatrixView< Real, Device, Index >:: +MatrixView() +: rows( 0 ), + columns( 0 ) +{ +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +MatrixView< Real, Device, Index >:: +MatrixView( const IndexType rows_, + const IndexType columns_, + const ValuesView& values_ ) + : rows( rows_ ), columns( columns_ ), values( values_ ) +{ +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + getCompressedRowLengths( rowLengths.getView() ); +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + +template< typename Real, + typename Device, + typename Index > +Index MatrixView< Real, Device, Index >::getNumberOfMatrixElements() const +{ + return this->values.getSize(); +} + +template< typename Real, + typename Device, + typename Index > +Index MatrixView< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index MatrixView< Real, Device, Index >::getRows() const +{ + return this->rows; +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index MatrixView< Real, Device, Index >::getColumns() const +{ + return this->columns; +} + +template< typename Real, + typename Device, + typename Index > +const typename MatrixView< Real, Device, Index >::ValuesView& +MatrixView< Real, Device, Index >:: +getValues() const +{ + return this->values; +} + +template< typename Real, + typename Device, + typename Index > +typename MatrixView< Real, Device, Index >::ValuesView& +MatrixView< Real, Device, Index >:: +getValues() +{ + return this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::reset() +{ + this->rows = 0; + this->columns = 0; + this->values.reset(); +} + +template< typename Real, + typename Device, + typename Index > + template< typename MatrixT > +bool MatrixView< Real, Device, Index >::operator == ( const MatrixT& matrix ) const +{ + if( this->getRows() != matrix.getRows() || + this->getColumns() != matrix.getColumns() ) + return false; + for( IndexType row = 0; row < this->getRows(); row++ ) + for( IndexType column = 0; column < this->getColumns(); column++ ) + if( this->getElement( row, column ) != matrix.getElement( row, column ) ) + return false; + return true; +} + +template< typename Real, + typename Device, + typename Index > + template< typename MatrixT > +bool MatrixView< Real, Device, Index >::operator != ( const MatrixT& matrix ) const +{ + return ! operator == ( matrix ); +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::save( File& file ) const +{ + Object::save( file ); + file.save( &this->rows ); + file.save( &this->columns ); + file << this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::load( File& file ) +{ + Object::load( file ); + file.load( &this->rows ); + file.load( &this->columns ); + file >> this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::print( std::ostream& str ) const +{ +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +const Index& +MatrixView< Real, Device, Index >:: +getNumberOfColors() const +{ + return this->numberOfColors; +} + +template< typename Real, + typename Device, + typename Index > +void +MatrixView< Real, Device, Index >:: +computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) +{ + for( IndexType i = this->getRows() - 1; i >= 0; i-- ) + { + // init color array + Containers::Vector< Index, Device, Index > usedColors; + usedColors.setSize( this->numberOfColors ); + for( IndexType j = 0; j < this->numberOfColors; j++ ) + usedColors.setElement( j, 0 ); + + // find all colors used in given row + for( IndexType j = i + 1; j < this->getColumns(); j++ ) + if( this->getElement( i, j ) != 0.0 ) + usedColors.setElement( colorsVector.getElement( j ), 1 ); + + // find unused color + bool found = false; + for( IndexType j = 0; j < this->numberOfColors; j++ ) + if( usedColors.getElement( j ) == 0 ) + { + colorsVector.setElement( i, j ); + found = true; + break; + } + if( !found ) + { + colorsVector.setElement( i, this->numberOfColors ); + this->numberOfColors++; + } + } +} + +/* +#ifdef HAVE_CUDA +template< typename Matrix, + typename InVector, + typename OutVector > +__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, + const InVector* inVector, + OutVector* outVector, + int gridIdx ) +{ + static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); + const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < matrix->getRows() ) + ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); +} +#endif + +template< typename Matrix, + typename InVector, + typename OutVector > +void MatrixVectorProductCuda( const Matrix& matrix, + const InVector& inVector, + OutVector& outVector ) +{ +#ifdef HAVE_CUDA + typedef typename Matrix::IndexType IndexType; + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_this, + kernel_inVector, + kernel_outVector, + gridIdx ); + TNL_CHECK_CUDA_DEVICE; + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); + TNL_CHECK_CUDA_DEVICE; +#endif +} +*/ + +} // namespace Matrices +} // namespace TNL -- GitLab