From 03949d09e39871858d3b7b1c64da75394596c1aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sat, 28 Dec 2019 13:33:43 +0100
Subject: [PATCH] Added MatrixView.

---
 src/TNL/Matrices/Matrix.h                     |  21 +-
 .../Matrices/{Matrix_impl.h => Matrix.hpp}    |  33 +-
 src/TNL/Matrices/MatrixView.h                 | 150 +++++++++
 src/TNL/Matrices/MatrixView.hpp               | 286 ++++++++++++++++++
 4 files changed, 477 insertions(+), 13 deletions(-)
 rename src/TNL/Matrices/{Matrix_impl.h => Matrix.hpp} (92%)
 create mode 100644 src/TNL/Matrices/MatrixView.h
 create mode 100644 src/TNL/Matrices/MatrixView.hpp

diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 4a038eb2e8..96409c89b7 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -15,6 +15,7 @@
 #include <TNL/Devices/Host.h>
 #include <TNL/Containers/Vector.h>
 #include <TNL/Containers/VectorView.h>
+#include <TNL/Matrices/MatrixView.h>
 
 namespace TNL {
 /**
@@ -30,13 +31,15 @@ class Matrix : public Object
 {
 public:
    using RealType = Real;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
-   typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
-   typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
-   typedef Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > ValuesVector;
+   using DeviceType = Device;
+   using IndexType = Index;
+   using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
+   using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+   using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+   using ValuesVector = Containers::Vector< RealType, DeviceType, IndexType, RealAllocator >;
    using RealAllocatorType = RealAllocator;
+   using ViewType = MatrixView< Real, Device, Index >;
+   using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >;
 
    Matrix( const RealAllocatorType& allocator = RealAllocatorType() );
 
@@ -44,6 +47,10 @@ public:
            const IndexType columns,
            const RealAllocatorType& allocator = RealAllocatorType() );
 
+   ViewType getView();
+
+   ConstViewType getConstView() const;
+
    virtual void setDimensions( const IndexType rows,
                                const IndexType columns );
 
@@ -162,4 +169,4 @@ void MatrixVectorProductCuda( const Matrix& matrix,
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Matrix_impl.h>
+#include <TNL/Matrices/Matrix.hpp>
diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix.hpp
similarity index 92%
rename from src/TNL/Matrices/Matrix_impl.h
rename to src/TNL/Matrices/Matrix.hpp
index a93c7a893f..91b81ffcf0 100644
--- a/src/TNL/Matrices/Matrix_impl.h
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -43,6 +43,28 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType
 {
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+auto
+Matrix< Real, Device, Index, RealAllocator >::
+getView() -> ViewType
+{
+   return ViewType( rows, columns, values.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          typename RealAllocator >
+auto
+Matrix< Real, Device, Index, RealAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( rows, columns, values.getConstView() );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -105,12 +127,11 @@ template< typename Real,
           typename RealAllocator >
 Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const
 {
-    IndexType nonZeroElements( 0 );
-    for( IndexType i = 0; this->values.getSize(); i++ )
-        if( this->values.getElement( i ) != 0.0 )
-            nonZeroElements++;
-      
-    return nonZeroElements;
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
new file mode 100644
index 0000000000..a2fa975cf1
--- /dev/null
+++ b/src/TNL/Matrices/MatrixView.h
@@ -0,0 +1,150 @@
+/***************************************************************************
+                          MatrixView.h  -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Object.h>
+#include <TNL/Allocators/Default.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+
+namespace TNL {
+/**
+ * \brief Namespace for matrix formats.
+ */
+namespace Matrices {
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int >
+class MatrixView : public Object
+{
+public:
+   using RealType = Real;
+   typedef Device DeviceType;
+   typedef Index IndexType;
+   typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+   typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+   typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
+   typedef Containers::VectorView< RealType, DeviceType, IndexType > ValuesView;
+
+   __cuda_callable__
+   MatrixView();
+
+   __cuda_callable__
+   MatrixView( const IndexType rows,
+               const IndexType columns,
+               const ValuesView& values );
+
+   __cuda_callable__
+   MatrixView( const MatrixView& view ) = default;
+
+   virtual IndexType getRowLength( const IndexType row ) const = 0;
+
+   // TODO: implementation is not parallel
+   // TODO: it would be nice if padding zeros could be stripped
+   void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const;
+
+   virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const;
+
+   IndexType getNumberOfMatrixElements() const;
+
+   virtual IndexType getNumberOfNonzeroMatrixElements() const = 0;
+
+   void reset();
+
+   __cuda_callable__
+   IndexType getRows() const;
+
+   __cuda_callable__
+   IndexType getColumns() const;
+
+   /****
+    * TODO: The fast variants of the following methods cannot be virtual.
+    * If they were, they could not be used in the CUDA kernels. If CUDA allows it
+    * in the future and it does not slow down, declare them as virtual here.
+    */
+
+   virtual bool setElement( const IndexType row,
+                            const IndexType column,
+                            const RealType& value ) = 0;
+
+   virtual bool addElement( const IndexType row,
+                            const IndexType column,
+                            const RealType& value,
+                            const RealType& thisElementMultiplicator = 1.0 ) = 0;
+
+   virtual bool setRow( const IndexType row,
+                        const IndexType* columns,
+                        const RealType* values,
+                        const IndexType numberOfElements ) = 0;
+
+   virtual bool addRow( const IndexType row,
+                        const IndexType* columns,
+                        const RealType* values,
+                        const IndexType numberOfElements,
+                        const RealType& thisElementMultiplicator = 1.0 ) = 0;
+
+   virtual Real getElement( const IndexType row,
+                            const IndexType column ) const = 0;
+
+   const ValuesView& getValues() const;
+
+   ValuesView& getValues();
+
+   // TODO: parallelize and optimize for sparse matrices
+   template< typename Matrix >
+   bool operator == ( const Matrix& matrix ) const;
+
+   template< typename Matrix >
+   bool operator != ( const Matrix& matrix ) const;
+
+   virtual void save( File& file ) const;
+
+   virtual void load( File& file );
+
+   virtual void print( std::ostream& str ) const;
+
+
+   // TODO: method for symmetric matrices, should not be in general Matrix interface
+   __cuda_callable__
+   const IndexType& getNumberOfColors() const;
+
+   // TODO: method for symmetric matrices, should not be in general Matrix interface
+   void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector);
+
+   protected:
+
+   IndexType rows, columns;
+
+   ValuesView values;
+};
+
+template< typename Real, typename Device, typename Index >
+std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, Index >& m )
+{
+   m.print( str );
+   return str;
+}
+
+/*
+template< typename Matrix,
+          typename InVector,
+          typename OutVector >
+void MatrixVectorProductCuda( const Matrix& matrix,
+                              const InVector& inVector,
+                              OutVector& outVector );
+*/
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/MatrixView.hpp>
diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp
new file mode 100644
index 0000000000..bd3d9beaee
--- /dev/null
+++ b/src/TNL/Matrices/MatrixView.hpp
@@ -0,0 +1,286 @@
+/***************************************************************************
+                          MatrixView.hpp  -  description
+                             -------------------
+    begin                : Dec 28, 2019
+    copyright            : (C) 2019 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Cuda/MemoryHelpers.h>
+#include <TNL/Cuda/SharedMemory.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+MatrixView< Real, Device, Index >::
+MatrixView()
+: rows( 0 ),
+  columns( 0 )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+MatrixView< Real, Device, Index >::
+MatrixView( const IndexType rows_, 
+            const IndexType columns_,
+            const ValuesView& values_ )
+ : rows( rows_ ), columns( columns_ ), values( values_ )
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const
+{
+   rowLengths.setSize( this->getRows() );
+   getCompressedRowLengths( rowLengths.getView() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const
+{
+   TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" );
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      rowLengths.setElement( row, this->getRowLength( row ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index MatrixView< Real, Device, Index >::getNumberOfMatrixElements() const
+{
+   return this->values.getSize();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index MatrixView< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const
+{
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index MatrixView< Real, Device, Index >::getRows() const
+{
+   return this->rows;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index MatrixView< Real, Device, Index >::getColumns() const
+{
+   return this->columns;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+const typename MatrixView< Real, Device, Index >::ValuesView&
+MatrixView< Real, Device, Index >::
+getValues() const
+{
+   return this->values;
+}
+   
+template< typename Real,
+          typename Device,
+          typename Index >
+typename MatrixView< Real, Device, Index >::ValuesView& 
+MatrixView< Real, Device, Index >::
+getValues()
+{
+   return this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::reset()
+{
+   this->rows = 0;
+   this->columns = 0;
+   this->values.reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename MatrixT >
+bool MatrixView< Real, Device, Index >::operator == ( const MatrixT& matrix ) const
+{
+   if( this->getRows() != matrix.getRows() ||
+       this->getColumns() != matrix.getColumns() )
+      return false;
+   for( IndexType row = 0; row < this->getRows(); row++ )
+      for( IndexType column = 0; column < this->getColumns(); column++ )
+         if( this->getElement( row, column ) != matrix.getElement( row, column ) )
+            return false;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename MatrixT >
+bool MatrixView< Real, Device, Index >::operator != ( const MatrixT& matrix ) const
+{
+   return ! operator == ( matrix );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::save( File& file ) const
+{
+   Object::save( file );
+   file.save( &this->rows );
+   file.save( &this->columns );
+   file << this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::load( File& file )
+{
+   Object::load( file );
+   file.load( &this->rows );
+   file.load( &this->columns );
+   file >> this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void MatrixView< Real, Device, Index >::print( std::ostream& str ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const Index&
+MatrixView< Real, Device, Index >::
+getNumberOfColors() const
+{
+   return this->numberOfColors;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void 
+MatrixView< Real, Device, Index >::
+computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
+{
+    for( IndexType i = this->getRows() - 1; i >= 0; i-- )
+    {
+        // init color array
+        Containers::Vector< Index, Device, Index > usedColors;
+        usedColors.setSize( this->numberOfColors );
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            usedColors.setElement( j, 0 );
+
+        // find all colors used in given row
+        for( IndexType j = i + 1; j < this->getColumns(); j++ )
+             if( this->getElement( i, j ) != 0.0 )
+                 usedColors.setElement( colorsVector.getElement( j ), 1 );
+
+        // find unused color
+        bool found = false;
+        for( IndexType j = 0; j < this->numberOfColors; j++ )
+            if( usedColors.getElement( j ) == 0 )
+            {
+                colorsVector.setElement( i, j );
+                found = true;
+                break;
+            }
+        if( !found )
+        {
+            colorsVector.setElement( i, this->numberOfColors );
+            this->numberOfColors++;
+        }
+    }
+}
+
+/*
+#ifdef HAVE_CUDA
+template< typename Matrix,
+          typename InVector,
+          typename OutVector >
+__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix,
+                                               const InVector* inVector,
+                                               OutVector* outVector,
+                                               int gridIdx )
+{
+   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
+   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < matrix->getRows() )
+      ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
+}
+#endif
+
+template< typename Matrix,
+          typename InVector,
+          typename OutVector >
+void MatrixVectorProductCuda( const Matrix& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+{
+#ifdef HAVE_CUDA
+   typedef typename Matrix::IndexType IndexType;
+   Matrix* kernel_this = Cuda::passToDevice( matrix );
+   InVector* kernel_inVector = Cuda::passToDevice( inVector );
+   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
+   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+   {
+      if( gridIdx == cudaGrids - 1 )
+         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+      MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                     ( kernel_this,
+                                       kernel_inVector,
+                                       kernel_outVector,
+                                       gridIdx );
+      TNL_CHECK_CUDA_DEVICE;
+   }
+   Cuda::freeFromDevice( kernel_this );
+   Cuda::freeFromDevice( kernel_inVector );
+   Cuda::freeFromDevice( kernel_outVector );
+   TNL_CHECK_CUDA_DEVICE;
+#endif
+}
+*/
+
+} // namespace Matrices
+} // namespace TNL
-- 
GitLab